Skip to content

Commit

Permalink
Bug 1735563 - Add events_daily for Mozilla VPN (#2429)
Browse files Browse the repository at this point in the history
Co-authored-by: akkomar <akkomar@users.noreply.github.com>
  • Loading branch information
wlach and akkomar committed Oct 14, 2021
1 parent 2e2393a commit 821f9d4
Show file tree
Hide file tree
Showing 20 changed files with 739 additions and 3 deletions.
Expand Up @@ -12,3 +12,7 @@ messaging_system:
firefox_accounts:
base_dataset: firefox_accounts_derived
view_dataset: firefox_accounts
# Mozilla VPN
mozilla_vpn:
base_dataset: mozilla_vpn_derived
view_dataset: mozilla_vpn
Expand Up @@ -16,7 +16,7 @@ WITH source AS (
name AS event,
extra,
FROM
{{ glean_app_id }}.events e
{{ glean_app_id }}.{{ events_table_name }} e
CROSS JOIN
UNNEST(e.events) AS event
{% if not loop.last %}
Expand Down
Expand Up @@ -8,7 +8,7 @@ WITH all_events AS (
name AS event,
extra,
FROM
{{ glean_app_id }}.events e
{{ glean_app_id }}.{{ events_table_name }} e
CROSS JOIN
UNNEST(e.events) AS event
{% if not loop.last %}
Expand Down
@@ -1,6 +1,7 @@
fenix_derived:
name: Firefox for Android
dataset: fenix
events_table_name: events
glean: True
glean_app_ids: [ org_mozilla_firefox, org_mozilla_firefox_beta, org_mozilla_fenix ]
start_date: 2020-01-01
Expand Down Expand Up @@ -29,3 +30,14 @@ firefox_accounts_derived:
start_date: 2020-01-01
max_property_values: 1000
dag_name: bqetl_event_rollup
# Event-types generation settings for Mozilla VPN (Glean app `mozillavpn`).
mozilla_vpn_derived:
name: Mozilla VPN
dataset: mozilla_vpn
# Table holding the events; substituted into the query templates as
# `{{ glean_app_id }}.{{ events_table_name }}`, i.e. `mozillavpn.main` here
# (Fenix uses `events`).
events_table_name: main
glean: True
glean_app_ids: [ mozillavpn ]
# Lower bound on submission_date; rendered as `submission_date >= '2021-10-01'`
# in the generated event_types_history query.
start_date: 2021-10-01
# Extra keys excluded when event properties are indexed in the generated
# event_types_history query.
skipped_properties:
- time_ms
# Cap on indexed values per property; rendered as
# `event_property_value_index <= 1000` in the generated query.
max_property_values: 1000
# Airflow DAG that the generated tasks are attached to.
dag_name: bqetl_event_rollup
Expand Up @@ -14,3 +14,7 @@ firefox_accounts_derived:
name: Firefox Accounts
dataset: firefox_accounts
dag_name: bqetl_event_rollup
mozilla_vpn_derived:
name: Mozilla VPN
dataset: mozilla_vpn
dag_name: bqetl_event_rollup
Expand Up @@ -12,3 +12,6 @@ messaging_system:
firefox_accounts:
base_dataset: firefox_accounts_derived
view_dataset: firefox_accounts
mozilla_vpn:
base_dataset: mozilla_vpn_derived
view_dataset: mozilla_vpn
Expand Up @@ -23,7 +23,7 @@ WITH sample AS (
UNNEST(ping_info.experiments)
) AS experiments
FROM
{{ glean_app_id }}.events e
{{ glean_app_id }}.{{ events_table_name }} e
CROSS JOIN
UNNEST(e.events) AS event
{% if not loop.last %}
Expand Down
Expand Up @@ -2,6 +2,7 @@ fenix_derived:
name: Firefox for Android
include_normalized_fields: True
include_metadata_fields: True
events_table_name: events
glean: True
dataset: fenix
glean_app_ids: [ org_mozilla_firefox, org_mozilla_firefox_beta, org_mozilla_fenix ]
Expand Down Expand Up @@ -124,3 +125,28 @@ firefox_accounts_derived:
dest: country
- src: language
dest: language
# Settings for the Mozilla VPN (Glean app `mozillavpn`) query that uses the
# template reading `ping_info.experiments`.
# NOTE(review): presumably this configures the events_daily query -- confirm.
mozilla_vpn_derived:
name: Mozilla VPN
include_normalized_fields: True
include_metadata_fields: True
# Table holding the events; substituted into the template as
# `{{ glean_app_id }}.{{ events_table_name }}` (Fenix uses `events`).
events_table_name: main
glean: True
dataset: mozilla_vpn
glean_app_ids: [ mozillavpn ]
start_date: 2021-10-01
# Airflow DAG that the generated task is attached to.
dag_name: bqetl_event_rollup
# Each entry pairs a source field (`src`) with an output name (`dest`).
# NOTE(review): presumably these become per-user columns in the output
# table -- confirm against the query template.
user_properties:
- src: app_build
dest: app_build
- src: app_channel
dest: app_channel
- src: app_display_version
dest: app_display_version
- src: architecture
dest: architecture
- src: first_run_date
dest: first_run_date
- src: telemetry_sdk_build
dest: telemetry_sdk_build
- src: locale
dest: locale
49 changes: 49 additions & 0 deletions dags/bqetl_event_rollup.py
Expand Up @@ -121,6 +121,43 @@
dag=dag,
)

# Task that writes mozilla_vpn_derived.event_types_v1.
mozilla_vpn_derived__event_types__v1 = bigquery_etl_query(
    dag=dag,
    task_id="mozilla_vpn_derived__event_types__v1",
    # Destination: moz-fx-data-shared-prod.mozilla_vpn_derived.event_types_v1
    project_id="moz-fx-data-shared-prod",
    dataset_id="mozilla_vpn_derived",
    destination_table="event_types_v1",
    # No date partition parameter is set for this task; submission_date is
    # passed explicitly as a typed query parameter instead.
    date_partition_parameter=None,
    parameters=["submission_date:DATE:{{ds}}"],
    depends_on_past=False,
    # Ownership and notification addresses.
    owner="wlachance@mozilla.com",
    email=["akomar@mozilla.com", "wlachance@mozilla.com"],
)

# Task that writes mozilla_vpn_derived.event_types_history_v1.
mozilla_vpn_derived__event_types_history__v1 = bigquery_etl_query(
    dag=dag,
    task_id="mozilla_vpn_derived__event_types_history__v1",
    # Destination: moz-fx-data-shared-prod.mozilla_vpn_derived.event_types_history_v1
    project_id="moz-fx-data-shared-prod",
    dataset_id="mozilla_vpn_derived",
    destination_table="event_types_history_v1",
    date_partition_parameter="submission_date",
    # Each run requires the previous run of this task to have succeeded.
    depends_on_past=True,
    # Ownership and notification addresses.
    owner="wlachance@mozilla.com",
    email=["akomar@mozilla.com", "wlachance@mozilla.com"],
)

# Task that writes mozilla_vpn_derived.events_daily_v1.
mozilla_vpn_derived__events_daily__v1 = bigquery_etl_query(
    dag=dag,
    task_id="mozilla_vpn_derived__events_daily__v1",
    # Destination: moz-fx-data-shared-prod.mozilla_vpn_derived.events_daily_v1
    project_id="moz-fx-data-shared-prod",
    dataset_id="mozilla_vpn_derived",
    destination_table="events_daily_v1",
    date_partition_parameter="submission_date",
    depends_on_past=False,
    # Ownership and notification addresses.
    owner="wlachance@mozilla.com",
    email=["akomar@mozilla.com", "wlachance@mozilla.com"],
)

telemetry_derived__event_types__v1 = bigquery_etl_query(
task_id="telemetry_derived__event_types__v1",
destination_table="event_types_v1",
Expand Down Expand Up @@ -223,6 +260,18 @@
messaging_system_derived__event_types__v1
)

# Mozilla VPN task chain, listed in execution order:
#   wait_for_copy_deduplicate_all -> event_types_history -> event_types -> events_daily
mozilla_vpn_derived__event_types_history__v1.set_upstream(
    wait_for_copy_deduplicate_all
)

# event_types runs after event_types_history.
mozilla_vpn_derived__event_types__v1.set_upstream(
    mozilla_vpn_derived__event_types_history__v1
)

# events_daily runs after event_types.
mozilla_vpn_derived__events_daily__v1.set_upstream(
    mozilla_vpn_derived__event_types__v1
)

telemetry_derived__event_types__v1.set_upstream(
telemetry_derived__event_types_history__v1
)
Expand Down
8 changes: 8 additions & 0 deletions sql/moz-fx-data-shared-prod/mozilla_vpn/event_types/view.sql
@@ -0,0 +1,8 @@
-- Generated by bigquery_etl/events_daily/generate_queries.py
-- Pass-through view: exposes every column of the derived event_types_v1 table
-- under the mozilla_vpn dataset.
CREATE OR REPLACE VIEW `moz-fx-data-shared-prod.mozilla_vpn.event_types` AS
SELECT *
FROM `moz-fx-data-shared-prod.mozilla_vpn_derived.event_types_v1`
8 changes: 8 additions & 0 deletions sql/moz-fx-data-shared-prod/mozilla_vpn/events_daily/view.sql
@@ -0,0 +1,8 @@
-- Generated by bigquery_etl/events_daily/generate_queries.py
-- Pass-through view: exposes every column of the derived events_daily_v1 table
-- under the mozilla_vpn dataset.
CREATE OR REPLACE VIEW `moz-fx-data-shared-prod.mozilla_vpn.events_daily` AS
SELECT *
FROM `moz-fx-data-shared-prod.mozilla_vpn_derived.events_daily_v1`
@@ -0,0 +1,162 @@
-- Generated by bigquery_etl/events_daily/generate_queries.py
--
-- (Re)creates mozilla_vpn_derived.event_types_history_v1 from mozillavpn.main.
-- Output: one row per (category, event) carrying
--   * first_timestamp  - earliest timestamp at which the event type was seen
--   * numeric_index    - 1-based rank of the event type by first_timestamp
--   * index            - numeric_index encoded by udf.event_code_points_to_string
--   * event_properties - array of {key, value, index}, where value is the array
--                        of indexed values observed for that property
--   * submission_date  - the latest submission_date present in the input
-- The table is partitioned by submission_date and clustered by category, event.
CREATE OR REPLACE TABLE
mozilla_vpn_derived.event_types_history_v1
PARTITION BY
submission_date
CLUSTER BY
category,
event
AS
-- source: flatten each ping's `events` array into one row per event.
WITH source AS (
SELECT
DATE(submission_timestamp) AS submission_date,
-- Event timestamp = ping start time + the event's millisecond offset.
-- The SAFE. prefix yields NULL instead of raising an error.
SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL timestamp MILLISECOND) AS timestamp,
category,
name AS event,
extra,
FROM
mozillavpn.main e
CROSS JOIN
UNNEST(e.events) AS event
),
-- sample: restrict the input to submission dates on or after 2021-10-01.
sample AS (
SELECT
*
FROM
source
WHERE
submission_date >= '2021-10-01'
),
-- primary_event_types: number every (category, event) pair by the time it was
-- first seen; category and event break ties so the numbering is deterministic.
primary_event_types AS (
SELECT
category,
event,
MIN(timestamp) AS first_timestamp,
ROW_NUMBER() OVER (ORDER BY MIN(timestamp) ASC, category ASC, event ASC) AS primary_index,
FROM
sample
GROUP BY
category,
event
),
-- event_property_indices: number every extra key within its (category, event)
-- by the time it was first seen, with the key name as tie-breaker.
event_property_indices AS (
SELECT
category,
event,
MIN(timestamp) AS first_timestamp,
event_property.key AS event_property,
ROW_NUMBER() OVER (
PARTITION BY
category,
event
ORDER BY
MIN(timestamp) ASC,
event_property.key ASC
) AS event_property_index,
FROM
sample,
UNNEST(extra) AS event_property
-- Anti-join against the skipped-property list: rows whose key matches
-- 'time_ms' are dropped by the IS NULL filter below.
LEFT JOIN
UNNEST(CAST(['time_ms'] AS ARRAY<STRING>)) skipped_property
ON
skipped_property = event_property.key
WHERE
skipped_property IS NULL
GROUP BY
category,
event,
event_property
),
-- event_property_value_indices: number every distinct value of each property
-- within (category, event, key) by first-seen time, value as tie-breaker.
-- Skipped properties are not filtered here; they are removed by the inner join
-- with event_property_indices in per_event_property.
event_property_value_indices AS (
SELECT
category,
event,
MIN(timestamp) AS first_timestamp,
event_property.key AS event_property,
event_property.value AS event_property_value,
ROW_NUMBER() OVER (
PARTITION BY
category,
event,
event_property.key
ORDER BY
MIN(timestamp) ASC,
event_property.value ASC
) AS event_property_value_index,
FROM
sample,
UNNEST(extra) AS event_property
GROUP BY
category,
event,
event_property,
event_property_value
),
-- per_event_property: one row per (category, event, property) with an ordered
-- array of its values; each value carries its numeric index and the string
-- produced by udf.event_code_points_to_string for that index.
per_event_property AS (
SELECT
category,
event,
event_property,
event_property_index,
ARRAY_AGG(
STRUCT(
event_property_value AS key,
udf.event_code_points_to_string([event_property_value_index]) AS value,
event_property_value_index AS index
)
ORDER BY
event_property_value_index ASC
) AS values,
FROM
event_property_value_indices
INNER JOIN
event_property_indices
USING
(category, event, event_property)
WHERE
-- Keep only the first 1000 values seen for each property.
event_property_value_index <= 1000
GROUP BY
category,
event,
event_property,
event_property_index
),
-- per_event: one row per event type with its properties collected into an
-- array ordered by property index.
per_event AS (
SELECT
category,
event,
first_timestamp,
primary_index AS numeric_index,
udf.event_code_points_to_string([primary_index]) AS index,
-- The LEFT JOIN below keeps event types that have no properties; for those
-- the IF(...) yields NULL and IGNORE NULLS keeps it out of the array.
-- NOTE(review): `VALUES` appears to resolve to the `values` column of
-- per_event_property (identifier upper-cased by the formatter) -- confirm.
ARRAY_AGG(
IF(
event_property IS NULL,
NULL,
STRUCT(event_property AS key, VALUES AS value, event_property_index AS index)
) IGNORE NULLS
ORDER BY
event_property_index ASC
) AS event_properties
FROM
primary_event_types
LEFT JOIN
per_event_property
USING
(category, event)
GROUP BY
category,
event,
first_timestamp,
primary_index
),
-- max_date: single row holding the latest submission_date in the input.
max_date AS (
SELECT
MAX(submission_date) AS submission_date
FROM
sample
)
-- Cross join with the single-row max_date so every output row is stamped with
-- the same submission_date (the latest one in the input).
SELECT
*
FROM
per_event,
max_date
@@ -0,0 +1,19 @@
# Generated by bigquery_etl/events_daily/generate_queries.py

---
friendly_name: 'Mozilla VPN Event Types History'
description: >
Retrieve the set of [events, event_properties]
and record them in a table.
This table stores all of history, partitioned by
submission_date.
owners:
- wlachance@mozilla.com
- akomar@mozilla.com
labels:
application: mozilla_vpn
incremental: true
schedule: daily
scheduling:
dag_name: bqetl_event_rollup
depends_on_past: true

0 comments on commit 821f9d4

Please sign in to comment.