Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(glam) add fully qualified table names in legacy telemetry queries #5559

Merged
merged 5 commits into from
May 13, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
WITH preconditions AS (
SELECT
IF(
(SELECT MAX(submission_date) FROM clients_histogram_aggregates_v2) = DATE_SUB(
DATE(@submission_date),
INTERVAL 1 DAY
),
(
SELECT
MAX(submission_date)
FROM
`moz-fx-data-shared-prod.telemetry_derived.clients_histogram_aggregates_v2`
) = DATE_SUB(DATE(@submission_date), INTERVAL 1 DAY),
TRUE,
ERROR('Pre-condition failed: table clients_histogram_aggregates_v2 must be up to date')
) histogram_aggregates_up_to_date
Expand All @@ -13,7 +15,7 @@ filtered_date_channel AS (
SELECT
* EXCEPT (histogram_aggregates_up_to_date)
FROM
clients_daily_histogram_aggregates_v1,
`moz-fx-data-shared-prod.telemetry_derived.clients_daily_histogram_aggregates_v1`,
preconditions
WHERE
preconditions.histogram_aggregates_up_to_date
Expand Down Expand Up @@ -64,7 +66,7 @@ version_filtered_new AS (
FROM
filtered_aggregates AS hist_aggs
LEFT JOIN
latest_versions
`moz-fx-data-shared-prod.telemetry_derived.latest_versions` AS latest_versions
ON latest_versions.channel = hist_aggs.channel
WHERE
CAST(app_version AS INT64) >= (latest_version - 2)
Expand All @@ -85,7 +87,7 @@ aggregated_histograms AS (
key,
process,
agg_type,
udf.map_sum(ARRAY_CONCAT_AGG(value)) AS aggregates
`moz-fx-data-shared-prod`.udf.map_sum(ARRAY_CONCAT_AGG(value)) AS aggregates
FROM
version_filtered_new
GROUP BY
Expand All @@ -105,7 +107,7 @@ aggregated_histograms AS (
latest_version
)
SELECT
udf_js.sample_id(client_id) AS sample_id,
`moz-fx-data-shared-prod`.udf_js.sample_id(client_id) AS sample_id,
client_id,
os,
app_version,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
CREATE TEMP FUNCTION udf_merged_user_data(old_aggs ANY TYPE, new_aggs ANY TYPE)
RETURNS ARRAY<STRUCT<
RETURNS ARRAY<
STRUCT<
first_bucket INT64,
last_bucket INT64,
num_buckets INT64,
Expand All @@ -8,19 +9,23 @@ CREATE TEMP FUNCTION udf_merged_user_data(old_aggs ANY TYPE, new_aggs ANY TYPE)
key STRING,
process STRING,
agg_type STRING,
aggregates ARRAY<STRUCT<key STRING, value INT64>>>> AS (
aggregates ARRAY<STRUCT<key STRING, value INT64>>
>
> AS (
(
WITH unnested AS
(SELECT *
FROM UNNEST(old_aggs)

WITH unnested AS (
SELECT
*
FROM
UNNEST(old_aggs)
UNION ALL

SELECT *
FROM UNNEST(new_aggs)),

aggregated_data AS
(SELECT AS STRUCT
SELECT
*
FROM
UNNEST(new_aggs)
),
aggregated_data AS (
SELECT AS STRUCT
first_bucket,
last_bucket,
num_buckets,
Expand All @@ -30,7 +35,8 @@ CREATE TEMP FUNCTION udf_merged_user_data(old_aggs ANY TYPE, new_aggs ANY TYPE)
process,
agg_type,
mozfun.map.sum(ARRAY_CONCAT_AGG(aggregates)) AS histogram_aggregates
FROM unnested
FROM
unnested
GROUP BY
first_bucket,
last_bucket,
Expand All @@ -39,37 +45,48 @@ CREATE TEMP FUNCTION udf_merged_user_data(old_aggs ANY TYPE, new_aggs ANY TYPE)
metric_type,
key,
process,
agg_type)

SELECT ARRAY_AGG((
first_bucket,
last_bucket,
num_buckets,
metric,
metric_type,
key,
process,
agg_type,
histogram_aggregates))
FROM aggregated_data
agg_type
)
SELECT
ARRAY_AGG(
(
first_bucket,
last_bucket,
num_buckets,
metric,
metric_type,
key,
process,
agg_type,
histogram_aggregates
)
)
FROM
aggregated_data
)
);

WITH clients_histogram_aggregates_new AS
(SELECT *
FROM clients_histogram_aggregates_new_v1
WHERE sample_id >= @min_sample_id
AND sample_id <= @max_sample_id),

clients_histogram_aggregates_partition AS
(SELECT *
FROM clients_histogram_aggregates_v1
WHERE submission_date = DATE_SUB(@submission_date, INTERVAL 1 DAY)
WITH clients_histogram_aggregates_new AS (
SELECT
*
FROM
`moz-fx-data-shared-prod.telemetry_derived.clients_histogram_aggregates_new_v1`
WHERE
sample_id >= @min_sample_id
AND sample_id <= @max_sample_id
),
clients_histogram_aggregates_partition AS (
SELECT
*
FROM
`moz-fx-data-shared-prod.telemetry_derived.clients_histogram_aggregates_v1`
WHERE
submission_date = DATE_SUB(@submission_date, INTERVAL 1 DAY)
AND sample_id >= @min_sample_id
AND sample_id <= @max_sample_id),

clients_histogram_aggregates_old AS
(SELECT
AND sample_id <= @max_sample_id
),
clients_histogram_aggregates_old AS (
SELECT
sample_id,
client_id,
os,
Expand All @@ -78,13 +95,16 @@ clients_histogram_aggregates_old AS
hist_aggs.channel AS channel,
CONCAT(client_id, os, app_version, app_build_id, hist_aggs.channel) AS join_key,
histogram_aggregates
FROM clients_histogram_aggregates_partition AS hist_aggs
LEFT JOIN latest_versions
ON latest_versions.channel = hist_aggs.channel
WHERE app_version >= (latest_version - 2)),

merged AS
(SELECT
FROM
clients_histogram_aggregates_partition AS hist_aggs
LEFT JOIN
`moz-fx-data-shared-prod.telemetry_derived.latest_versions` AS latest_versions
ON latest_versions.channel = hist_aggs.channel
WHERE
app_version >= (latest_version - 2)
),
merged AS (
SELECT
COALESCE(old_data.sample_id, new_data.sample_id) AS sample_id,
COALESCE(old_data.client_id, new_data.client_id) AS client_id,
COALESCE(old_data.os, new_data.os) AS os,
Expand All @@ -93,22 +113,25 @@ merged AS
COALESCE(old_data.channel, new_data.channel) AS channel,
old_data.histogram_aggregates AS old_aggs,
ARRAY(
SELECT AS STRUCT
first_bucket,
last_bucket,
num_buckets,
metric,
metric_type,
key,
process,
agg_type,
aggregates
FROM UNNEST(new_data.histogram_aggregates)
SELECT AS STRUCT
first_bucket,
last_bucket,
num_buckets,
metric,
metric_type,
key,
process,
agg_type,
aggregates
FROM
UNNEST(new_data.histogram_aggregates)
) AS new_aggs
FROM clients_histogram_aggregates_old AS old_data
FULL OUTER JOIN clients_histogram_aggregates_new AS new_data
ON new_data.join_key = old_data.join_key)

FROM
clients_histogram_aggregates_old AS old_data
FULL OUTER JOIN
clients_histogram_aggregates_new AS new_data
ON new_data.join_key = old_data.join_key
)
SELECT
@submission_date AS submission_date,
sample_id,
Expand All @@ -118,4 +141,5 @@ SELECT
app_build_id,
channel,
udf_merged_user_data(old_aggs, new_aggs) AS histogram_aggregates
FROM merged
FROM
merged
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,12 @@ RETURNS ARRAY<
WITH preconditions AS (
SELECT
IF(
(SELECT MAX(submission_date) FROM clients_histogram_aggregates_v2) = DATE_SUB(
DATE(@submission_date),
INTERVAL 1 DAY
),
(
SELECT
MAX(submission_date)
FROM
`moz-fx-data-shared-prod.telemetry_derived.clients_histogram_aggregates_v2`
) = DATE_SUB(DATE(@submission_date), INTERVAL 1 DAY),
TRUE,
ERROR('Pre-condition failed: Current submission_date parameter skips a day or more of data.')
) histogram_aggregates_up_to_date
Expand All @@ -98,7 +100,7 @@ clients_histogram_aggregates_new AS (
SELECT
* EXCEPT (histogram_aggregates_up_to_date)
FROM
telemetry_derived.clients_histogram_aggregates_new_v1,
`moz-fx-data-shared-prod.telemetry_derived.clients_histogram_aggregates_new_v1`,
preconditions
WHERE
preconditions.histogram_aggregates_up_to_date
Expand All @@ -109,7 +111,9 @@ clients_histogram_aggregates_partition AS (
SELECT
*
FROM
telemetry_derived.clients_histogram_aggregates_v2
`moz-fx-data-shared-prod.telemetry_derived.clients_histogram_aggregates_v2`
WHERE
submission_date = DATE_SUB(DATE(@submission_date), INTERVAL 1 DAY)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added this `submission_date` filter to be extra safe. The table is overwritten on every execution, so it should only ever contain a single submission date, but the filter guards against the case where it contains more than one.

),
clients_histogram_aggregates_old AS (
SELECT
Expand All @@ -125,7 +129,7 @@ clients_histogram_aggregates_old AS (
FROM
clients_histogram_aggregates_partition AS hist_aggs
LEFT JOIN
latest_versions
`moz-fx-data-shared-prod.telemetry_derived.latest_versions` AS latest_versions
ON latest_versions.channel = hist_aggs.channel
WHERE
app_version >= (latest_version - 2)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ WITH filtered_data AS (
os = 'Windows'
AND channel = 'release' AS sampled
FROM
clients_histogram_aggregates_v2
`moz-fx-data-shared-prod.telemetry_derived.clients_histogram_aggregates_v2`
CROSS JOIN
UNNEST(histogram_aggregates)
WHERE
Expand Down