From 7a7bdb325587d108a077906c00ecc74410f1571b Mon Sep 17 00:00:00 2001 From: Eduardo Filho Date: Fri, 10 May 2024 17:05:23 -0400 Subject: [PATCH 1/3] fix(glam) fix table names to fully qualified --- .../query.sql | 18 ++- .../clients_histogram_aggregates_v1/query.sql | 150 ++++++++++-------- .../clients_histogram_aggregates_v2/query.sql | 18 ++- .../query.sql | 2 +- .../query.sql | 109 ++++++------- .../query.sql | 19 ++- .../clients_scalar_aggregates_v1/query.sql | 12 +- .../clients_scalar_probe_counts_v1/query.sql | 14 +- .../glam_sample_counts_v1/query.sql | 4 +- .../glam_user_counts_v1/query.sql | 35 ++-- .../histogram_percentiles_v1/query.sql | 2 +- .../scalar_percentiles_v1/query.sql | 73 +++++---- 12 files changed, 243 insertions(+), 213 deletions(-) diff --git a/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_aggregates_new_v1/query.sql b/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_aggregates_new_v1/query.sql index 0ffd4535824..da9dcf2aa84 100644 --- a/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_aggregates_new_v1/query.sql +++ b/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_aggregates_new_v1/query.sql @@ -1,10 +1,12 @@ WITH preconditions AS ( SELECT IF( - (SELECT MAX(submission_date) FROM clients_histogram_aggregates_v2) = DATE_SUB( - DATE(@submission_date), - INTERVAL 1 DAY - ), + ( + SELECT + MAX(submission_date) + FROM + `moz-fx-data-shared-prod.telemetry_derived.clients_histogram_aggregates_v2` + ) = DATE_SUB(DATE(@submission_date), INTERVAL 1 DAY), TRUE, ERROR('Pre-condition failed: table clients_histogram_aggregates_v2 must be up to date') ) histogram_aggregates_up_to_date @@ -13,7 +15,7 @@ filtered_date_channel AS ( SELECT * EXCEPT (histogram_aggregates_up_to_date) FROM - clients_daily_histogram_aggregates_v1, + `moz-fx-data-shared-prod.telemetry_derived.clients_daily_histogram_aggregates_v1`, preconditions WHERE preconditions.histogram_aggregates_up_to_date @@ -64,7 +66,7 @@ version_filtered_new AS ( FROM filtered_aggregates AS hist_aggs LEFT JOIN - latest_versions + `moz-fx-data-shared-prod.telemetry_derived.latest_versions` AS latest_versions ON latest_versions.channel = hist_aggs.channel WHERE CAST(app_version AS INT64) >= (latest_version - 2) @@ -85,7 +87,7 @@ aggregated_histograms AS ( key, process, agg_type, - udf.map_sum(ARRAY_CONCAT_AGG(value)) AS aggregates + `moz-fx-data-shared-prod`.udf.map_sum(ARRAY_CONCAT_AGG(value)) AS aggregates FROM version_filtered_new GROUP BY @@ -105,7 +107,7 @@ aggregated_histograms AS ( latest_version ) SELECT - udf_js.sample_id(client_id) AS sample_id, + `moz-fx-data-shared-prod`.udf_js.sample_id(client_id) AS sample_id, client_id, os, app_version, diff --git a/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_aggregates_v1/query.sql b/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_aggregates_v1/query.sql index db9df2e5a1d..8ee6540f5c1 100755 --- a/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_aggregates_v1/query.sql +++ b/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_aggregates_v1/query.sql @@ -1,5 +1,6 @@ CREATE TEMP FUNCTION udf_merged_user_data(old_aggs ANY TYPE, new_aggs ANY TYPE) - RETURNS ARRAY>>> AS ( + aggregates ARRAY> + > +> AS ( ( - WITH unnested AS - (SELECT * - FROM UNNEST(old_aggs) - + WITH unnested AS ( + SELECT + * + FROM + UNNEST(old_aggs) UNION ALL - - SELECT * - FROM UNNEST(new_aggs)), - - aggregated_data AS - (SELECT AS STRUCT + SELECT + * + FROM + UNNEST(new_aggs) + ), + aggregated_data AS ( + SELECT AS STRUCT first_bucket, last_bucket, num_buckets, @@ -30,7 +35,8 @@ CREATE TEMP FUNCTION udf_merged_user_data(old_aggs ANY TYPE, new_aggs ANY TYPE) process, agg_type, mozfun.map.sum(ARRAY_CONCAT_AGG(aggregates)) AS histogram_aggregates - FROM unnested + FROM + unnested GROUP BY first_bucket, last_bucket, @@ -39,37 +45,48 @@ CREATE TEMP FUNCTION udf_merged_user_data(old_aggs ANY TYPE, new_aggs ANY TYPE) metric_type, key, process, - agg_type) - - SELECT ARRAY_AGG(( - first_bucket, - last_bucket, - num_buckets, - metric, - metric_type, - key, - process, - agg_type, - histogram_aggregates)) - FROM aggregated_data + agg_type + ) + SELECT + ARRAY_AGG( + ( + first_bucket, + last_bucket, + num_buckets, + metric, + metric_type, + key, + process, + agg_type, + histogram_aggregates + ) + ) + FROM + aggregated_data ) ); -WITH clients_histogram_aggregates_new AS - (SELECT * - FROM clients_histogram_aggregates_new_v1 - WHERE sample_id >= @min_sample_id - AND sample_id <= @max_sample_id), - -clients_histogram_aggregates_partition AS - (SELECT * - FROM clients_histogram_aggregates_v1 - WHERE submission_date = DATE_SUB(@submission_date, INTERVAL 1 DAY) +WITH clients_histogram_aggregates_new AS ( + SELECT + * + FROM + `moz-fx-data-shared-prod.telemetry_derived.clients_histogram_aggregates_new_v1` + WHERE + sample_id >= @min_sample_id + AND sample_id <= @max_sample_id +), +clients_histogram_aggregates_partition AS ( + SELECT + * + FROM + `moz-fx-data-shared-prod.telemetry_derived.clients_histogram_aggregates_v1` + WHERE + submission_date = DATE_SUB(@submission_date, INTERVAL 1 DAY) AND sample_id >= @min_sample_id - AND sample_id <= @max_sample_id), - -clients_histogram_aggregates_old AS - (SELECT + AND sample_id <= @max_sample_id +), +clients_histogram_aggregates_old AS ( + SELECT sample_id, client_id, os, @@ -78,13 +95,16 @@ clients_histogram_aggregates_old AS hist_aggs.channel AS channel, CONCAT(client_id, os, app_version, app_build_id, hist_aggs.channel) AS join_key, histogram_aggregates - FROM clients_histogram_aggregates_partition AS hist_aggs - LEFT JOIN latest_versions - ON latest_versions.channel = hist_aggs.channel - WHERE app_version >= (latest_version - 2)), - -merged AS - (SELECT + FROM + clients_histogram_aggregates_partition AS hist_aggs + LEFT JOIN + `moz-fx-data-shared-prod.telemetry_derived.latest_versions` AS latest_versions + ON latest_versions.channel = hist_aggs.channel + WHERE + app_version >= (latest_version - 2) +), +merged AS ( + SELECT COALESCE(old_data.sample_id, new_data.sample_id) AS sample_id, COALESCE(old_data.client_id, new_data.client_id) AS client_id, COALESCE(old_data.os, new_data.os) AS os, @@ -93,22 +113,25 @@ merged AS COALESCE(old_data.channel, new_data.channel) AS channel, old_data.histogram_aggregates AS old_aggs, ARRAY( - SELECT AS STRUCT - first_bucket, - last_bucket, - num_buckets, - metric, - metric_type, - key, - process, - agg_type, - aggregates - FROM UNNEST(new_data.histogram_aggregates) + SELECT AS STRUCT + first_bucket, + last_bucket, + num_buckets, + metric, + metric_type, + key, + process, + agg_type, + aggregates + FROM + UNNEST(new_data.histogram_aggregates) ) AS new_aggs - FROM clients_histogram_aggregates_old AS old_data - FULL OUTER JOIN clients_histogram_aggregates_new AS new_data - ON new_data.join_key = old_data.join_key) - + FROM + clients_histogram_aggregates_old AS old_data + FULL OUTER JOIN + clients_histogram_aggregates_new AS new_data + ON new_data.join_key = old_data.join_key +) SELECT @submission_date AS submission_date, sample_id, @@ -118,4 +141,5 @@ SELECT app_build_id, channel, udf_merged_user_data(old_aggs, new_aggs) AS histogram_aggregates -FROM merged +FROM + merged diff --git a/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_aggregates_v2/query.sql b/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_aggregates_v2/query.sql index 3029bf774e3..24623178ea7 100644 --- a/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_aggregates_v2/query.sql +++ b/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_aggregates_v2/query.sql @@ -86,10 +86,12 @@ RETURNS ARRAY< WITH preconditions AS ( SELECT IF( - (SELECT MAX(submission_date) FROM clients_histogram_aggregates_v2) = DATE_SUB( - DATE(@submission_date), - INTERVAL 1 DAY - ), + ( + SELECT + MAX(submission_date) + FROM + `moz-fx-data-shared-prod.telemetry_derived.clients_histogram_aggregates_v2` + ) = DATE_SUB(DATE(@submission_date), INTERVAL 1 DAY), TRUE, ERROR('Pre-condition failed: Current submission_date parameter skips a day or more of data.') ) histogram_aggregates_up_to_date @@ -98,7 +100,7 @@ clients_histogram_aggregates_new AS ( SELECT * EXCEPT (histogram_aggregates_up_to_date) FROM - telemetry_derived.clients_histogram_aggregates_new_v1, + `moz-fx-data-shared-prod.telemetry_derived.clients_histogram_aggregates_new_v1`, preconditions WHERE preconditions.histogram_aggregates_up_to_date @@ -109,7 +111,9 @@ clients_histogram_aggregates_partition AS ( SELECT * FROM - telemetry_derived.clients_histogram_aggregates_v2 + `moz-fx-data-shared-prod.telemetry_derived.clients_histogram_aggregates_v2` + WHERE + submission_date = DATE_SUB(DATE(@submission_date), INTERVAL 1 DAY) ), clients_histogram_aggregates_old AS ( SELECT @@ -125,7 +129,7 @@ clients_histogram_aggregates_old AS ( FROM clients_histogram_aggregates_partition AS hist_aggs LEFT JOIN - latest_versions + `moz-fx-data-shared-prod.telemetry_derived.latest_versions` AS latest_versions ON latest_versions.channel = hist_aggs.channel WHERE app_version >= (latest_version - 2) diff --git a/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_bucket_counts_v1/query.sql b/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_bucket_counts_v1/query.sql index 7ce48e02aad..e7aed2e900e 100644 --- a/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_bucket_counts_v1/query.sql +++ b/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_bucket_counts_v1/query.sql @@ -18,7 +18,7 @@ WITH filtered_data AS ( os = 'Windows' AND channel = 'release' AS sampled FROM - clients_histogram_aggregates_v2 + `moz-fx-data-shared-prod.telemetry_derived.clients_histogram_aggregates_v2` CROSS JOIN UNNEST(histogram_aggregates) WHERE diff --git a/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_probe_counts_v1/query.sql b/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_probe_counts_v1/query.sql index 27557afafec..9bbb7e48d46 100755 --- a/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_probe_counts_v1/query.sql +++ b/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_probe_counts_v1/query.sql @@ -1,26 +1,23 @@ -CREATE TEMP FUNCTION - udf_get_buckets(min INT64, - max INT64, - num INT64, - metric_type STRING) - RETURNS ARRAY AS ( ( - WITH - buckets AS ( +CREATE TEMP FUNCTION udf_get_buckets(min INT64, max INT64, num INT64, metric_type STRING) +RETURNS ARRAY AS ( + ( + WITH buckets AS ( SELECT CASE - WHEN metric_type = 'histogram-exponential' THEN mozfun.glam.histogram_generate_exponential_buckets(min, max, num) - ELSE - mozfun.glam.histogram_generate_linear_buckets(min, - max, - num) - END - AS arr ) + WHEN metric_type = 'histogram-exponential' + THEN mozfun.glam.histogram_generate_exponential_buckets(min, max, num) + ELSE mozfun.glam.histogram_generate_linear_buckets(min, max, num) + END AS arr + ) SELECT ARRAY_AGG(CAST(item AS INT64)) FROM buckets CROSS JOIN - UNNEST(arr) AS item ) ); + UNNEST(arr) AS item + ) +); + WITH aggregates AS ( SELECT os, @@ -37,14 +34,15 @@ WITH aggregates AS ( agg_type AS client_agg_type, 'histogram' AS agg_type, CAST(ROUND(SUM(record.value)) AS INT64) AS total_users, - mozfun.glam.histogram_fill_buckets_dirichlet( mozfun.map.sum(ARRAY_AGG(record)), - mozfun.glam.histogram_buckets_cast_string_array(udf_get_buckets(first_bucket, - MAX(last_bucket), - MAX(num_buckets), - metric_type)), - CAST(ROUND(SUM(record.value)) AS INT64) ) AS aggregates + mozfun.glam.histogram_fill_buckets_dirichlet( + mozfun.map.sum(ARRAY_AGG(record)), + mozfun.glam.histogram_buckets_cast_string_array( + udf_get_buckets(first_bucket, MAX(last_bucket), MAX(num_buckets), metric_type) + ), + CAST(ROUND(SUM(record.value)) AS INT64) + ) AS aggregates FROM - clients_histogram_bucket_counts_v1 AS bucket_counts + `moz-fx-data-shared-prod.telemetry_derived.clients_histogram_bucket_counts_v1` AS bucket_counts GROUP BY os, app_version, @@ -55,8 +53,9 @@ WITH aggregates AS ( KEY, process, client_agg_type, - first_bucket ), - non_norm_aggregates AS ( + first_bucket +), +non_norm_aggregates AS ( SELECT os, app_version, @@ -71,13 +70,14 @@ WITH aggregates AS ( MAX(num_buckets) AS num_buckets, agg_type AS client_agg_type, 'histogram' AS agg_type, - mozfun.glam.histogram_fill_buckets( mozfun.map.sum(ARRAY_AGG(record)), - mozfun.glam.histogram_buckets_cast_string_array(udf_get_buckets(first_bucket, - MAX(last_bucket), - MAX(num_buckets), - metric_type))) AS non_norm_aggregates, + mozfun.glam.histogram_fill_buckets( + mozfun.map.sum(ARRAY_AGG(record)), + mozfun.glam.histogram_buckets_cast_string_array( + udf_get_buckets(first_bucket, MAX(last_bucket), MAX(num_buckets), metric_type) + ) + ) AS non_norm_aggregates, FROM - clients_non_norm_histogram_bucket_counts_v1 AS non_norm_bucket_counts + `moz-fx-data-shared-prod.telemetry_derived.clients_non_norm_histogram_bucket_counts_v1` AS non_norm_bucket_counts GROUP BY os, app_version, @@ -88,28 +88,29 @@ WITH aggregates AS ( KEY, process, client_agg_type, - first_bucket) - - SELECT - IF - (os = '*', NULL, os) AS os, - app_version, - IF - (app_build_id = '*', NULL, app_build_id) AS app_build_id, - channel, - metric, - metric_type, - key, - process, - first_bucket, - aggregates.last_bucket, - aggregates.num_buckets, - client_agg_type, - agg_type, - aggregates.total_users, - aggregates.aggregates, - non_norm_aggregates.non_norm_aggregates - FROM aggregates INNER JOIN non_norm_aggregates + first_bucket +) +SELECT + IF(os = '*', NULL, os) AS os, + app_version, + IF(app_build_id = '*', NULL, app_build_id) AS app_build_id, + channel, + metric, + metric_type, + key, + process, + first_bucket, + aggregates.last_bucket, + aggregates.num_buckets, + client_agg_type, + agg_type, + aggregates.total_users, + aggregates.aggregates, + non_norm_aggregates.non_norm_aggregates +FROM + aggregates +INNER JOIN + non_norm_aggregates USING ( os, app_version, @@ -124,4 +125,4 @@ WITH aggregates AS ( num_buckets, client_agg_type, agg_type - ) \ No newline at end of file + ) diff --git a/sql/moz-fx-data-shared-prod/telemetry_derived/clients_non_norm_histogram_bucket_counts_v1/query.sql b/sql/moz-fx-data-shared-prod/telemetry_derived/clients_non_norm_histogram_bucket_counts_v1/query.sql index b6bb09ff2de..3ffeae76a96 100644 --- a/sql/moz-fx-data-shared-prod/telemetry_derived/clients_non_norm_histogram_bucket_counts_v1/query.sql +++ b/sql/moz-fx-data-shared-prod/telemetry_derived/clients_non_norm_histogram_bucket_counts_v1/query.sql @@ -57,15 +57,15 @@ build_ids AS ( -- for context see https://github.com/mozilla/glam/issues/1575#issuecomment-946880387 CASE WHEN channel = 'release' - THEN COUNT(DISTINCT client_id) > 625000/(@max_sample_id - @min_sample_id + 1) + THEN COUNT(DISTINCT client_id) > 625000 / (@max_sample_id - @min_sample_id + 1) WHEN channel = 'beta' - THEN COUNT(DISTINCT client_id) > 9000/(@max_sample_id - @min_sample_id + 1) + THEN COUNT(DISTINCT client_id) > 9000 / (@max_sample_id - @min_sample_id + 1) WHEN channel = 'nightly' - THEN COUNT(DISTINCT client_id) > 375/(@max_sample_id - @min_sample_id + 1) - ELSE COUNT(DISTINCT client_id) > 100/(@max_sample_id - @min_sample_id + 1) + THEN COUNT(DISTINCT client_id) > 375 / (@max_sample_id - @min_sample_id + 1) + ELSE COUNT(DISTINCT client_id) > 100 / (@max_sample_id - @min_sample_id + 1) END ), -all_combos as ( +all_combos AS ( SELECT * EXCEPT (os, app_build_id), COALESCE(combo.os, table.os) AS os, @@ -80,12 +80,11 @@ all_combos as ( ), non_normalized_histograms AS ( SELECT - * EXCEPT (sampled) REPLACE( - mozfun.map.sum(ARRAY_CONCAT_AGG(aggregates)) AS aggregates - ) + * EXCEPT (sampled) REPLACE(mozfun.map.sum(ARRAY_CONCAT_AGG(aggregates)) AS aggregates) FROM all_combos - WHERE sample_id >= @min_sample_id + WHERE + sample_id >= @min_sample_id AND sample_id <= @max_sample_id GROUP BY sample_id, @@ -137,4 +136,4 @@ GROUP BY key, process, agg_type, - aggregates.key \ No newline at end of file + aggregates.key diff --git a/sql/moz-fx-data-shared-prod/telemetry_derived/clients_scalar_aggregates_v1/query.sql b/sql/moz-fx-data-shared-prod/telemetry_derived/clients_scalar_aggregates_v1/query.sql index 8a8698c4ca8..5adf54e53bb 100755 --- a/sql/moz-fx-data-shared-prod/telemetry_derived/clients_scalar_aggregates_v1/query.sql +++ b/sql/moz-fx-data-shared-prod/telemetry_derived/clients_scalar_aggregates_v1/query.sql @@ -3,7 +3,7 @@ WITH filtered_date_channel AS ( * EXCEPT (app_version), CAST(app_version AS INT64) AS app_version FROM - telemetry_derived.clients_daily_scalar_aggregates_v1 + `moz-fx-data-shared-prod.telemetry_derived.clients_daily_scalar_aggregates_v1` WHERE submission_date = @submission_date ), @@ -45,7 +45,7 @@ version_filtered_new AS ( FROM filtered_aggregates AS scalar_aggs LEFT JOIN - latest_versions + `moz-fx-data-shared-prod.telemetry_derived.latest_versions` AS latest_versions USING (channel) WHERE app_version >= (latest_version - 2) @@ -112,9 +112,9 @@ filtered_old AS ( scalar_aggs.channel, scalar_aggregates FROM - telemetry_derived.clients_scalar_aggregates_v1 AS scalar_aggs + `moz-fx-data-shared-prod.telemetry_derived.clients_scalar_aggregates_v1` AS scalar_aggs LEFT JOIN - latest_versions + `moz-fx-data-shared-prod.telemetry_derived.latest_versions` AS latest_versions USING (channel) WHERE app_version >= (latest_version - 2) @@ -142,6 +142,8 @@ SELECT app_version, app_build_id, channel, - udf.merge_scalar_user_data(ARRAY_CONCAT(old_aggs, new_aggs)) AS scalar_aggregates + `moz-fx-data-shared-prod`.udf.merge_scalar_user_data( + ARRAY_CONCAT(old_aggs, new_aggs) + ) AS scalar_aggregates FROM joined_new_old diff --git a/sql/moz-fx-data-shared-prod/telemetry_derived/clients_scalar_probe_counts_v1/query.sql b/sql/moz-fx-data-shared-prod/telemetry_derived/clients_scalar_probe_counts_v1/query.sql index 24aa86bc1ee..6c954f58bc7 100755 --- a/sql/moz-fx-data-shared-prod/telemetry_derived/clients_scalar_probe_counts_v1/query.sql +++ b/sql/moz-fx-data-shared-prod/telemetry_derived/clients_scalar_probe_counts_v1/query.sql @@ -95,7 +95,7 @@ WITH flat_clients_scalar_aggregates AS ( os = 'Windows' AND channel = 'release' AS sampled, FROM - clients_scalar_aggregates_v1 + `moz-fx-data-shared-prod.telemetry_derived.clients_scalar_aggregates_v1` WHERE submission_date = @submission_date AND (@app_version IS NULL OR app_version = @app_version) @@ -167,7 +167,9 @@ user_aggregates AS ( IF(app_build_id = '*', NULL, app_build_id) AS app_build_id, channel, IF(MAX(sampled), 10, 1) AS user_count, - udf.merge_scalar_user_data(ARRAY_CONCAT_AGG(scalar_aggregates)) AS scalar_aggregates + `moz-fx-data-shared-prod`.udf.merge_scalar_user_data( + ARRAY_CONCAT_AGG(scalar_aggregates) + ) AS scalar_aggregates FROM all_combos GROUP BY @@ -261,8 +263,10 @@ booleans_and_scalars AS ( bucketed_scalars ), valid_booleans_scalars AS ( - SELECT * - FROM booleans_and_scalars + SELECT + * + FROM + booleans_and_scalars INNER JOIN build_ids USING (app_build_id, channel) @@ -347,4 +351,4 @@ SELECT *, aggregates AS non_norm_aggregates FROM - aggregated \ No newline at end of file + aggregated diff --git a/sql/moz-fx-data-shared-prod/telemetry_derived/glam_sample_counts_v1/query.sql b/sql/moz-fx-data-shared-prod/telemetry_derived/glam_sample_counts_v1/query.sql index 48b30304923..2afecdc5b4d 100644 --- a/sql/moz-fx-data-shared-prod/telemetry_derived/glam_sample_counts_v1/query.sql +++ b/sql/moz-fx-data-shared-prod/telemetry_derived/glam_sample_counts_v1/query.sql @@ -11,7 +11,7 @@ WITH histogram_data AS ( h1.aggregates, IF(os = 'Windows' AND channel = 'release', 10, 1) AS sample_mult FROM - clients_histogram_aggregates_v2, + `moz-fx-data-shared-prod.telemetry_derived.clients_histogram_aggregates_v2`, UNNEST(histogram_aggregates) h1 WHERE submission_date = @submission_date @@ -25,7 +25,7 @@ scalars_data AS ( scalar_aggregates, IF(os = 'Windows' AND channel = 'release', 10, 1) AS sample_mult FROM - clients_scalar_aggregates_v1 + `moz-fx-data-shared-prod.telemetry_derived.clients_scalar_aggregates_v1` WHERE submission_date = @submission_date ) diff --git a/sql/moz-fx-data-shared-prod/telemetry_derived/glam_user_counts_v1/query.sql b/sql/moz-fx-data-shared-prod/telemetry_derived/glam_user_counts_v1/query.sql index 5d1f9e363b1..6f18e9b01fc 100755 --- a/sql/moz-fx-data-shared-prod/telemetry_derived/glam_user_counts_v1/query.sql +++ b/sql/moz-fx-data-shared-prod/telemetry_derived/glam_user_counts_v1/query.sql @@ -5,21 +5,22 @@ WITH all_clients AS ( app_version, app_build_id, channel - FROM clients_scalar_aggregates_v1 - WHERE submission_date = @submission_date - + FROM + `moz-fx-data-shared-prod.telemetry_derived.clients_scalar_aggregates_v1` + WHERE + submission_date = @submission_date UNION ALL - SELECT client_id, os, app_version, app_build_id, channel - FROM clients_histogram_aggregates_v2 - WHERE submission_date = @submission_date + FROM + `moz-fx-data-shared-prod.telemetry_derived.clients_histogram_aggregates_v2` + WHERE + submission_date = @submission_date ) - SELECT os, app_version, @@ -33,11 +34,9 @@ GROUP BY app_version, app_build_id, channel - UNION ALL - SELECT - CAST(NULL AS STRING) as os, + CAST(NULL AS STRING) AS os, app_version, app_build_id, channel, @@ -48,9 +47,7 @@ GROUP BY app_version, app_build_id, channel - UNION ALL - SELECT os, CAST(NULL AS INT64) AS app_version, @@ -63,9 +60,7 @@ GROUP BY os, app_build_id, channel - UNION ALL - SELECT os, app_version, @@ -78,9 +73,7 @@ GROUP BY os, app_version, channel - UNION ALL - SELECT os, CAST(NULL AS INT64) AS app_version, @@ -92,9 +85,7 @@ FROM GROUP BY os, channel - UNION ALL - SELECT CAST(NULL AS STRING) AS os, app_version, @@ -106,9 +97,7 @@ FROM GROUP BY app_version, channel - UNION ALL - SELECT CAST(NULL AS STRING) AS os, app_version, @@ -119,9 +108,7 @@ FROM all_clients GROUP BY app_version - UNION ALL - SELECT os, CAST(NULL AS INT64) AS app_version, @@ -132,9 +119,7 @@ FROM all_clients GROUP BY os - UNION ALL - SELECT CAST(NULL AS STRING) AS os, CAST(NULL AS INT64) AS app_version, @@ -145,9 +130,7 @@ FROM all_clients GROUP BY channel - UNION ALL - SELECT CAST(NULL AS STRING) AS os, CAST(NULL AS INT64) AS app_version, diff --git a/sql/moz-fx-data-shared-prod/telemetry_derived/histogram_percentiles_v1/query.sql b/sql/moz-fx-data-shared-prod/telemetry_derived/histogram_percentiles_v1/query.sql index adb1fe0aecd..946652487b3 100755 --- a/sql/moz-fx-data-shared-prod/telemetry_derived/histogram_percentiles_v1/query.sql +++ b/sql/moz-fx-data-shared-prod/telemetry_derived/histogram_percentiles_v1/query.sql @@ -23,4 +23,4 @@ SELECT ('99.9', mozfun.glam.percentile(99.9, non_norm_aggregates, metric_type)) ] AS non_norm_aggregates FROM - clients_histogram_probe_counts_v1 + `moz-fx-data-shared-prod.telemetry_derived.clients_histogram_probe_counts_v1` diff --git a/sql/moz-fx-data-shared-prod/telemetry_derived/scalar_percentiles_v1/query.sql b/sql/moz-fx-data-shared-prod/telemetry_derived/scalar_percentiles_v1/query.sql index 250ee373091..c04cd196c7d 100644 --- a/sql/moz-fx-data-shared-prod/telemetry_derived/scalar_percentiles_v1/query.sql +++ b/sql/moz-fx-data-shared-prod/telemetry_derived/scalar_percentiles_v1/query.sql @@ -1,36 +1,41 @@ WITH flat_clients_scalar_aggregates AS ( - SELECT *, - os = 'Windows' and channel = 'release' AS sampled, + SELECT + *, + os = 'Windows' + AND channel = 'release' AS sampled, FROM - clients_scalar_aggregates_v1 + `moz-fx-data-shared-prod.telemetry_derived.clients_scalar_aggregates_v1` WHERE submission_date = @submission_date - AND ( - @app_version IS NULL - OR app_version = @app_version - ) + AND (@app_version IS NULL OR app_version = @app_version) ), - -static_combos as ( - SELECT null as os, null as app_build_id +static_combos AS ( + SELECT + NULL AS os, + NULL AS app_build_id UNION ALL - SELECT null as os, '*' as app_build_id + SELECT + NULL AS os, + '*' AS app_build_id UNION ALL - SELECT '*' as os, null as app_build_id + SELECT + '*' AS os, + NULL AS app_build_id UNION ALL - SELECT '*' as os, '*' as app_build_id + SELECT + '*' AS os, + '*' AS app_build_id ), - all_combos AS ( SELECT - * EXCEPT(os, app_build_id), - COALESCE(combos.os, flat_table.os) as os, - COALESCE(combos.app_build_id, flat_table.app_build_id) as app_build_id + * EXCEPT (os, app_build_id), + COALESCE(combos.os, flat_table.os) AS os, + COALESCE(combos.app_build_id, flat_table.app_build_id) AS app_build_id FROM - flat_clients_scalar_aggregates flat_table + flat_clients_scalar_aggregates flat_table CROSS JOIN - static_combos combos), - + static_combos combos +), user_aggregates AS ( SELECT client_id, @@ -39,7 +44,9 @@ user_aggregates AS ( IF(app_build_id = '*', NULL, app_build_id) AS app_build_id, channel, IF(MAX(sampled), 10, 1) AS user_count, - udf.merge_scalar_user_data(ARRAY_CONCAT_AGG(scalar_aggregates)) AS scalar_aggregates + `moz-fx-data-shared-prod`.udf.merge_scalar_user_data( + ARRAY_CONCAT_AGG(scalar_aggregates) + ) AS scalar_aggregates FROM all_combos GROUP BY @@ -47,8 +54,8 @@ user_aggregates AS ( os, app_version, app_build_id, - channel), - + channel +), percentiles AS ( SELECT os, @@ -66,10 +73,11 @@ percentiles AS ( agg_type AS client_agg_type, 'percentiles' AS agg_type, SUM(user_count) AS total_users, - APPROX_QUANTILES(value, 1000) AS aggregates + APPROX_QUANTILES(value, 1000) AS aggregates FROM user_aggregates - CROSS JOIN UNNEST(scalar_aggregates) + CROSS JOIN + UNNEST(scalar_aggregates) GROUP BY os, app_version, @@ -82,12 +90,15 @@ percentiles AS ( client_agg_type ), aggregated AS ( - SELECT * - REPLACE(mozfun.glam.map_from_array_offsets_precise( - [0.1, 1.0, 5.0, 25.0, 50.0, 75.0, 95.0, 99.0, 99.9], - aggregates - ) AS aggregates) - FROM percentiles + SELECT + * REPLACE ( + mozfun.glam.map_from_array_offsets_precise( + [0.1, 1.0, 5.0, 25.0, 50.0, 75.0, 95.0, 99.0, 99.9], + aggregates + ) AS aggregates + ) + FROM + percentiles ) SELECT *, From 76593c90d346f158aa2568ab38e2f443985eafbb Mon Sep 17 00:00:00 2001 From: Eduardo Filho Date: Mon, 13 May 2024 11:22:20 -0400 Subject: [PATCH 2/3] Fix column order for glean probe counts --- bigquery_etl/glam/templates/probe_counts_v1.sql | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/bigquery_etl/glam/templates/probe_counts_v1.sql b/bigquery_etl/glam/templates/probe_counts_v1.sql index 4981421be0f..89a2e608660 100644 --- a/bigquery_etl/glam/templates/probe_counts_v1.sql +++ b/bigquery_etl/glam/templates/probe_counts_v1.sql @@ -111,13 +111,15 @@ WITH probe_counts AS ( os = "Windows" ) SELECT - pc.* EXCEPT (total_users), - IF( - pc.os = "*", - -- Add the remaining 90% of Windows client count, if present, to the All OS (*) client count. - pc.total_users + CAST((COALESCE(wpc.total_users, 0) * 0.9) AS INT64), - pc.total_users - ) AS total_users + pc.* REPLACE ( + IF( + pc.os = "*", + -- Add the remaining 90% of Windows client count, if present, to the All OS (*) client count. + pc.total_users + CAST((COALESCE(wpc.total_users, 0) * 0.9) AS INT64), + pc.total_users + ) + AS total_users + ) FROM probe_counts pc LEFT JOIN From 77b33d113082e424e4f0bdf02bfb0ba00e41b9fd Mon Sep 17 00:00:00 2001 From: Eduardo Filho Date: Mon, 13 May 2024 13:41:17 -0400 Subject: [PATCH 3/3] Add fully qualified table name to tests --- ...emetry_derived.clients_histogram_bucket_counts_v1.schema.json} | 0 ...rived.clients_non_norm_histogram_bucket_counts_v1.schema.json} | 0 ...rod.telemetry_derived.clients_histogram_bucket_counts_v1.yaml} | 0 ...etry_derived.clients_non_norm_histogram_bucket_counts_v1.yaml} | 0 ...od.telemetry_derived.clients_scalar_aggregates_v1.schema.json} | 0 ...ared-prod.telemetry_derived.clients_scalar_aggregates_v1.yaml} | 0 6 files changed, 0 insertions(+), 0 deletions(-) rename tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_probe_counts_v1/{clients_histogram_bucket_counts_v1.schema.json => moz-fx-data-shared-prod.telemetry_derived.clients_histogram_bucket_counts_v1.schema.json} (100%) rename tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_probe_counts_v1/{clients_non_norm_histogram_bucket_counts_v1.schema.json => moz-fx-data-shared-prod.telemetry_derived.clients_non_norm_histogram_bucket_counts_v1.schema.json} (100%) rename tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_probe_counts_v1/test_aggregation/{clients_histogram_bucket_counts_v1.yaml => moz-fx-data-shared-prod.telemetry_derived.clients_histogram_bucket_counts_v1.yaml} (100%) rename tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_probe_counts_v1/test_aggregation/{clients_non_norm_histogram_bucket_counts_v1.yaml => moz-fx-data-shared-prod.telemetry_derived.clients_non_norm_histogram_bucket_counts_v1.yaml} (100%) rename tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_scalar_probe_counts_v1/{clients_scalar_aggregates_v1.schema.json => moz-fx-data-shared-prod.telemetry_derived.clients_scalar_aggregates_v1.schema.json} (100%) rename tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_scalar_probe_counts_v1/test_aggregation/{clients_scalar_aggregates_v1.yaml => moz-fx-data-shared-prod.telemetry_derived.clients_scalar_aggregates_v1.yaml} (100%) diff --git a/tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_probe_counts_v1/clients_histogram_bucket_counts_v1.schema.json b/tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_probe_counts_v1/moz-fx-data-shared-prod.telemetry_derived.clients_histogram_bucket_counts_v1.schema.json similarity index 100% rename from tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_probe_counts_v1/clients_histogram_bucket_counts_v1.schema.json rename to tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_probe_counts_v1/moz-fx-data-shared-prod.telemetry_derived.clients_histogram_bucket_counts_v1.schema.json diff --git a/tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_probe_counts_v1/clients_non_norm_histogram_bucket_counts_v1.schema.json b/tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_probe_counts_v1/moz-fx-data-shared-prod.telemetry_derived.clients_non_norm_histogram_bucket_counts_v1.schema.json similarity index 100% rename from tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_probe_counts_v1/clients_non_norm_histogram_bucket_counts_v1.schema.json rename to tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_probe_counts_v1/moz-fx-data-shared-prod.telemetry_derived.clients_non_norm_histogram_bucket_counts_v1.schema.json diff --git a/tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_probe_counts_v1/test_aggregation/clients_histogram_bucket_counts_v1.yaml b/tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_probe_counts_v1/test_aggregation/moz-fx-data-shared-prod.telemetry_derived.clients_histogram_bucket_counts_v1.yaml similarity index 100% rename from tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_probe_counts_v1/test_aggregation/clients_histogram_bucket_counts_v1.yaml rename to tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_probe_counts_v1/test_aggregation/moz-fx-data-shared-prod.telemetry_derived.clients_histogram_bucket_counts_v1.yaml diff --git a/tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_probe_counts_v1/test_aggregation/clients_non_norm_histogram_bucket_counts_v1.yaml b/tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_probe_counts_v1/test_aggregation/moz-fx-data-shared-prod.telemetry_derived.clients_non_norm_histogram_bucket_counts_v1.yaml similarity index 100% rename from tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_probe_counts_v1/test_aggregation/clients_non_norm_histogram_bucket_counts_v1.yaml rename to tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_probe_counts_v1/test_aggregation/moz-fx-data-shared-prod.telemetry_derived.clients_non_norm_histogram_bucket_counts_v1.yaml diff --git a/tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_scalar_probe_counts_v1/clients_scalar_aggregates_v1.schema.json b/tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_scalar_probe_counts_v1/moz-fx-data-shared-prod.telemetry_derived.clients_scalar_aggregates_v1.schema.json similarity index 100% rename from tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_scalar_probe_counts_v1/clients_scalar_aggregates_v1.schema.json rename to tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_scalar_probe_counts_v1/moz-fx-data-shared-prod.telemetry_derived.clients_scalar_aggregates_v1.schema.json diff --git a/tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_scalar_probe_counts_v1/test_aggregation/clients_scalar_aggregates_v1.yaml b/tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_scalar_probe_counts_v1/test_aggregation/moz-fx-data-shared-prod.telemetry_derived.clients_scalar_aggregates_v1.yaml similarity index 100% rename from tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_scalar_probe_counts_v1/test_aggregation/clients_scalar_aggregates_v1.yaml rename to tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_scalar_probe_counts_v1/test_aggregation/moz-fx-data-shared-prod.telemetry_derived.clients_scalar_aggregates_v1.yaml