From 0e2ff92ec7650600fee69fb554f5a3ab476c3be2 Mon Sep 17 00:00:00 2001 From: Ante Kresic Date: Tue, 12 Dec 2023 14:16:32 +0100 Subject: [PATCH] Fix groupby pathkeys for gapfill in PG16 With PG16, group pathkeys can include column which are not in the GROUP BY clause when dealing with ordered aggregates. This means we have to exclude those columns when checking and creating the gapfill custom scan subpath. Fixes #6396 --- .unreleased/fix_6408 | 2 + tsl/src/nodes/gapfill/gapfill_plan.c | 39 ++++++++++++----- tsl/test/shared/expected/gapfill-13.out | 56 +++++++++++++++++++++++++ tsl/test/shared/expected/gapfill-14.out | 56 +++++++++++++++++++++++++ tsl/test/shared/expected/gapfill-15.out | 56 +++++++++++++++++++++++++ tsl/test/shared/expected/gapfill-16.out | 56 +++++++++++++++++++++++++ tsl/test/shared/sql/gapfill.sql.in | 34 +++++++++++++++ 7 files changed, 289 insertions(+), 10 deletions(-) create mode 100644 .unreleased/fix_6408 diff --git a/.unreleased/fix_6408 b/.unreleased/fix_6408 new file mode 100644 index 00000000000..9b46071e274 --- /dev/null +++ b/.unreleased/fix_6408 @@ -0,0 +1,2 @@ +Fixes: #6408 Fix groupby pathkeys for gapfill in PG16 +Thanks: @MA-MacDonald for reporting an issue with gapfill in PG16 diff --git a/tsl/src/nodes/gapfill/gapfill_plan.c b/tsl/src/nodes/gapfill/gapfill_plan.c index 616e6a3f240..e2949ed9823 100644 --- a/tsl/src/nodes/gapfill/gapfill_plan.c +++ b/tsl/src/nodes/gapfill/gapfill_plan.c @@ -135,7 +135,18 @@ window_function_walker(Node *node, gapfill_walker_context *context) static bool gapfill_correct_order(PlannerInfo *root, Path *subpath, FuncExpr *func) { - if (list_length(subpath->pathkeys) != list_length(root->group_pathkeys)) + int num_groupby_pathkeys; +#if PG16_LT + num_groupby_pathkeys = list_length(root->group_pathkeys); +#else + /* In PG16 group_pathkeys can contain additional pathkeys + * used for optimization on ordered aggregates. + * We only want to deal with group by elements only here. + */ + num_groupby_pathkeys = root->num_groupby_pathkeys; +#endif + + if (list_length(subpath->pathkeys) != num_groupby_pathkeys) return false; if (list_length(subpath->pathkeys) > 0) @@ -147,12 +158,11 @@ gapfill_correct_order(PlannerInfo *root, Path *subpath, FuncExpr *func) if (BTLessStrategyNumber == pk->pk_strategy && IsA(em->em_expr, FuncExpr) && ((FuncExpr *) em->em_expr)->funcid == func->funcid) { - ListCell *lc; - - /* check all groups are part of subpath pathkeys */ - foreach (lc, root->group_pathkeys) + int i; + /* check all groupby pathkeys are part of subpath pathkeys */ + for (i = 0; i < num_groupby_pathkeys; i++) { - if (!list_member(subpath->pathkeys, lfirst(lc))) + if (!list_member(subpath->pathkeys, list_nth(root->group_pathkeys, i))) return false; } return true; @@ -358,13 +368,22 @@ gapfill_path_create(PlannerInfo *root, Path *subpath, FuncExpr *func) if (!gapfill_correct_order(root, subpath, func)) { List *new_order = NIL; - ListCell *lc; PathKey *pk_func = NULL; - + int num_groupby_pathkeys; +#if PG16_LT + num_groupby_pathkeys = list_length(root->group_pathkeys); +#else + /* In PG16 group_pathkeys can contain additional pathkeys + * used for optimization on ordered aggregates. + * We only want to deal with group by elements only here. + */ + num_groupby_pathkeys = root->num_groupby_pathkeys; +#endif + int i; /* subpath does not have correct order */ - foreach (lc, root->group_pathkeys) + for (i = 0; i < num_groupby_pathkeys; i++) { - PathKey *pk = lfirst(lc); + PathKey *pk = list_nth(root->group_pathkeys, i); EquivalenceMember *em = linitial(pk->pk_eclass->ec_members); if (!pk_func && IsA(em->em_expr, FuncExpr) && diff --git a/tsl/test/shared/expected/gapfill-13.out b/tsl/test/shared/expected/gapfill-13.out index a34879cd718..9d3fd64aab1 100644 --- a/tsl/test/shared/expected/gapfill-13.out +++ b/tsl/test/shared/expected/gapfill-13.out @@ -3392,3 +3392,59 @@ GROUP BY 1; (7 rows) DROP TABLE month_timezone; +-- Test gapfill with additional group pathkeys added for optimization (#6396) +CREATE TABLE stocks_real_time ( + time TIMESTAMPTZ NOT NULL, + symbol TEXT NOT NULL, + price DOUBLE PRECISION NULL, + day_volume INT NULL +); +INSERT INTO stocks_real_time + VALUES + (NOW(), 's1', 70.0, 50), + (NOW(), 's2', 66.5, 60), + (NOW(), 's3', 77.0, 65); +SELECT + time_bucket_gapfill('1 day', time) AS day, + COUNT(DISTINCT symbol) AS symbol_count +FROM stocks_real_time +WHERE time > '2023-12-01' + AND time < '2023-12-10' +GROUP BY 1; + day | symbol_count +------------------------------+-------------- + Thu Nov 30 16:00:00 2023 PST | + Fri Dec 01 16:00:00 2023 PST | + Sat Dec 02 16:00:00 2023 PST | + Sun Dec 03 16:00:00 2023 PST | + Mon Dec 04 16:00:00 2023 PST | + Tue Dec 05 16:00:00 2023 PST | + Wed Dec 06 16:00:00 2023 PST | + Thu Dec 07 16:00:00 2023 PST | + Fri Dec 08 16:00:00 2023 PST | + Sat Dec 09 16:00:00 2023 PST | +(10 rows) + +SELECT + time_bucket_gapfill('1 day', time) AS day, + COUNT(DISTINCT symbol) AS symbol_count +FROM stocks_real_time +WHERE time > '2023-12-01' + AND time < '2023-12-10' +GROUP BY 1 +ORDER BY 1 DESC; + day | symbol_count +------------------------------+-------------- + Sat Dec 09 16:00:00 2023 PST | + Fri Dec 08 16:00:00 2023 PST | + Thu Dec 07 16:00:00 2023 PST | + Wed Dec 06 16:00:00 2023 PST | + Tue Dec 05 16:00:00 2023 PST | + Mon Dec 04 16:00:00 2023 PST | + Sun Dec 03 16:00:00 2023 PST | + Sat Dec 02 16:00:00 2023 PST | + Fri Dec 01 16:00:00 2023 PST | + Thu Nov 30 16:00:00 2023 PST | +(10 rows) + +DROP TABLE stocks_real_time; diff --git a/tsl/test/shared/expected/gapfill-14.out b/tsl/test/shared/expected/gapfill-14.out index a34879cd718..9d3fd64aab1 100644 --- a/tsl/test/shared/expected/gapfill-14.out +++ b/tsl/test/shared/expected/gapfill-14.out @@ -3392,3 +3392,59 @@ GROUP BY 1; (7 rows) DROP TABLE month_timezone; +-- Test gapfill with additional group pathkeys added for optimization (#6396) +CREATE TABLE stocks_real_time ( + time TIMESTAMPTZ NOT NULL, + symbol TEXT NOT NULL, + price DOUBLE PRECISION NULL, + day_volume INT NULL +); +INSERT INTO stocks_real_time + VALUES + (NOW(), 's1', 70.0, 50), + (NOW(), 's2', 66.5, 60), + (NOW(), 's3', 77.0, 65); +SELECT + time_bucket_gapfill('1 day', time) AS day, + COUNT(DISTINCT symbol) AS symbol_count +FROM stocks_real_time +WHERE time > '2023-12-01' + AND time < '2023-12-10' +GROUP BY 1; + day | symbol_count +------------------------------+-------------- + Thu Nov 30 16:00:00 2023 PST | + Fri Dec 01 16:00:00 2023 PST | + Sat Dec 02 16:00:00 2023 PST | + Sun Dec 03 16:00:00 2023 PST | + Mon Dec 04 16:00:00 2023 PST | + Tue Dec 05 16:00:00 2023 PST | + Wed Dec 06 16:00:00 2023 PST | + Thu Dec 07 16:00:00 2023 PST | + Fri Dec 08 16:00:00 2023 PST | + Sat Dec 09 16:00:00 2023 PST | +(10 rows) + +SELECT + time_bucket_gapfill('1 day', time) AS day, + COUNT(DISTINCT symbol) AS symbol_count +FROM stocks_real_time +WHERE time > '2023-12-01' + AND time < '2023-12-10' +GROUP BY 1 +ORDER BY 1 DESC; + day | symbol_count +------------------------------+-------------- + Sat Dec 09 16:00:00 2023 PST | + Fri Dec 08 16:00:00 2023 PST | + Thu Dec 07 16:00:00 2023 PST | + Wed Dec 06 16:00:00 2023 PST | + Tue Dec 05 16:00:00 2023 PST | + Mon Dec 04 16:00:00 2023 PST | + Sun Dec 03 16:00:00 2023 PST | + Sat Dec 02 16:00:00 2023 PST | + Fri Dec 01 16:00:00 2023 PST | + Thu Nov 30 16:00:00 2023 PST | +(10 rows) + +DROP TABLE stocks_real_time; diff --git a/tsl/test/shared/expected/gapfill-15.out b/tsl/test/shared/expected/gapfill-15.out index a34879cd718..9d3fd64aab1 100644 --- a/tsl/test/shared/expected/gapfill-15.out +++ b/tsl/test/shared/expected/gapfill-15.out @@ -3392,3 +3392,59 @@ GROUP BY 1; (7 rows) DROP TABLE month_timezone; +-- Test gapfill with additional group pathkeys added for optimization (#6396) +CREATE TABLE stocks_real_time ( + time TIMESTAMPTZ NOT NULL, + symbol TEXT NOT NULL, + price DOUBLE PRECISION NULL, + day_volume INT NULL +); +INSERT INTO stocks_real_time + VALUES + (NOW(), 's1', 70.0, 50), + (NOW(), 's2', 66.5, 60), + (NOW(), 's3', 77.0, 65); +SELECT + time_bucket_gapfill('1 day', time) AS day, + COUNT(DISTINCT symbol) AS symbol_count +FROM stocks_real_time +WHERE time > '2023-12-01' + AND time < '2023-12-10' +GROUP BY 1; + day | symbol_count +------------------------------+-------------- + Thu Nov 30 16:00:00 2023 PST | + Fri Dec 01 16:00:00 2023 PST | + Sat Dec 02 16:00:00 2023 PST | + Sun Dec 03 16:00:00 2023 PST | + Mon Dec 04 16:00:00 2023 PST | + Tue Dec 05 16:00:00 2023 PST | + Wed Dec 06 16:00:00 2023 PST | + Thu Dec 07 16:00:00 2023 PST | + Fri Dec 08 16:00:00 2023 PST | + Sat Dec 09 16:00:00 2023 PST | +(10 rows) + +SELECT + time_bucket_gapfill('1 day', time) AS day, + COUNT(DISTINCT symbol) AS symbol_count +FROM stocks_real_time +WHERE time > '2023-12-01' + AND time < '2023-12-10' +GROUP BY 1 +ORDER BY 1 DESC; + day | symbol_count +------------------------------+-------------- + Sat Dec 09 16:00:00 2023 PST | + Fri Dec 08 16:00:00 2023 PST | + Thu Dec 07 16:00:00 2023 PST | + Wed Dec 06 16:00:00 2023 PST | + Tue Dec 05 16:00:00 2023 PST | + Mon Dec 04 16:00:00 2023 PST | + Sun Dec 03 16:00:00 2023 PST | + Sat Dec 02 16:00:00 2023 PST | + Fri Dec 01 16:00:00 2023 PST | + Thu Nov 30 16:00:00 2023 PST | +(10 rows) + +DROP TABLE stocks_real_time; diff --git a/tsl/test/shared/expected/gapfill-16.out b/tsl/test/shared/expected/gapfill-16.out index 1f430c6adef..f03606ed10f 100644 --- a/tsl/test/shared/expected/gapfill-16.out +++ b/tsl/test/shared/expected/gapfill-16.out @@ -3394,3 +3394,59 @@ GROUP BY 1; (7 rows) DROP TABLE month_timezone; +-- Test gapfill with additional group pathkeys added for optimization (#6396) +CREATE TABLE stocks_real_time ( + time TIMESTAMPTZ NOT NULL, + symbol TEXT NOT NULL, + price DOUBLE PRECISION NULL, + day_volume INT NULL +); +INSERT INTO stocks_real_time + VALUES + (NOW(), 's1', 70.0, 50), + (NOW(), 's2', 66.5, 60), + (NOW(), 's3', 77.0, 65); +SELECT + time_bucket_gapfill('1 day', time) AS day, + COUNT(DISTINCT symbol) AS symbol_count +FROM stocks_real_time +WHERE time > '2023-12-01' + AND time < '2023-12-10' +GROUP BY 1; + day | symbol_count +------------------------------+-------------- + Thu Nov 30 16:00:00 2023 PST | + Fri Dec 01 16:00:00 2023 PST | + Sat Dec 02 16:00:00 2023 PST | + Sun Dec 03 16:00:00 2023 PST | + Mon Dec 04 16:00:00 2023 PST | + Tue Dec 05 16:00:00 2023 PST | + Wed Dec 06 16:00:00 2023 PST | + Thu Dec 07 16:00:00 2023 PST | + Fri Dec 08 16:00:00 2023 PST | + Sat Dec 09 16:00:00 2023 PST | +(10 rows) + +SELECT + time_bucket_gapfill('1 day', time) AS day, + COUNT(DISTINCT symbol) AS symbol_count +FROM stocks_real_time +WHERE time > '2023-12-01' + AND time < '2023-12-10' +GROUP BY 1 +ORDER BY 1 DESC; + day | symbol_count +------------------------------+-------------- + Sat Dec 09 16:00:00 2023 PST | + Fri Dec 08 16:00:00 2023 PST | + Thu Dec 07 16:00:00 2023 PST | + Wed Dec 06 16:00:00 2023 PST | + Tue Dec 05 16:00:00 2023 PST | + Mon Dec 04 16:00:00 2023 PST | + Sun Dec 03 16:00:00 2023 PST | + Sat Dec 02 16:00:00 2023 PST | + Fri Dec 01 16:00:00 2023 PST | + Thu Nov 30 16:00:00 2023 PST | +(10 rows) + +DROP TABLE stocks_real_time; diff --git a/tsl/test/shared/sql/gapfill.sql.in b/tsl/test/shared/sql/gapfill.sql.in index fdc5cfe5c1d..3a44b2d3740 100644 --- a/tsl/test/shared/sql/gapfill.sql.in +++ b/tsl/test/shared/sql/gapfill.sql.in @@ -1537,3 +1537,37 @@ FROM GROUP BY 1; DROP TABLE month_timezone; + +-- Test gapfill with additional group pathkeys added for optimization (#6396) +CREATE TABLE stocks_real_time ( + time TIMESTAMPTZ NOT NULL, + symbol TEXT NOT NULL, + price DOUBLE PRECISION NULL, + day_volume INT NULL +); + +INSERT INTO stocks_real_time + VALUES + (NOW(), 's1', 70.0, 50), + (NOW(), 's2', 66.5, 60), + (NOW(), 's3', 77.0, 65); + +SELECT + time_bucket_gapfill('1 day', time) AS day, + COUNT(DISTINCT symbol) AS symbol_count +FROM stocks_real_time +WHERE time > '2023-12-01' + AND time < '2023-12-10' +GROUP BY 1; + +SELECT + time_bucket_gapfill('1 day', time) AS day, + COUNT(DISTINCT symbol) AS symbol_count +FROM stocks_real_time +WHERE time > '2023-12-01' + AND time < '2023-12-10' +GROUP BY 1 +ORDER BY 1 DESC; + +DROP TABLE stocks_real_time; +