Skip to content

Commit

Permalink
Fix havingqual processing for caggs
Browse files Browse the repository at this point in the history
If the targetlist for the cagg query has both subexprs and exprs
from the having clause, the havingqual for the partial view
is generated incorrectly. Fix this issue by checking havingqual
against all the entries in the targetlist instead of first match.

Fixes #2655
  • Loading branch information
gayyappan committed Jul 21, 2021
1 parent 78a21f4 commit 24af1df
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 27 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@ accidentally triggering the load of a previous DB version.**

**Bugfixes**
* #3401 Fix segfault for RelOptInfo without fdw_private
* #3430 Fix havingqual processing for continuous aggregates

**Thanks**
* @fvannee for reporting an issue with hypertable expansion in functions
* @brianbenns for reporting a segfault with continuous aggregates

## 2.3.1 (2021-07-05)

Expand Down
69 changes: 42 additions & 27 deletions tsl/src/continuous_aggs/create.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
* The entry point for the code is
* tsl_process_continuous_agg_viewstmt
* The bulk of the code that creates the underlying tables/views etc. is in
* cagg_create
* cagg_create.
*
*/
#include <postgres.h>
Expand Down Expand Up @@ -1335,8 +1335,8 @@ add_aggregate_partialize_mutator(Node *node, AggPartCxt *cxt)
*/
typedef struct Cagg_havingcxt
{
TargetEntry *old;
TargetEntry *new;
List *orig_tlist;
List *mod_tlist;
bool found;
} cagg_havingcxt;

Expand All @@ -1348,40 +1348,55 @@ replace_having_qual_mutator(Node *node, cagg_havingcxt *cxt)
{
if (node == NULL)
return NULL;
if (equal(node, cxt->old->expr))
ListCell *lc, *lc2;
List *origtlist = cxt->orig_tlist;
List *modtlist = cxt->mod_tlist;
forboth (lc, origtlist, lc2, modtlist)
{
cxt->found = true;
return (Node *) cxt->new->expr;
TargetEntry *te = (TargetEntry *) lfirst(lc);
TargetEntry *modte = (TargetEntry *) lfirst(lc2);
if (equal(node, te->expr))
{
cxt->found = true;
return (Node *) modte->expr;
}
}
return expression_tree_mutator(node, replace_having_qual_mutator, cxt);
}

/* modify the havingqual and replace exprs that already occur in targetlist
* with entries from new target list
/* modify the havingqual and replace exprs (in havingqual) that already occur
* in targetlist with entries from new target list
* Arguments:
* origquery : Query whose havingqual will be modified.
* newtlist : fixed up targetlist for origquery (the origquery's tlist is mapped
* to columns from materialized hypertable). There is a 1-1 mapping
* between origquery->targetList and newtlist
* RETURNS: havingQual
* Example:
* SELECT x, count(x) , time_bucket(...) ---->origquery's tlist
* FROM ...
* HAVING count(x) > 10 and sum(y) > 11 --->havingQual
*
* modtlist would have entries corresponding to the finalize query's tlist:
* x , finalize(count(x)), timebucket,
* which look more like: col1, finalize(col1), col2 where the col# correspond to the
* columns from the materialization hypertable.
* (see comments for cagg_havingcxt )
* count(x) : already appears in the targetlist of the query and should be
* replaced by the corresponding entry from modtlist. We have to compare count(x)
* with all the entries in the tlist so that we do not match subexprs when the
* complete expr (e.g do not match x instead of count(x) ) is in the tlist.
* (issue 2655)
*/
static Node *
replace_targetentry_in_havingqual(Query *origquery, List *newtlist)
{
Node *having = copyObject(origquery->havingQual);
List *origtlist = origquery->targetList;
List *modtlist = newtlist;
ListCell *lc, *lc2;
cagg_havingcxt hcxt;

/* if we have any exprs that are in the targetlist, then we already have columns
* for them in the mat table. So replace with the correct expr
*/
forboth (lc, origtlist, lc2, modtlist)
{
TargetEntry *te = (TargetEntry *) lfirst(lc);
TargetEntry *modte = (TargetEntry *) lfirst(lc2);
hcxt.old = te;
hcxt.new = modte;
hcxt.found = false;
having =
(Node *) expression_tree_mutator((Node *) having, replace_having_qual_mutator, &hcxt);
}
hcxt.orig_tlist = origquery->targetList;
hcxt.mod_tlist = newtlist;
hcxt.found = false;
having = (Node *) expression_tree_mutator((Node *) having, replace_having_qual_mutator, &hcxt);
return having;
}

Expand Down Expand Up @@ -1582,7 +1597,7 @@ fixup_userview_query_tlist(Query *userquery, List *tlist_aliases)
*
* Step 1. create a materialiation table which stores the partials for the
* aggregates and the grouping columns + internal columns.
* So we have a table like ts_internal_mcagg_tab
* So we have a table like _materialization_hypertable
* with columns:
*( a, col1, col2, col3, internal-columns)
* where col1 = partialize(min(b)), col2= partialize(max(d)),
Expand All @@ -1592,7 +1607,7 @@ fixup_userview_query_tlist(Query *userquery, List *tlist_aliases)
* CREATE VIEW mcagg
* as
* select a, finalize( col1) + finalize(col2))
* from ts_internal_mcagg
* from _materialization_hypertable
* group by a, col3
*
* Step 3: Create a view to populate the materialization table
Expand Down
44 changes: 44 additions & 0 deletions tsl/test/expected/continuous_aggs.out
Original file line number Diff line number Diff line change
Expand Up @@ -1415,3 +1415,47 @@ NOTICE: drop cascades to 6 other objects
NOTICE: drop cascades to table _timescaledb_internal._hyper_35_75_chunk
NOTICE: drop cascades to table _timescaledb_internal._hyper_36_76_chunk
NOTICE: drop cascades to table _timescaledb_internal._hyper_37_77_chunk
----
--- github issue 2655 ---
create table raw_data(time timestamptz, search_query text, cnt integer);
select create_hypertable('raw_data','time');
NOTICE: adding not-null constraint to column "time"
create_hypertable
------------------------
(38,public,raw_data,t)
(1 row)

insert into raw_data select '2000-01-01','Q1';
CREATE MATERIALIZED VIEW search_query_count_1m WITH (timescaledb.continuous)
AS
SELECT search_query,count(search_query) as count,
time_bucket(INTERVAL '1 minute', time) AS bucket
FROM raw_data
WHERE search_query is not null AND LENGTH(TRIM(both from search_query))>0
GROUP BY search_query, bucket HAVING count(search_query) > 3 OR sum(cnt) > 1;
NOTICE: refreshing continuous aggregate "search_query_count_1m"
insert into raw_data select '2000-01-01 00:00+0','Q1', 1;
insert into raw_data select '2000-01-01 00:00+0','Q1', 2;
insert into raw_data select '2000-01-01 00:00+0','Q1', 3;
insert into raw_data select '2000-01-02 00:00+0','Q2', 10;
insert into raw_data select '2000-01-02 00:00+0','Q2', 20;
CALL refresh_continuous_aggregate('search_query_count_1m', NULL, NULL);
SELECT * FROM search_query_count_1m ORDER BY 1, 2;
search_query | count | bucket
--------------+-------+------------------------------
Q1 | 3 | Fri Dec 31 16:00:00 1999 PST
Q2 | 2 | Sat Jan 01 16:00:00 2000 PST
(2 rows)

--only 1 of these should appear in the result
insert into raw_data select '2000-01-02 00:00+0','Q3', 0;
insert into raw_data select '2000-01-03 00:00+0','Q4', 20;
CALL refresh_continuous_aggregate('search_query_count_1m', NULL, NULL);
SELECT * FROM search_query_count_1m ORDER BY 1, 2;
search_query | count | bucket
--------------+-------+------------------------------
Q1 | 3 | Fri Dec 31 16:00:00 1999 PST
Q2 | 2 | Sat Jan 01 16:00:00 2000 PST
Q4 | 1 | Sun Jan 02 16:00:00 2000 PST
(3 rows)

32 changes: 32 additions & 0 deletions tsl/test/sql/continuous_aggs.sql
Original file line number Diff line number Diff line change
Expand Up @@ -1027,3 +1027,35 @@ GROUP BY sensor_id, time_bucket(INTERVAL '1 minute', timestamp)
ORDER BY water_consumption;

DROP TABLE water_consumption CASCADE;

----
--- github issue 2655 ---
create table raw_data(time timestamptz, search_query text, cnt integer);
select create_hypertable('raw_data','time');
insert into raw_data select '2000-01-01','Q1';

CREATE MATERIALIZED VIEW search_query_count_1m WITH (timescaledb.continuous)
AS
SELECT search_query,count(search_query) as count,
time_bucket(INTERVAL '1 minute', time) AS bucket
FROM raw_data
WHERE search_query is not null AND LENGTH(TRIM(both from search_query))>0
GROUP BY search_query, bucket HAVING count(search_query) > 3 OR sum(cnt) > 1;

insert into raw_data select '2000-01-01 00:00+0','Q1', 1;
insert into raw_data select '2000-01-01 00:00+0','Q1', 2;
insert into raw_data select '2000-01-01 00:00+0','Q1', 3;
insert into raw_data select '2000-01-02 00:00+0','Q2', 10;
insert into raw_data select '2000-01-02 00:00+0','Q2', 20;

CALL refresh_continuous_aggregate('search_query_count_1m', NULL, NULL);
SELECT * FROM search_query_count_1m ORDER BY 1, 2;

--only 1 of these should appear in the result
insert into raw_data select '2000-01-02 00:00+0','Q3', 0;
insert into raw_data select '2000-01-03 00:00+0','Q4', 20;

CALL refresh_continuous_aggregate('search_query_count_1m', NULL, NULL);
SELECT * FROM search_query_count_1m ORDER BY 1, 2;


0 comments on commit 24af1df

Please sign in to comment.