Fix havingqual processing for caggs

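If the target list of a continuous aggregate (cagg) query contains both an expression used in the HAVING clause and one of its subexpressions (for example, both `count(x)` and `x`), the havingqual for the partial view is generated incorrectly. Fix this by comparing each node of the havingqual against every entry in the target list before descending into its subexpressions, instead of substituting one target-list entry at a time. Fixes #2655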
timescale · Jul 20, 2021 · d1a7fe7 · d1a7fe7
1 parent 78a21f4
commit d1a7fe7
Show file tree

Hide file tree

Showing 4 changed files with 112 additions and 24 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,9 +8,11 @@ accidentally triggering the load of a previous DB version.**
 
 **Bugfixes**
 * #3401 Fix segfault for RelOptInfo without fdw_private
+* #3430 Fix havingqual processing for continuous aggregates
 
 **Thanks**
 * @fvannee for reporting an issue with hypertable expansion in functions
+* @brianbenns for reporting a segfault with continuous aggregates
 
 ## 2.3.1 (2021-07-05)
 

diff --git a/tsl/src/continuous_aggs/create.c b/tsl/src/continuous_aggs/create.c
@@ -1335,8 +1335,8 @@ add_aggregate_partialize_mutator(Node *node, AggPartCxt *cxt)
  */
 typedef struct Cagg_havingcxt
 {
-	TargetEntry *old;
-	TargetEntry *new;
+	List *orig_tlist;
+	List *mod_tlist;
 	bool found;
 } cagg_havingcxt;
 
@@ -1348,40 +1348,50 @@ replace_having_qual_mutator(Node *node, cagg_havingcxt *cxt)
 {
 	if (node == NULL)
 		return NULL;
-	if (equal(node, cxt->old->expr))
+	ListCell *lc, *lc2;
+	List *origtlist = cxt->orig_tlist;
+	List *modtlist = cxt->mod_tlist;
+	forboth (lc, origtlist, lc2, modtlist)
 	{
-		cxt->found = true;
-		return (Node *) cxt->new->expr;
+		TargetEntry *te = (TargetEntry *) lfirst(lc);
+		TargetEntry *modte = (TargetEntry *) lfirst(lc2);
+		if (equal(node, te->expr))
+		{
+			cxt->found = true;
+			return (Node *) modte->expr;
+		}
 	}
 	return expression_tree_mutator(node, replace_having_qual_mutator, cxt);
 }
 
-/* modify the havingqual and replace exprs that already occur in targetlist
- * with entries from new target list
+/* modify the havingqual and replace exprs (in havingqual) that already occur
+ * in targetlist with entries from new target list
+ * Arguments:
+ *   origquery : Query whose havingqual will be modified.
+ *   newtlist : fixed up targetlist for origquery (the origquery's tlist is mapped
+ *              to columns from materialized hypertable). There is a 1-1 mapping
+ *              between origquery->targetList and newtlist
  * RETURNS: havingQual
+ * Example:
+ * SELECT x, count(x) , time_bucket(...)
+ * FROM ...
+ * HAVING count(x) > 10 and sum(y) > 11 --->havingQual
+ *
+ * count(x) : already appears in the targetlist of the query and should be
+ * replaced by the corresponding entry from newtlist. We have to compare each
+ * havingqual expr with all the entries in the tlist so that we do not match a
+ * subexpr (e.g. x) when the complete expr (e.g. count(x)) is also in the tlist.
+ * (issue 2655)
  */
 static Node *
 replace_targetentry_in_havingqual(Query *origquery, List *newtlist)
 {
 	Node *having = copyObject(origquery->havingQual);
-	List *origtlist = origquery->targetList;
-	List *modtlist = newtlist;
-	ListCell *lc, *lc2;
 	cagg_havingcxt hcxt;
-
-	/* if we have any exprs that are in the targetlist, then we already have columns
-	 * for them in the mat table. So replace with the correct expr
-	 */
-	forboth (lc, origtlist, lc2, modtlist)
-	{
-		TargetEntry *te = (TargetEntry *) lfirst(lc);
-		TargetEntry *modte = (TargetEntry *) lfirst(lc2);
-		hcxt.old = te;
-		hcxt.new = modte;
-		hcxt.found = false;
-		having =
-			(Node *) expression_tree_mutator((Node *) having, replace_having_qual_mutator, &hcxt);
-	}
+	hcxt.orig_tlist = origquery->targetList;
+	hcxt.mod_tlist = newtlist;
+	hcxt.found = false;
+	having = (Node *) expression_tree_mutator((Node *) having, replace_having_qual_mutator, &hcxt);
 	return having;
 }
 

diff --git a/tsl/test/expected/continuous_aggs.out b/tsl/test/expected/continuous_aggs.out
@@ -1415,3 +1415,47 @@ NOTICE:  drop cascades to 6 other objects
 NOTICE:  drop cascades to table _timescaledb_internal._hyper_35_75_chunk
 NOTICE:  drop cascades to table _timescaledb_internal._hyper_36_76_chunk
 NOTICE:  drop cascades to table _timescaledb_internal._hyper_37_77_chunk
+----
+--- github issue 2655 ---
+create table raw_data(time timestamptz, search_query text, cnt integer);
+select create_hypertable('raw_data','time');
+NOTICE:  adding not-null constraint to column "time"
+   create_hypertable    
+------------------------
+ (38,public,raw_data,t)
+(1 row)
+
+insert into raw_data select '2000-01-01','Q1';
+CREATE MATERIALIZED VIEW search_query_count_1m WITH (timescaledb.continuous) 
+AS
+ SELECT  search_query,count(search_query) as count,
+         time_bucket(INTERVAL '1 minute', time) AS bucket
+ FROM raw_data
+ WHERE search_query is not null AND LENGTH(TRIM(both from search_query))>0
+ GROUP BY search_query, bucket HAVING count(search_query) > 3 OR sum(cnt) > 1;
+NOTICE:  refreshing continuous aggregate "search_query_count_1m"
+insert into raw_data select '2000-01-01 00:00+0','Q1', 1;
+insert into raw_data select '2000-01-01 00:00+0','Q1', 2;
+insert into raw_data select '2000-01-01 00:00+0','Q1', 3;
+insert into raw_data select '2000-01-02 00:00+0','Q2', 10;
+insert into raw_data select '2000-01-02 00:00+0','Q2', 20;
+CALL refresh_continuous_aggregate('search_query_count_1m', NULL, NULL);
+SELECT * FROM search_query_count_1m ORDER BY 1, 2;
+ search_query | count |            bucket            
+--------------+-------+------------------------------
+ Q1           |     3 | Fri Dec 31 16:00:00 1999 PST
+ Q2           |     2 | Sat Jan 01 16:00:00 2000 PST
+(2 rows)
+
+--only 1 of these should appear in the result
+insert into raw_data select '2000-01-02 00:00+0','Q3', 0;
+insert into raw_data select '2000-01-03 00:00+0','Q4', 20;
+CALL refresh_continuous_aggregate('search_query_count_1m', NULL, NULL);
+SELECT * FROM search_query_count_1m ORDER BY 1, 2;
+ search_query | count |            bucket            
+--------------+-------+------------------------------
+ Q1           |     3 | Fri Dec 31 16:00:00 1999 PST
+ Q2           |     2 | Sat Jan 01 16:00:00 2000 PST
+ Q4           |     1 | Sun Jan 02 16:00:00 2000 PST
+(3 rows)
+
diff --git a/tsl/test/sql/continuous_aggs.sql b/tsl/test/sql/continuous_aggs.sql
@@ -1027,3 +1027,35 @@ GROUP BY sensor_id, time_bucket(INTERVAL '1 minute', timestamp)
 ORDER BY water_consumption;
 
 DROP TABLE water_consumption CASCADE;
+
+----
+--- github issue 2655 ---
+create table raw_data(time timestamptz, search_query text, cnt integer);
+select create_hypertable('raw_data','time');
+insert into raw_data select '2000-01-01','Q1';
+
+CREATE MATERIALIZED VIEW search_query_count_1m WITH (timescaledb.continuous) 
+AS
+ SELECT  search_query,count(search_query) as count,
+         time_bucket(INTERVAL '1 minute', time) AS bucket
+ FROM raw_data
+ WHERE search_query is not null AND LENGTH(TRIM(both from search_query))>0
+ GROUP BY search_query, bucket HAVING count(search_query) > 3 OR sum(cnt) > 1;
+
+insert into raw_data select '2000-01-01 00:00+0','Q1', 1;
+insert into raw_data select '2000-01-01 00:00+0','Q1', 2;
+insert into raw_data select '2000-01-01 00:00+0','Q1', 3;
+insert into raw_data select '2000-01-02 00:00+0','Q2', 10;
+insert into raw_data select '2000-01-02 00:00+0','Q2', 20;
+
+CALL refresh_continuous_aggregate('search_query_count_1m', NULL, NULL);
+SELECT * FROM search_query_count_1m ORDER BY 1, 2;
+
+--only 1 of these should appear in the result
+insert into raw_data select '2000-01-02 00:00+0','Q3', 0;
+insert into raw_data select '2000-01-03 00:00+0','Q4', 20;
+
+CALL refresh_continuous_aggregate('search_query_count_1m', NULL, NULL);
+SELECT * FROM search_query_count_1m ORDER BY 1, 2;
+
+