timescale · gayyappan · Aug 17, 2021 · Jul 20, 2021
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,15 @@
 `psql` with the `-X` flag to prevent any `.psqlrc` commands from
 accidentally triggering the load of a previous DB version.**
 
+## Unreleased
+
+
+**Bugfixes**
+* #3430 Fix havingqual processing for continuous aggregates
+
+**Thanks**
+* @brianbenns for reporting a segfault with continuous aggregates
+
 ## 2.4.0 (2021-07-29)
 
 This release adds new experimental features since the 2.3.1 release.

diff --git a/tsl/src/continuous_aggs/create.c b/tsl/src/continuous_aggs/create.c
@@ -11,7 +11,7 @@
  * The entry point for the code is
  * tsl_process_continuous_agg_viewstmt
  * The bulk of the code that creates the underlying tables/views etc. is in
- * cagg_create
+ * cagg_create.
  *
  */
 #include <postgres.h>
@@ -1280,6 +1280,25 @@ mattablecolumninfo_addinternal(MatTableColumnInfo *matcolinfo, RangeTblEntry *us
 	matcolinfo->partial_grouplist = lappend(matcolinfo->partial_grouplist, grpcl);
 }
 
+static Aggref *
+add_partialize_column(Aggref *agg_to_partialize, AggPartCxt *cxt)
+{
+	Aggref *newagg;
+	Var *var;
+	/* step 1: create partialize( aggref) column
+	 * for materialization table */
+	var = mattablecolumninfo_addentry(cxt->mattblinfo,
+									  (Node *) agg_to_partialize,
+									  cxt->original_query_resno);
+	cxt->addcol = true;
+	/* step 2: create finalize_agg expr using var
+	 * for the clumn added to the materialization table
+	 */
+	/* This is a var for the column we created */
+	newagg = get_finalize_aggref(agg_to_partialize, var);
+	return newagg;
+}
+
 static Node *
 add_aggregate_partialize_mutator(Node *node, AggPartCxt *cxt)
 {
@@ -1294,96 +1313,95 @@ add_aggregate_partialize_mutator(Node *node, AggPartCxt *cxt)
 	 */
 	if (IsA(node, Aggref))
 	{
-		Aggref *newagg;
-		Var *var;
-
 		if (cxt->ignore_aggoid == ((Aggref *) node)->aggfnoid)
 			return node; /*don't process this further */
 
-		/* step 1: create partialize( aggref) column
-		 * for materialization table */
-		var = mattablecolumninfo_addentry(cxt->mattblinfo, node, cxt->original_query_resno);
-		cxt->addcol = true;
-		/* step 2: create finalize_agg expr using var
-		 * for the clumn added to the materialization table
-		 */
-		/* This is a var for the column we created */
-		newagg = get_finalize_aggref((Aggref *) node, var);
+		Aggref *newagg = add_partialize_column((Aggref *) node, cxt);
 		return (Node *) newagg;
 	}
 	return expression_tree_mutator(node, add_aggregate_partialize_mutator, cxt);
 }
 
-/* This code modifies modquery */
-/* having clause needs transformation
- * original query is
- * select a, count(b), min(c)
- * from ..
- * group by a
- * having a> 10 or count(b) > 20 or min(d) = 4
- * we get a mat table
- * a, partial(countb), partial(minc) after processing
- * the target list. We need to add entries from the having clause
- * so the modified mat table is
- * a, partial(count), partial(minc), partial(mind)
- * and the new select from the mat table is
- * i.e. we have
- * select col1, finalize(col2), finalize(col3)
- * from ..
- * group by col1
- * having col1 > 10 or finalize(col2) > 20 or finalize(col4) = 4
- * Note: col# = corresponding column from the mat table
- */
 typedef struct Cagg_havingcxt
 {
-	TargetEntry *old;
-	TargetEntry *new;
-	bool found;
+	List *origq_tlist;
+	List *finalizeq_tlist;
+	AggPartCxt agg_cxt;
 } cagg_havingcxt;
 
-/* if we find a target entry  expr that matches the node , then replace it with the
- * expression from  new target entry.
+/* This function modifies the passed in havingQual by mapping exprs to
+ * columns in materialization table or finalized aggregate form.
+ * Note that HAVING clause can contain only exprs from group-by or aggregates
+ * and GROUP BY clauses cannot be aggregates.
+ * (By the time we process havingQuals, all the group by exprs have been
+ * processed and have associated columns in the materialization hypertable).
+ * Example, if  the original query has
+ * GROUP BY  colA + colB, colC
+ *   HAVING colA + colB + sum(colD) > 10 OR count(colE) = 10
+ *
+ * The transformed havingqual would be
+ * HAVING   matCol3 + finalize_agg( sum(matCol4) > 10
+ *       OR finalize_agg( count(matCol5)) = 10
+ *
+ *
+ * Note: GROUP BY exprs always appear in the query's targetlist.
+ * Some of the aggregates from the havingQual  might also already appear in the targetlist.
+ * We replace all existing entries with their corresponding entry from the modified targetlist.
+ * If an aggregate (in the havingqual) does not exist in the TL, we create a
+ *  materialization table column for it and use the finalize(column) form in the
+ * transformed havingQual.
  */
 static Node *
-replace_having_qual_mutator(Node *node, cagg_havingcxt *cxt)
+create_replace_having_qual_mutator(Node *node, cagg_havingcxt *cxt)
 {
 	if (node == NULL)
 		return NULL;
-	if (equal(node, cxt->old->expr))
-	{
-		cxt->found = true;
-		return (Node *) cxt->new->expr;
-	}
-	return expression_tree_mutator(node, replace_having_qual_mutator, cxt);
-}
-
-/* modify the havingqual and replace exprs that already occur in targetlist
- * with entries from new target list
- * RETURNS: havingQual
- */
-static Node *
-replace_targetentry_in_havingqual(Query *origquery, List *newtlist)
-{
-	Node *having = copyObject(origquery->havingQual);
-	List *origtlist = origquery->targetList;
-	List *modtlist = newtlist;
-	ListCell *lc, *lc2;
-	cagg_havingcxt hcxt;
-
-	/* if we have any exprs that are in the targetlist, then we already have columns
-	 * for them in the mat table. So replace with the correct expr
+	/* See if we already have a column in materialization hypertable for this
+	 * expr. We do this by checking the existing targetlist
+	 * entries for the query.
 	 */
+	ListCell *lc, *lc2;
+	List *origtlist = cxt->origq_tlist;
+	List *modtlist = cxt->finalizeq_tlist;
 	forboth (lc, origtlist, lc2, modtlist)
 	{
 		TargetEntry *te = (TargetEntry *) lfirst(lc);
 		TargetEntry *modte = (TargetEntry *) lfirst(lc2);
-		hcxt.old = te;
-		hcxt.new = modte;
-		hcxt.found = false;
-		having =
-			(Node *) expression_tree_mutator((Node *) having, replace_having_qual_mutator, &hcxt);
+		if (equal(node, te->expr))
+		{
+			return (Node *) modte->expr;
+		}
 	}
-	return having;
+	/* didn't find a match in targetlist. If it is an aggregate, create a partialize column for
+	 * it in materialization hypertable and return corresponding finalize
+	 * expr.
+	 */
+	if (IsA(node, Aggref))
+	{
+		AggPartCxt *agg_cxt = &(cxt->agg_cxt);
+		agg_cxt->addcol = false;
+		Aggref *newagg = add_partialize_column((Aggref *) node, agg_cxt);
+		Assert(agg_cxt->addcol == true);
+		return (Node *) newagg;
+	}
+	return expression_tree_mutator(node, create_replace_having_qual_mutator, cxt);
+}
+
+static Node *
+finalizequery_create_havingqual(FinalizeQueryInfo *inp, MatTableColumnInfo *mattblinfo)
+{
+	Query *orig_query = inp->final_userquery;
+	if (orig_query->havingQual == NULL)
+		return NULL;
+	Node *havingQual = copyObject(orig_query->havingQual);
+	Assert(inp->final_seltlist != NULL);
+	cagg_havingcxt hcxt = { .origq_tlist = orig_query->targetList,
+							.finalizeq_tlist = inp->final_seltlist,
+							.agg_cxt.mattblinfo = mattblinfo,
+							.agg_cxt.original_query_resno = 0,
+							.agg_cxt.ignore_aggoid = get_finalizefnoid(),
+							.agg_cxt.addcol = false };
+	return create_replace_having_qual_mutator(havingQual, &hcxt);
 }
 
 /*
@@ -1402,7 +1420,6 @@ finalizequery_init(FinalizeQueryInfo *inp, Query *orig_query, MatTableColumnInfo
 {
 	AggPartCxt cxt;
 	ListCell *lc;
-	Node *newhavingQual;
 	int resno = 1;
 
 	inp->final_userquery = copyObject(orig_query);
@@ -1467,14 +1484,7 @@ finalizequery_init(FinalizeQueryInfo *inp, Query *orig_query, MatTableColumnInfo
 	}
 	/* all grouping clause elements are in targetlist already.
 	   so let's check the having clause */
-	newhavingQual = replace_targetentry_in_havingqual(inp->final_userquery, inp->final_seltlist);
-	/* we might still have aggs in havingqual which don't appear in the targetlist , but don't
-	 * overwrite finalize_agg exprs that we have in the havingQual*/
-	cxt.addcol = false;
-	cxt.ignore_aggoid = get_finalizefnoid();
-	cxt.original_query_resno = 0;
-	inp->final_havingqual =
-		expression_tree_mutator((Node *) newhavingQual, add_aggregate_partialize_mutator, &cxt);
+	inp->final_havingqual = finalizequery_create_havingqual(inp, mattblinfo);
 }
 
 /* Create select query with the finalize aggregates
@@ -1583,7 +1593,7 @@ fixup_userview_query_tlist(Query *userquery, List *tlist_aliases)
  *
  * Step 1. create a materialiation table which stores the partials for the
  * aggregates and the grouping columns + internal columns.
- * So we have a table like ts_internal_mcagg_tab
+ * So we have a table like _materialization_hypertable
  * with columns:
  *( a, col1, col2, col3, internal-columns)
  * where col1 =  partialize(min(b)), col2= partialize(max(d)),
@@ -1593,7 +1603,7 @@ fixup_userview_query_tlist(Query *userquery, List *tlist_aliases)
  * CREATE VIEW mcagg
  * as
  * select a, finalize( col1) + finalize(col2))
- * from ts_internal_mcagg
+ * from _materialization_hypertable
  * group by a, col3
  *
  * Step 3: Create a view to populate the materialization table

diff --git a/tsl/test/expected/continuous_aggs.out b/tsl/test/expected/continuous_aggs.out
@@ -1415,3 +1415,92 @@ NOTICE:  drop cascades to 6 other objects
 NOTICE:  drop cascades to table _timescaledb_internal._hyper_35_75_chunk
 NOTICE:  drop cascades to table _timescaledb_internal._hyper_36_76_chunk
 NOTICE:  drop cascades to table _timescaledb_internal._hyper_37_77_chunk
+----
+--- github issue 2655 ---
+create table raw_data(time timestamptz, search_query text, cnt integer, cnt2 integer);
+select create_hypertable('raw_data','time');
+NOTICE:  adding not-null constraint to column "time"
+   create_hypertable    
+------------------------
+ (38,public,raw_data,t)
+(1 row)
+
+insert into raw_data select '2000-01-01','Q1';
+--having has exprs that appear in select 
+CREATE MATERIALIZED VIEW search_query_count_1m WITH (timescaledb.continuous) 
+AS
+ SELECT  search_query,count(search_query) as count,
+         time_bucket(INTERVAL '1 minute', time) AS bucket
+ FROM raw_data
+ WHERE search_query is not null AND LENGTH(TRIM(both from search_query))>0
+ GROUP BY search_query, bucket HAVING count(search_query) > 3 OR sum(cnt) > 1;
+NOTICE:  refreshing continuous aggregate "search_query_count_1m"
+--having has aggregates + grp by columns that appear in select
+CREATE MATERIALIZED VIEW search_query_count_2 WITH (timescaledb.continuous) 
+AS
+ SELECT  search_query,count(search_query) as count, sum(cnt),
+         time_bucket(INTERVAL '1 minute', time) AS bucket
+ FROM raw_data
+ WHERE search_query is not null AND LENGTH(TRIM(both from search_query))>0
+ GROUP BY search_query, bucket 
+HAVING count(search_query) > 3 OR sum(cnt) > 1 OR 
+       ( sum(cnt) + count(cnt)) > 1
+       AND search_query = 'Q1';
+NOTICE:  refreshing continuous aggregate "search_query_count_2"
+CREATE MATERIALIZED VIEW search_query_count_3 WITH (timescaledb.continuous) 
+AS
+ SELECT  search_query,count(search_query) as count, sum(cnt),
+         time_bucket(INTERVAL '1 minute', time) AS bucket
+ FROM raw_data
+ WHERE search_query is not null AND LENGTH(TRIM(both from search_query))>0
+ GROUP BY cnt +cnt2 , bucket, search_query
+ HAVING cnt + cnt2 + sum(cnt) > 2 or count(cnt2) > 10;
+NOTICE:  refreshing continuous aggregate "search_query_count_3"
+insert into raw_data select '2000-01-01 00:00+0','Q1', 1, 100;
+insert into raw_data select '2000-01-01 00:00+0','Q1', 2, 200;
+insert into raw_data select '2000-01-01 00:00+0','Q1', 3, 300;
+insert into raw_data select '2000-01-02 00:00+0','Q2', 10, 10;
+insert into raw_data select '2000-01-02 00:00+0','Q2', 20, 20;
+CALL refresh_continuous_aggregate('search_query_count_1m', NULL, NULL);
+SELECT * FROM search_query_count_1m ORDER BY 1, 2;
+ search_query | count |            bucket            
+--------------+-------+------------------------------
+ Q1           |     3 | Fri Dec 31 16:00:00 1999 PST
+ Q2           |     2 | Sat Jan 01 16:00:00 2000 PST
+(2 rows)
+
+--only 1 of these should appear in the result
+insert into raw_data select '2000-01-02 00:00+0','Q3', 0, 0;
+insert into raw_data select '2000-01-03 00:00+0','Q4', 20, 20;
+CALL refresh_continuous_aggregate('search_query_count_1m', NULL, NULL);
+SELECT * FROM search_query_count_1m ORDER BY 1, 2;
+ search_query | count |            bucket            
+--------------+-------+------------------------------
+ Q1           |     3 | Fri Dec 31 16:00:00 1999 PST
+ Q2           |     2 | Sat Jan 01 16:00:00 2000 PST
+ Q4           |     1 | Sun Jan 02 16:00:00 2000 PST
+(3 rows)
+
+--refresh search_query_count_2---
+CALL refresh_continuous_aggregate('search_query_count_2', NULL, NULL);
+SELECT * FROM search_query_count_2 ORDER BY 1, 2;
+ search_query | count | sum |            bucket            
+--------------+-------+-----+------------------------------
+ Q1           |     3 |   6 | Fri Dec 31 16:00:00 1999 PST
+ Q2           |     2 |  30 | Sat Jan 01 16:00:00 2000 PST
+ Q4           |     1 |  20 | Sun Jan 02 16:00:00 2000 PST
+(3 rows)
+
+--refresh search_query_count_3---
+CALL refresh_continuous_aggregate('search_query_count_3', NULL, NULL);
+SELECT * FROM search_query_count_3 ORDER BY 1, 2, 3;
+ search_query | count | sum |            bucket            
+--------------+-------+-----+------------------------------
+ Q1           |     1 |   1 | Fri Dec 31 16:00:00 1999 PST
+ Q1           |     1 |   2 | Fri Dec 31 16:00:00 1999 PST
+ Q1           |     1 |   3 | Fri Dec 31 16:00:00 1999 PST
+ Q2           |     1 |  10 | Sat Jan 01 16:00:00 2000 PST
+ Q2           |     1 |  20 | Sat Jan 01 16:00:00 2000 PST
+ Q4           |     1 |  20 | Sun Jan 02 16:00:00 2000 PST
+(6 rows)
+