From 657085cddd0fe2b619ccec2078feb813e98ef20e Mon Sep 17 00:00:00 2001
From: gayyappan <gayathri@timescale.com>
Date: Tue, 20 Jul 2021 16:51:54 -0400
Subject: [PATCH] Fix havingqual processing for caggs

If the targetlist for the cagg query has both subexprs and exprs
from the having clause, the havingqual for the partial view
is generated incorrectly. Fix this issue by checking havingqual
against all the entries in the targetlist instead of first match.

Fixes #2655
---
 CHANGELOG.md                          |   9 ++
 tsl/src/continuous_aggs/create.c      | 168 ++++++++++++++------------
 tsl/test/expected/continuous_aggs.out |  89 ++++++++++++++
 tsl/test/sql/continuous_aggs.sql      |  60 +++++++++
 4 files changed, 247 insertions(+), 79 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 304517a54cf..a47074b48fe 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,15 @@
 `psql` with the `-X` flag to prevent any `.psqlrc` commands from
 accidentally triggering the load of a previous DB version.**
 
+## Unreleased
+
+
+**Bugfixes**
+* #3430 Fix havingqual processing for continuous aggregates
+
+**Thanks**
+* @brianbenns for reporting a segfault with continuous aggregates
+
 ## 2.4.0 (2021-07-29)
 
 This release adds new experimental features since the 2.3.1 release.
diff --git a/tsl/src/continuous_aggs/create.c b/tsl/src/continuous_aggs/create.c
index 2dc467d932c..6f9b7bf99ac 100644
--- a/tsl/src/continuous_aggs/create.c
+++ b/tsl/src/continuous_aggs/create.c
@@ -11,7 +11,7 @@
  * The entry point for the code is
  * tsl_process_continuous_agg_viewstmt
  * The bulk of the code that creates the underlying tables/views etc. is in
- * cagg_create
+ * cagg_create.
  *
  */
 #include <postgres.h>
@@ -1280,6 +1280,25 @@ mattablecolumninfo_addinternal(MatTableColumnInfo *matcolinfo, RangeTblEntry *us
 	matcolinfo->partial_grouplist = lappend(matcolinfo->partial_grouplist, grpcl);
 }
 
+static Aggref *
+add_partialize_column(Aggref *agg_to_partialize, AggPartCxt *cxt)
+{
+	Aggref *newagg;
+	Var *var;
+	/* step 1: create partialize( aggref) column
+	 * for materialization table */
+	var = mattablecolumninfo_addentry(cxt->mattblinfo,
+									  (Node *) agg_to_partialize,
+									  cxt->original_query_resno);
+	cxt->addcol = true;
+	/* step 2: create finalize_agg expr using var
+	 * for the clumn added to the materialization table
+	 */
+	/* This is a var for the column we created */
+	newagg = get_finalize_aggref(agg_to_partialize, var);
+	return newagg;
+}
+
 static Node *
 add_aggregate_partialize_mutator(Node *node, AggPartCxt *cxt)
 {
@@ -1294,96 +1313,95 @@ add_aggregate_partialize_mutator(Node *node, AggPartCxt *cxt)
 	 */
 	if (IsA(node, Aggref))
 	{
-		Aggref *newagg;
-		Var *var;
-
 		if (cxt->ignore_aggoid == ((Aggref *) node)->aggfnoid)
 			return node; /*don't process this further */
 
-		/* step 1: create partialize( aggref) column
-		 * for materialization table */
-		var = mattablecolumninfo_addentry(cxt->mattblinfo, node, cxt->original_query_resno);
-		cxt->addcol = true;
-		/* step 2: create finalize_agg expr using var
-		 * for the clumn added to the materialization table
-		 */
-		/* This is a var for the column we created */
-		newagg = get_finalize_aggref((Aggref *) node, var);
+		Aggref *newagg = add_partialize_column((Aggref *) node, cxt);
 		return (Node *) newagg;
 	}
 	return expression_tree_mutator(node, add_aggregate_partialize_mutator, cxt);
 }
 
-/* This code modifies modquery */
-/* having clause needs transformation
- * original query is
- * select a, count(b), min(c)
- * from ..
- * group by a
- * having a> 10 or count(b) > 20 or min(d) = 4
- * we get a mat table
- * a, partial(countb), partial(minc) after processing
- * the target list. We need to add entries from the having clause
- * so the modified mat table is
- * a, partial(count), partial(minc), partial(mind)
- * and the new select from the mat table is
- * i.e. we have
- * select col1, finalize(col2), finalize(col3)
- * from ..
- * group by col1
- * having col1 > 10 or finalize(col2) > 20 or finalize(col4) = 4
- * Note: col# = corresponding column from the mat table
- */
 typedef struct Cagg_havingcxt
 {
-	TargetEntry *old;
-	TargetEntry *new;
-	bool found;
+	List *origq_tlist;
+	List *finalizeq_tlist;
+	AggPartCxt agg_cxt;
 } cagg_havingcxt;
 
-/* if we find a target entry  expr that matches the node , then replace it with the
- * expression from  new target entry.
+/* This function modifies the passed in havingQual by mapping exprs to
+ * columns in materialization table or finalized aggregate form.
+ * Note that HAVING clause can contain only exprs from group-by or aggregates
+ * and GROUP BY clauses cannot be aggregates.
+ * (By the time we process havingQuals, all the group by exprs have been
+ * processed and have associated columns in the materialization hypertable).
+ * Example, if  the original query has
+ * GROUP BY  colA + colB, colC
+ *   HAVING colA + colB + sum(colD) > 10 OR count(colE) = 10
+ *
+ * The transformed havingqual would be
+ * HAVING   matCol3 + finalize_agg( sum(matCol4) > 10
+ *       OR finalize_agg( count(matCol5)) = 10
+ *
+ *
+ * Note: GROUP BY exprs always appear in the query's targetlist.
+ * Some of the aggregates from the havingQual  might also already appear in the targetlist.
+ * We replace all existing entries with their corresponding entry from the modified targetlist.
+ * If an aggregate (in the havingqual) does not exist in the TL, we create a
+ *  materialization table column for it and use the finalize(column) form in the
+ * transformed havingQual.
  */
 static Node *
-replace_having_qual_mutator(Node *node, cagg_havingcxt *cxt)
+create_replace_having_qual_mutator(Node *node, cagg_havingcxt *cxt)
 {
 	if (node == NULL)
 		return NULL;
-	if (equal(node, cxt->old->expr))
-	{
-		cxt->found = true;
-		return (Node *) cxt->new->expr;
-	}
-	return expression_tree_mutator(node, replace_having_qual_mutator, cxt);
-}
-
-/* modify the havingqual and replace exprs that already occur in targetlist
- * with entries from new target list
- * RETURNS: havingQual
- */
-static Node *
-replace_targetentry_in_havingqual(Query *origquery, List *newtlist)
-{
-	Node *having = copyObject(origquery->havingQual);
-	List *origtlist = origquery->targetList;
-	List *modtlist = newtlist;
-	ListCell *lc, *lc2;
-	cagg_havingcxt hcxt;
-
-	/* if we have any exprs that are in the targetlist, then we already have columns
-	 * for them in the mat table. So replace with the correct expr
+	/* See if we already have a column in materialization hypertable for this
+	 * expr. We do this by checking the existing targetlist
+	 * entries for the query.
 	 */
+	ListCell *lc, *lc2;
+	List *origtlist = cxt->origq_tlist;
+	List *modtlist = cxt->finalizeq_tlist;
 	forboth (lc, origtlist, lc2, modtlist)
 	{
 		TargetEntry *te = (TargetEntry *) lfirst(lc);
 		TargetEntry *modte = (TargetEntry *) lfirst(lc2);
-		hcxt.old = te;
-		hcxt.new = modte;
-		hcxt.found = false;
-		having =
-			(Node *) expression_tree_mutator((Node *) having, replace_having_qual_mutator, &hcxt);
+		if (equal(node, te->expr))
+		{
+			return (Node *) modte->expr;
+		}
 	}
-	return having;
+	/* didn't find a match in targetlist. If it is an aggregate, create a partialize column for
+	 * it in materialization hypertable and return corresponding finalize
+	 * expr.
+	 */
+	if (IsA(node, Aggref))
+	{
+		AggPartCxt *agg_cxt = &(cxt->agg_cxt);
+		agg_cxt->addcol = false;
+		Aggref *newagg = add_partialize_column((Aggref *) node, agg_cxt);
+		Assert(agg_cxt->addcol == true);
+		return (Node *) newagg;
+	}
+	return expression_tree_mutator(node, create_replace_having_qual_mutator, cxt);
+}
+
+static Node *
+finalizequery_create_havingqual(FinalizeQueryInfo *inp, MatTableColumnInfo *mattblinfo)
+{
+	Query *orig_query = inp->final_userquery;
+	if (orig_query->havingQual == NULL)
+		return NULL;
+	Node *havingQual = copyObject(orig_query->havingQual);
+	Assert(inp->final_seltlist != NULL);
+	cagg_havingcxt hcxt = { .origq_tlist = orig_query->targetList,
+							.finalizeq_tlist = inp->final_seltlist,
+							.agg_cxt.mattblinfo = mattblinfo,
+							.agg_cxt.original_query_resno = 0,
+							.agg_cxt.ignore_aggoid = get_finalizefnoid(),
+							.agg_cxt.addcol = false };
+	return create_replace_having_qual_mutator(havingQual, &hcxt);
 }
 
 /*
@@ -1402,7 +1420,6 @@ finalizequery_init(FinalizeQueryInfo *inp, Query *orig_query, MatTableColumnInfo
 {
 	AggPartCxt cxt;
 	ListCell *lc;
-	Node *newhavingQual;
 	int resno = 1;
 
 	inp->final_userquery = copyObject(orig_query);
@@ -1467,14 +1484,7 @@ finalizequery_init(FinalizeQueryInfo *inp, Query *orig_query, MatTableColumnInfo
 	}
 	/* all grouping clause elements are in targetlist already.
 	   so let's check the having clause */
-	newhavingQual = replace_targetentry_in_havingqual(inp->final_userquery, inp->final_seltlist);
-	/* we might still have aggs in havingqual which don't appear in the targetlist , but don't
-	 * overwrite finalize_agg exprs that we have in the havingQual*/
-	cxt.addcol = false;
-	cxt.ignore_aggoid = get_finalizefnoid();
-	cxt.original_query_resno = 0;
-	inp->final_havingqual =
-		expression_tree_mutator((Node *) newhavingQual, add_aggregate_partialize_mutator, &cxt);
+	inp->final_havingqual = finalizequery_create_havingqual(inp, mattblinfo);
 }
 
 /* Create select query with the finalize aggregates
@@ -1583,7 +1593,7 @@ fixup_userview_query_tlist(Query *userquery, List *tlist_aliases)
  *
  * Step 1. create a materialiation table which stores the partials for the
  * aggregates and the grouping columns + internal columns.
- * So we have a table like ts_internal_mcagg_tab
+ * So we have a table like _materialization_hypertable
  * with columns:
  *( a, col1, col2, col3, internal-columns)
  * where col1 =  partialize(min(b)), col2= partialize(max(d)),
@@ -1593,7 +1603,7 @@ fixup_userview_query_tlist(Query *userquery, List *tlist_aliases)
  * CREATE VIEW mcagg
  * as
  * select a, finalize( col1) + finalize(col2))
- * from ts_internal_mcagg
+ * from _materialization_hypertable
  * group by a, col3
  *
  * Step 3: Create a view to populate the materialization table
diff --git a/tsl/test/expected/continuous_aggs.out b/tsl/test/expected/continuous_aggs.out
index 1a366018c31..f2f194cca6a 100644
--- a/tsl/test/expected/continuous_aggs.out
+++ b/tsl/test/expected/continuous_aggs.out
@@ -1415,3 +1415,92 @@ NOTICE:  drop cascades to 6 other objects
 NOTICE:  drop cascades to table _timescaledb_internal._hyper_35_75_chunk
 NOTICE:  drop cascades to table _timescaledb_internal._hyper_36_76_chunk
 NOTICE:  drop cascades to table _timescaledb_internal._hyper_37_77_chunk
+----
+--- github issue 2655 ---
+create table raw_data(time timestamptz, search_query text, cnt integer, cnt2 integer);
+select create_hypertable('raw_data','time');
+NOTICE:  adding not-null constraint to column "time"
+   create_hypertable    
+------------------------
+ (38,public,raw_data,t)
+(1 row)
+
+insert into raw_data select '2000-01-01','Q1';
+--having has exprs that appear in select 
+CREATE MATERIALIZED VIEW search_query_count_1m WITH (timescaledb.continuous) 
+AS
+ SELECT  search_query,count(search_query) as count,
+         time_bucket(INTERVAL '1 minute', time) AS bucket
+ FROM raw_data
+ WHERE search_query is not null AND LENGTH(TRIM(both from search_query))>0
+ GROUP BY search_query, bucket HAVING count(search_query) > 3 OR sum(cnt) > 1;
+NOTICE:  refreshing continuous aggregate "search_query_count_1m"
+--having has aggregates + grp by columns that appear in select
+CREATE MATERIALIZED VIEW search_query_count_2 WITH (timescaledb.continuous) 
+AS
+ SELECT  search_query,count(search_query) as count, sum(cnt),
+         time_bucket(INTERVAL '1 minute', time) AS bucket
+ FROM raw_data
+ WHERE search_query is not null AND LENGTH(TRIM(both from search_query))>0
+ GROUP BY search_query, bucket 
+HAVING count(search_query) > 3 OR sum(cnt) > 1 OR 
+       ( sum(cnt) + count(cnt)) > 1
+       AND search_query = 'Q1';
+NOTICE:  refreshing continuous aggregate "search_query_count_2"
+CREATE MATERIALIZED VIEW search_query_count_3 WITH (timescaledb.continuous) 
+AS
+ SELECT  search_query,count(search_query) as count, sum(cnt),
+         time_bucket(INTERVAL '1 minute', time) AS bucket
+ FROM raw_data
+ WHERE search_query is not null AND LENGTH(TRIM(both from search_query))>0
+ GROUP BY cnt +cnt2 , bucket, search_query
+ HAVING cnt + cnt2 + sum(cnt) > 2 or count(cnt2) > 10;
+NOTICE:  refreshing continuous aggregate "search_query_count_3"
+insert into raw_data select '2000-01-01 00:00+0','Q1', 1, 100;
+insert into raw_data select '2000-01-01 00:00+0','Q1', 2, 200;
+insert into raw_data select '2000-01-01 00:00+0','Q1', 3, 300;
+insert into raw_data select '2000-01-02 00:00+0','Q2', 10, 10;
+insert into raw_data select '2000-01-02 00:00+0','Q2', 20, 20;
+CALL refresh_continuous_aggregate('search_query_count_1m', NULL, NULL);
+SELECT * FROM search_query_count_1m ORDER BY 1, 2;
+ search_query | count |            bucket            
+--------------+-------+------------------------------
+ Q1           |     3 | Fri Dec 31 16:00:00 1999 PST
+ Q2           |     2 | Sat Jan 01 16:00:00 2000 PST
+(2 rows)
+
+--only 1 of these should appear in the result
+insert into raw_data select '2000-01-02 00:00+0','Q3', 0, 0;
+insert into raw_data select '2000-01-03 00:00+0','Q4', 20, 20;
+CALL refresh_continuous_aggregate('search_query_count_1m', NULL, NULL);
+SELECT * FROM search_query_count_1m ORDER BY 1, 2;
+ search_query | count |            bucket            
+--------------+-------+------------------------------
+ Q1           |     3 | Fri Dec 31 16:00:00 1999 PST
+ Q2           |     2 | Sat Jan 01 16:00:00 2000 PST
+ Q4           |     1 | Sun Jan 02 16:00:00 2000 PST
+(3 rows)
+
+--refresh search_query_count_2---
+CALL refresh_continuous_aggregate('search_query_count_2', NULL, NULL);
+SELECT * FROM search_query_count_2 ORDER BY 1, 2;
+ search_query | count | sum |            bucket            
+--------------+-------+-----+------------------------------
+ Q1           |     3 |   6 | Fri Dec 31 16:00:00 1999 PST
+ Q2           |     2 |  30 | Sat Jan 01 16:00:00 2000 PST
+ Q4           |     1 |  20 | Sun Jan 02 16:00:00 2000 PST
+(3 rows)
+
+--refresh search_query_count_3---
+CALL refresh_continuous_aggregate('search_query_count_3', NULL, NULL);
+SELECT * FROM search_query_count_3 ORDER BY 1, 2, 3;
+ search_query | count | sum |            bucket            
+--------------+-------+-----+------------------------------
+ Q1           |     1 |   1 | Fri Dec 31 16:00:00 1999 PST
+ Q1           |     1 |   2 | Fri Dec 31 16:00:00 1999 PST
+ Q1           |     1 |   3 | Fri Dec 31 16:00:00 1999 PST
+ Q2           |     1 |  10 | Sat Jan 01 16:00:00 2000 PST
+ Q2           |     1 |  20 | Sat Jan 01 16:00:00 2000 PST
+ Q4           |     1 |  20 | Sun Jan 02 16:00:00 2000 PST
+(6 rows)
+
diff --git a/tsl/test/sql/continuous_aggs.sql b/tsl/test/sql/continuous_aggs.sql
index 8dd6ac1e05e..4347b97d831 100644
--- a/tsl/test/sql/continuous_aggs.sql
+++ b/tsl/test/sql/continuous_aggs.sql
@@ -1027,3 +1027,63 @@ GROUP BY sensor_id, time_bucket(INTERVAL '1 minute', timestamp)
 ORDER BY water_consumption;
 
 DROP TABLE water_consumption CASCADE;
+
+----
+--- github issue 2655 ---
+create table raw_data(time timestamptz, search_query text, cnt integer, cnt2 integer);
+select create_hypertable('raw_data','time');
+insert into raw_data select '2000-01-01','Q1';
+
+--having has exprs that appear in select 
+CREATE MATERIALIZED VIEW search_query_count_1m WITH (timescaledb.continuous) 
+AS
+ SELECT  search_query,count(search_query) as count,
+         time_bucket(INTERVAL '1 minute', time) AS bucket
+ FROM raw_data
+ WHERE search_query is not null AND LENGTH(TRIM(both from search_query))>0
+ GROUP BY search_query, bucket HAVING count(search_query) > 3 OR sum(cnt) > 1;
+
+--having has aggregates + grp by columns that appear in select
+CREATE MATERIALIZED VIEW search_query_count_2 WITH (timescaledb.continuous) 
+AS
+ SELECT  search_query,count(search_query) as count, sum(cnt),
+         time_bucket(INTERVAL '1 minute', time) AS bucket
+ FROM raw_data
+ WHERE search_query is not null AND LENGTH(TRIM(both from search_query))>0
+ GROUP BY search_query, bucket 
+HAVING count(search_query) > 3 OR sum(cnt) > 1 OR 
+       ( sum(cnt) + count(cnt)) > 1
+       AND search_query = 'Q1';
+
+CREATE MATERIALIZED VIEW search_query_count_3 WITH (timescaledb.continuous) 
+AS
+ SELECT  search_query,count(search_query) as count, sum(cnt),
+         time_bucket(INTERVAL '1 minute', time) AS bucket
+ FROM raw_data
+ WHERE search_query is not null AND LENGTH(TRIM(both from search_query))>0
+ GROUP BY cnt +cnt2 , bucket, search_query
+ HAVING cnt + cnt2 + sum(cnt) > 2 or count(cnt2) > 10;
+
+insert into raw_data select '2000-01-01 00:00+0','Q1', 1, 100;
+insert into raw_data select '2000-01-01 00:00+0','Q1', 2, 200;
+insert into raw_data select '2000-01-01 00:00+0','Q1', 3, 300;
+insert into raw_data select '2000-01-02 00:00+0','Q2', 10, 10;
+insert into raw_data select '2000-01-02 00:00+0','Q2', 20, 20;
+
+CALL refresh_continuous_aggregate('search_query_count_1m', NULL, NULL);
+SELECT * FROM search_query_count_1m ORDER BY 1, 2;
+
+--only 1 of these should appear in the result
+insert into raw_data select '2000-01-02 00:00+0','Q3', 0, 0;
+insert into raw_data select '2000-01-03 00:00+0','Q4', 20, 20;
+
+CALL refresh_continuous_aggregate('search_query_count_1m', NULL, NULL);
+SELECT * FROM search_query_count_1m ORDER BY 1, 2;
+
+--refresh search_query_count_2---
+CALL refresh_continuous_aggregate('search_query_count_2', NULL, NULL);
+SELECT * FROM search_query_count_2 ORDER BY 1, 2;
+
+--refresh search_query_count_3---
+CALL refresh_continuous_aggregate('search_query_count_3', NULL, NULL);
+SELECT * FROM search_query_count_3 ORDER BY 1, 2, 3;