Fix support for complex aggregate expression

Fixes support for continuous aggregates when the view query contains an expression with several aggregates, e.g., `max(val) - min(val)`. Usage of continuous aggregates with such expression was producing errors if the aggregate expression was not the last in the SELECT clause or not all GROUP BY expressions were present in the SELECT clause. An expression with several aggregates is materialized with partials per aggregate. For example, `max(val) - min(val)` will be materialized in two partial entry columns: one for `max` and one for `min`. Thus all columns in the materialized hypertable should account for the number of partials and cannot just use the position in the original query. This fix makes sure to account for such case. Fixes #2616
timescale · Nov 20, 2020 · 5763ca3 · 5763ca3
1 parent 47da879
commit 5763ca3
Show file tree

Hide file tree

Showing 3 changed files with 234 additions and 0 deletions.
diff --git a/tsl/src/continuous_aggs/create.c b/tsl/src/continuous_aggs/create.c
@@ -1184,6 +1184,14 @@ mattablecolumninfo_addentry(MatTableColumnInfo *out, Node *input, int original_q
 			part_te = (TargetEntry *) copyObject(input);
 			/*need to project all the partial entries so that materialization table is filled */
 			part_te->resjunk = false;
+
+			/* Partial entry should have actual position in the column list of the materialization
+			 * table, which is not necessary the same as original. For example, if a column in a
+			 * original view query is an expression with several aggregates, e.g.,
+			 * `max(val)-min(val)`, then the original column will be represented as two partial
+			 * entries: one for `min` and one for `max`. */
+			part_te->resno = matcolno;
+
 			if (timebkt_chk)
 			{
 				col->is_not_null = true;

diff --git a/tsl/test/expected/continuous_aggs.out b/tsl/test/expected/continuous_aggs.out
@@ -1276,3 +1276,138 @@ SELECT * FROM mat_types;
  Fri Dec 31 16:00:00 1999 PST | yellow | {foo,bar,baz} | {1,2.5,3}
 (1 row)
 
+-------------------------------------------------------------------------------------
+-- Test issue #2616 where cagg view contains an experssion with several aggregates in
+CREATE TABLE water_consumption
+(
+    sensor_id   integer      NOT NULL,
+    timestamp   timestamp(0) NOT NULL,
+    water_index integer
+);
+SELECT create_hypertable('water_consumption', 'timestamp', 'sensor_id', 2);
+        create_hypertable        
+---------------------------------
+ (34,public,water_consumption,t)
+(1 row)
+
+INSERT INTO public.water_consumption (sensor_id, timestamp, water_index) VALUES 
+  (1, '2010-11-03 09:42:30', 1030),
+  (1, '2010-11-03 09:42:40', 1032),
+  (1, '2010-11-03 09:42:50', 1035),
+  (1, '2010-11-03 09:43:30', 1040),
+  (1, '2010-11-03 09:43:40', 1045),
+  (1, '2010-11-03 09:43:50', 1050),
+  (1, '2010-11-03 09:44:30', 1052),
+  (1, '2010-11-03 09:44:40', 1057),
+  (1, '2010-11-03 09:44:50', 1060),
+  (1, '2010-11-03 09:45:30', 1063),
+  (1, '2010-11-03 09:45:40', 1067),
+  (1, '2010-11-03 09:45:50', 1070);
+-- The test with the view originally reported in the issue.
+CREATE MATERIALIZED VIEW water_consumption_aggregation_minute
+            WITH (timescaledb.continuous, timescaledb.materialized_only = TRUE)
+AS
+SELECT sensor_id,
+       time_bucket(INTERVAL '1 minute', timestamp) + '1 minute' AS timestamp,
+       (max(water_index) - min(water_index))                    AS water_consumption
+FROM water_consumption
+GROUP BY sensor_id, time_bucket(INTERVAL '1 minute', timestamp)
+WITH NO DATA;
+CALL refresh_continuous_aggregate('water_consumption_aggregation_minute', NULL, NULL);
+-- The results of the view and the query over hypertable should be the same
+SELECT * FROM water_consumption_aggregation_minute ORDER BY water_consumption;
+ sensor_id |        timestamp         | water_consumption 
+-----------+--------------------------+-------------------
+         1 | Wed Nov 03 09:43:00 2010 |                 5
+         1 | Wed Nov 03 09:46:00 2010 |                 7
+         1 | Wed Nov 03 09:45:00 2010 |                 8
+         1 | Wed Nov 03 09:44:00 2010 |                10
+(4 rows)
+
+SELECT sensor_id,
+       time_bucket(INTERVAL '1 minute', timestamp) + '1 minute' AS timestamp,
+       (max(water_index) - min(water_index))                    AS water_consumption
+FROM water_consumption
+GROUP BY sensor_id, time_bucket(INTERVAL '1 minute', timestamp)
+ORDER BY water_consumption;
+ sensor_id |        timestamp         | water_consumption 
+-----------+--------------------------+-------------------
+         1 | Wed Nov 03 09:43:00 2010 |                 5
+         1 | Wed Nov 03 09:46:00 2010 |                 7
+         1 | Wed Nov 03 09:45:00 2010 |                 8
+         1 | Wed Nov 03 09:44:00 2010 |                10
+(4 rows)
+
+-- Simplified test, where the view doesn't contain all group by clauses
+CREATE MATERIALIZED VIEW water_consumption_no_select_bucket
+            WITH (timescaledb.continuous, timescaledb.materialized_only = TRUE)
+AS
+SELECT sensor_id,
+       (max(water_index) - min(water_index))                    AS water_consumption
+FROM water_consumption
+GROUP BY sensor_id, time_bucket(INTERVAL '1 minute', timestamp)
+WITH NO DATA;
+CALL refresh_continuous_aggregate('water_consumption_no_select_bucket', NULL, NULL);
+-- The results of the view and the query over hypertable should be the same
+SELECT * FROM water_consumption_no_select_bucket ORDER BY water_consumption;
+ sensor_id | water_consumption 
+-----------+-------------------
+         1 |                 5
+         1 |                 7
+         1 |                 8
+         1 |                10
+(4 rows)
+
+SELECT sensor_id,
+       (max(water_index) - min(water_index))                    AS water_consumption
+FROM water_consumption
+GROUP BY sensor_id, time_bucket(INTERVAL '1 minute', timestamp)
+ORDER BY water_consumption;
+ sensor_id | water_consumption 
+-----------+-------------------
+         1 |                 5
+         1 |                 7
+         1 |                 8
+         1 |                10
+(4 rows)
+
+-- The test with SELECT matching GROUP BY and placing aggregate expression not the last
+CREATE MATERIALIZED VIEW water_consumption_aggregation_no_addition
+            WITH (timescaledb.continuous, timescaledb.materialized_only = TRUE)
+AS
+SELECT sensor_id,
+       (max(water_index) - min(water_index))                    AS water_consumption,
+       time_bucket(INTERVAL '1 minute', timestamp) AS timestamp
+FROM water_consumption
+GROUP BY sensor_id, time_bucket(INTERVAL '1 minute', timestamp)
+WITH NO DATA;
+CALL refresh_continuous_aggregate('water_consumption_aggregation_no_addition', NULL, NULL);
+-- The results of the view and the query over hypertable should be the same
+SELECT * FROM water_consumption_aggregation_no_addition ORDER BY water_consumption;
+ sensor_id | water_consumption |        timestamp         
+-----------+-------------------+--------------------------
+         1 |                 5 | Wed Nov 03 09:42:00 2010
+         1 |                 7 | Wed Nov 03 09:45:00 2010
+         1 |                 8 | Wed Nov 03 09:44:00 2010
+         1 |                10 | Wed Nov 03 09:43:00 2010
+(4 rows)
+
+SELECT sensor_id,
+       (max(water_index) - min(water_index))                    AS water_consumption,
+       time_bucket(INTERVAL '1 minute', timestamp) AS timestamp
+FROM water_consumption
+GROUP BY sensor_id, time_bucket(INTERVAL '1 minute', timestamp)
+ORDER BY water_consumption;
+ sensor_id | water_consumption |        timestamp         
+-----------+-------------------+--------------------------
+         1 |                 5 | Wed Nov 03 09:42:00 2010
+         1 |                 7 | Wed Nov 03 09:45:00 2010
+         1 |                 8 | Wed Nov 03 09:44:00 2010
+         1 |                10 | Wed Nov 03 09:43:00 2010
+(4 rows)
+
+DROP TABLE water_consumption CASCADE;
+NOTICE:  drop cascades to 6 other objects
+NOTICE:  drop cascades to table _timescaledb_internal._hyper_35_75_chunk
+NOTICE:  drop cascades to table _timescaledb_internal._hyper_36_76_chunk
+NOTICE:  drop cascades to table _timescaledb_internal._hyper_37_77_chunk
diff --git a/tsl/test/sql/continuous_aggs.sql b/tsl/test/sql/continuous_aggs.sql
@@ -930,3 +930,94 @@ GROUP BY
 CALL refresh_continuous_aggregate('mat_types',NULL,NULL);
 SELECT * FROM mat_types;
 
+-------------------------------------------------------------------------------------
+-- Test issue #2616 where cagg view contains an experssion with several aggregates in
+
+CREATE TABLE water_consumption
+(
+    sensor_id   integer      NOT NULL,
+    timestamp   timestamp(0) NOT NULL,
+    water_index integer
+);
+
+SELECT create_hypertable('water_consumption', 'timestamp', 'sensor_id', 2);
+
+INSERT INTO public.water_consumption (sensor_id, timestamp, water_index) VALUES 
+  (1, '2010-11-03 09:42:30', 1030),
+  (1, '2010-11-03 09:42:40', 1032),
+  (1, '2010-11-03 09:42:50', 1035),
+  (1, '2010-11-03 09:43:30', 1040),
+  (1, '2010-11-03 09:43:40', 1045),
+  (1, '2010-11-03 09:43:50', 1050),
+  (1, '2010-11-03 09:44:30', 1052),
+  (1, '2010-11-03 09:44:40', 1057),
+  (1, '2010-11-03 09:44:50', 1060),
+  (1, '2010-11-03 09:45:30', 1063),
+  (1, '2010-11-03 09:45:40', 1067),
+  (1, '2010-11-03 09:45:50', 1070);
+
+-- The test with the view originally reported in the issue.
+CREATE MATERIALIZED VIEW water_consumption_aggregation_minute
+            WITH (timescaledb.continuous, timescaledb.materialized_only = TRUE)
+AS
+SELECT sensor_id,
+       time_bucket(INTERVAL '1 minute', timestamp) + '1 minute' AS timestamp,
+       (max(water_index) - min(water_index))                    AS water_consumption
+FROM water_consumption
+GROUP BY sensor_id, time_bucket(INTERVAL '1 minute', timestamp)
+WITH NO DATA;
+
+CALL refresh_continuous_aggregate('water_consumption_aggregation_minute', NULL, NULL);
+
+-- The results of the view and the query over hypertable should be the same
+SELECT * FROM water_consumption_aggregation_minute ORDER BY water_consumption;
+SELECT sensor_id,
+       time_bucket(INTERVAL '1 minute', timestamp) + '1 minute' AS timestamp,
+       (max(water_index) - min(water_index))                    AS water_consumption
+FROM water_consumption
+GROUP BY sensor_id, time_bucket(INTERVAL '1 minute', timestamp)
+ORDER BY water_consumption;
+
+-- Simplified test, where the view doesn't contain all group by clauses
+CREATE MATERIALIZED VIEW water_consumption_no_select_bucket
+            WITH (timescaledb.continuous, timescaledb.materialized_only = TRUE)
+AS
+SELECT sensor_id,
+       (max(water_index) - min(water_index))                    AS water_consumption
+FROM water_consumption
+GROUP BY sensor_id, time_bucket(INTERVAL '1 minute', timestamp)
+WITH NO DATA;
+
+CALL refresh_continuous_aggregate('water_consumption_no_select_bucket', NULL, NULL);
+
+-- The results of the view and the query over hypertable should be the same
+SELECT * FROM water_consumption_no_select_bucket ORDER BY water_consumption;
+SELECT sensor_id,
+       (max(water_index) - min(water_index))                    AS water_consumption
+FROM water_consumption
+GROUP BY sensor_id, time_bucket(INTERVAL '1 minute', timestamp)
+ORDER BY water_consumption;
+
+-- The test with SELECT matching GROUP BY and placing aggregate expression not the last
+CREATE MATERIALIZED VIEW water_consumption_aggregation_no_addition
+            WITH (timescaledb.continuous, timescaledb.materialized_only = TRUE)
+AS
+SELECT sensor_id,
+       (max(water_index) - min(water_index))                    AS water_consumption,
+       time_bucket(INTERVAL '1 minute', timestamp) AS timestamp
+FROM water_consumption
+GROUP BY sensor_id, time_bucket(INTERVAL '1 minute', timestamp)
+WITH NO DATA;
+
+CALL refresh_continuous_aggregate('water_consumption_aggregation_no_addition', NULL, NULL);
+
+-- The results of the view and the query over hypertable should be the same
+SELECT * FROM water_consumption_aggregation_no_addition ORDER BY water_consumption;
+SELECT sensor_id,
+       (max(water_index) - min(water_index))                    AS water_consumption,
+       time_bucket(INTERVAL '1 minute', timestamp) AS timestamp
+FROM water_consumption
+GROUP BY sensor_id, time_bucket(INTERVAL '1 minute', timestamp)
+ORDER BY water_consumption;
+
+DROP TABLE water_consumption CASCADE;