Skip to content

Commit

Permalink
Fix support for complex aggregate expression
Browse files Browse the repository at this point in the history
Fixes support for continuous aggregates when the view query contains
an expression with several aggregates, e.g., `max(val) - min(val)`.
Usage of continuous aggregates with such expression was producing
errors if the aggregate expression was not the last in the SELECT
clause or not all GROUP BY expressions were present in the SELECT
clause.

An expression with several aggregates is materialized with partials
per aggregate. For example, `max(val) - min(val)` will be materialized
in two partial entry columns: one for `max` and one for `min`. Thus
all columns in the materialized hypertable should account for the
number of partials and cannot just use the position in the original
query. This fix makes sure to account for such case.

Fixes #2616
  • Loading branch information
k-rus committed Nov 20, 2020
1 parent 47da879 commit 5763ca3
Show file tree
Hide file tree
Showing 3 changed files with 234 additions and 0 deletions.
8 changes: 8 additions & 0 deletions tsl/src/continuous_aggs/create.c
Original file line number Diff line number Diff line change
Expand Up @@ -1184,6 +1184,14 @@ mattablecolumninfo_addentry(MatTableColumnInfo *out, Node *input, int original_q
part_te = (TargetEntry *) copyObject(input);
/*need to project all the partial entries so that materialization table is filled */
part_te->resjunk = false;

/* Partial entry should have actual position in the column list of the materialization
* table, which is not necessary the same as original. For example, if a column in a
* original view query is an expression with several aggregates, e.g.,
* `max(val)-min(val)`, then the original column will be represented as two partial
* entries: one for `min` and one for `max`. */
part_te->resno = matcolno;

if (timebkt_chk)
{
col->is_not_null = true;
Expand Down
135 changes: 135 additions & 0 deletions tsl/test/expected/continuous_aggs.out
Original file line number Diff line number Diff line change
Expand Up @@ -1276,3 +1276,138 @@ SELECT * FROM mat_types;
Fri Dec 31 16:00:00 1999 PST | yellow | {foo,bar,baz} | {1,2.5,3}
(1 row)

-------------------------------------------------------------------------------------
-- Test issue #2616 where cagg view contains an experssion with several aggregates in
CREATE TABLE water_consumption
(
sensor_id integer NOT NULL,
timestamp timestamp(0) NOT NULL,
water_index integer
);
SELECT create_hypertable('water_consumption', 'timestamp', 'sensor_id', 2);
create_hypertable
---------------------------------
(34,public,water_consumption,t)
(1 row)

INSERT INTO public.water_consumption (sensor_id, timestamp, water_index) VALUES
(1, '2010-11-03 09:42:30', 1030),
(1, '2010-11-03 09:42:40', 1032),
(1, '2010-11-03 09:42:50', 1035),
(1, '2010-11-03 09:43:30', 1040),
(1, '2010-11-03 09:43:40', 1045),
(1, '2010-11-03 09:43:50', 1050),
(1, '2010-11-03 09:44:30', 1052),
(1, '2010-11-03 09:44:40', 1057),
(1, '2010-11-03 09:44:50', 1060),
(1, '2010-11-03 09:45:30', 1063),
(1, '2010-11-03 09:45:40', 1067),
(1, '2010-11-03 09:45:50', 1070);
-- The test with the view originally reported in the issue.
CREATE MATERIALIZED VIEW water_consumption_aggregation_minute
WITH (timescaledb.continuous, timescaledb.materialized_only = TRUE)
AS
SELECT sensor_id,
time_bucket(INTERVAL '1 minute', timestamp) + '1 minute' AS timestamp,
(max(water_index) - min(water_index)) AS water_consumption
FROM water_consumption
GROUP BY sensor_id, time_bucket(INTERVAL '1 minute', timestamp)
WITH NO DATA;
CALL refresh_continuous_aggregate('water_consumption_aggregation_minute', NULL, NULL);
-- The results of the view and the query over hypertable should be the same
SELECT * FROM water_consumption_aggregation_minute ORDER BY water_consumption;
sensor_id | timestamp | water_consumption
-----------+--------------------------+-------------------
1 | Wed Nov 03 09:43:00 2010 | 5
1 | Wed Nov 03 09:46:00 2010 | 7
1 | Wed Nov 03 09:45:00 2010 | 8
1 | Wed Nov 03 09:44:00 2010 | 10
(4 rows)

SELECT sensor_id,
time_bucket(INTERVAL '1 minute', timestamp) + '1 minute' AS timestamp,
(max(water_index) - min(water_index)) AS water_consumption
FROM water_consumption
GROUP BY sensor_id, time_bucket(INTERVAL '1 minute', timestamp)
ORDER BY water_consumption;
sensor_id | timestamp | water_consumption
-----------+--------------------------+-------------------
1 | Wed Nov 03 09:43:00 2010 | 5
1 | Wed Nov 03 09:46:00 2010 | 7
1 | Wed Nov 03 09:45:00 2010 | 8
1 | Wed Nov 03 09:44:00 2010 | 10
(4 rows)

-- Simplified test, where the view doesn't contain all group by clauses
CREATE MATERIALIZED VIEW water_consumption_no_select_bucket
WITH (timescaledb.continuous, timescaledb.materialized_only = TRUE)
AS
SELECT sensor_id,
(max(water_index) - min(water_index)) AS water_consumption
FROM water_consumption
GROUP BY sensor_id, time_bucket(INTERVAL '1 minute', timestamp)
WITH NO DATA;
CALL refresh_continuous_aggregate('water_consumption_no_select_bucket', NULL, NULL);
-- The results of the view and the query over hypertable should be the same
SELECT * FROM water_consumption_no_select_bucket ORDER BY water_consumption;
sensor_id | water_consumption
-----------+-------------------
1 | 5
1 | 7
1 | 8
1 | 10
(4 rows)

SELECT sensor_id,
(max(water_index) - min(water_index)) AS water_consumption
FROM water_consumption
GROUP BY sensor_id, time_bucket(INTERVAL '1 minute', timestamp)
ORDER BY water_consumption;
sensor_id | water_consumption
-----------+-------------------
1 | 5
1 | 7
1 | 8
1 | 10
(4 rows)

-- The test with SELECT matching GROUP BY and placing aggregate expression not the last
CREATE MATERIALIZED VIEW water_consumption_aggregation_no_addition
WITH (timescaledb.continuous, timescaledb.materialized_only = TRUE)
AS
SELECT sensor_id,
(max(water_index) - min(water_index)) AS water_consumption,
time_bucket(INTERVAL '1 minute', timestamp) AS timestamp
FROM water_consumption
GROUP BY sensor_id, time_bucket(INTERVAL '1 minute', timestamp)
WITH NO DATA;
CALL refresh_continuous_aggregate('water_consumption_aggregation_no_addition', NULL, NULL);
-- The results of the view and the query over hypertable should be the same
SELECT * FROM water_consumption_aggregation_no_addition ORDER BY water_consumption;
sensor_id | water_consumption | timestamp
-----------+-------------------+--------------------------
1 | 5 | Wed Nov 03 09:42:00 2010
1 | 7 | Wed Nov 03 09:45:00 2010
1 | 8 | Wed Nov 03 09:44:00 2010
1 | 10 | Wed Nov 03 09:43:00 2010
(4 rows)

SELECT sensor_id,
(max(water_index) - min(water_index)) AS water_consumption,
time_bucket(INTERVAL '1 minute', timestamp) AS timestamp
FROM water_consumption
GROUP BY sensor_id, time_bucket(INTERVAL '1 minute', timestamp)
ORDER BY water_consumption;
sensor_id | water_consumption | timestamp
-----------+-------------------+--------------------------
1 | 5 | Wed Nov 03 09:42:00 2010
1 | 7 | Wed Nov 03 09:45:00 2010
1 | 8 | Wed Nov 03 09:44:00 2010
1 | 10 | Wed Nov 03 09:43:00 2010
(4 rows)

DROP TABLE water_consumption CASCADE;
NOTICE: drop cascades to 6 other objects
NOTICE: drop cascades to table _timescaledb_internal._hyper_35_75_chunk
NOTICE: drop cascades to table _timescaledb_internal._hyper_36_76_chunk
NOTICE: drop cascades to table _timescaledb_internal._hyper_37_77_chunk
91 changes: 91 additions & 0 deletions tsl/test/sql/continuous_aggs.sql
Original file line number Diff line number Diff line change
Expand Up @@ -930,3 +930,94 @@ GROUP BY
CALL refresh_continuous_aggregate('mat_types',NULL,NULL);
SELECT * FROM mat_types;

-------------------------------------------------------------------------------------
-- Test issue #2616 where cagg view contains an experssion with several aggregates in

CREATE TABLE water_consumption
(
sensor_id integer NOT NULL,
timestamp timestamp(0) NOT NULL,
water_index integer
);

SELECT create_hypertable('water_consumption', 'timestamp', 'sensor_id', 2);

INSERT INTO public.water_consumption (sensor_id, timestamp, water_index) VALUES
(1, '2010-11-03 09:42:30', 1030),
(1, '2010-11-03 09:42:40', 1032),
(1, '2010-11-03 09:42:50', 1035),
(1, '2010-11-03 09:43:30', 1040),
(1, '2010-11-03 09:43:40', 1045),
(1, '2010-11-03 09:43:50', 1050),
(1, '2010-11-03 09:44:30', 1052),
(1, '2010-11-03 09:44:40', 1057),
(1, '2010-11-03 09:44:50', 1060),
(1, '2010-11-03 09:45:30', 1063),
(1, '2010-11-03 09:45:40', 1067),
(1, '2010-11-03 09:45:50', 1070);

-- The test with the view originally reported in the issue.
CREATE MATERIALIZED VIEW water_consumption_aggregation_minute
WITH (timescaledb.continuous, timescaledb.materialized_only = TRUE)
AS
SELECT sensor_id,
time_bucket(INTERVAL '1 minute', timestamp) + '1 minute' AS timestamp,
(max(water_index) - min(water_index)) AS water_consumption
FROM water_consumption
GROUP BY sensor_id, time_bucket(INTERVAL '1 minute', timestamp)
WITH NO DATA;

CALL refresh_continuous_aggregate('water_consumption_aggregation_minute', NULL, NULL);

-- The results of the view and the query over hypertable should be the same
SELECT * FROM water_consumption_aggregation_minute ORDER BY water_consumption;
SELECT sensor_id,
time_bucket(INTERVAL '1 minute', timestamp) + '1 minute' AS timestamp,
(max(water_index) - min(water_index)) AS water_consumption
FROM water_consumption
GROUP BY sensor_id, time_bucket(INTERVAL '1 minute', timestamp)
ORDER BY water_consumption;

-- Simplified test, where the view doesn't contain all group by clauses
CREATE MATERIALIZED VIEW water_consumption_no_select_bucket
WITH (timescaledb.continuous, timescaledb.materialized_only = TRUE)
AS
SELECT sensor_id,
(max(water_index) - min(water_index)) AS water_consumption
FROM water_consumption
GROUP BY sensor_id, time_bucket(INTERVAL '1 minute', timestamp)
WITH NO DATA;

CALL refresh_continuous_aggregate('water_consumption_no_select_bucket', NULL, NULL);

-- The results of the view and the query over hypertable should be the same
SELECT * FROM water_consumption_no_select_bucket ORDER BY water_consumption;
SELECT sensor_id,
(max(water_index) - min(water_index)) AS water_consumption
FROM water_consumption
GROUP BY sensor_id, time_bucket(INTERVAL '1 minute', timestamp)
ORDER BY water_consumption;

-- The test with SELECT matching GROUP BY and placing aggregate expression not the last
CREATE MATERIALIZED VIEW water_consumption_aggregation_no_addition
WITH (timescaledb.continuous, timescaledb.materialized_only = TRUE)
AS
SELECT sensor_id,
(max(water_index) - min(water_index)) AS water_consumption,
time_bucket(INTERVAL '1 minute', timestamp) AS timestamp
FROM water_consumption
GROUP BY sensor_id, time_bucket(INTERVAL '1 minute', timestamp)
WITH NO DATA;

CALL refresh_continuous_aggregate('water_consumption_aggregation_no_addition', NULL, NULL);

-- The results of the view and the query over hypertable should be the same
SELECT * FROM water_consumption_aggregation_no_addition ORDER BY water_consumption;
SELECT sensor_id,
(max(water_index) - min(water_index)) AS water_consumption,
time_bucket(INTERVAL '1 minute', timestamp) AS timestamp
FROM water_consumption
GROUP BY sensor_id, time_bucket(INTERVAL '1 minute', timestamp)
ORDER BY water_consumption;

DROP TABLE water_consumption CASCADE;

0 comments on commit 5763ca3

Please sign in to comment.