Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add invalidations for incomplete aggregates #2403

Merged
merged 1 commit into from Sep 25, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 4 additions & 4 deletions scripts/test_update_from_tag.sh
Expand Up @@ -175,14 +175,14 @@ docker_run_vol ${CONTAINER_UPDATED} ${UPDATE_VOLUME}:/var/lib/postgresql/data ${
echo "Executing ALTER EXTENSION timescaledb UPDATE"
docker_pgcmd ${CONTAINER_UPDATED} "ALTER EXTENSION timescaledb UPDATE"

docker_exec ${CONTAINER_UPDATED} "pg_dump -h localhost -U postgres -Fc single > /tmp/single.sql"
docker cp ${CONTAINER_UPDATED}:/tmp/single.sql ${TEST_TMPDIR}/single.sql

echo "Executing setup script on clean"
docker_pgscript ${CONTAINER_CLEAN_RERUN} /src/test/sql/updates/setup.${TEST_VERSION}.sql

echo "Testing updated vs clean"
docker_pgdiff ${CONTAINER_UPDATED} ${CONTAINER_CLEAN_RERUN} /src/test/sql/updates/test-rerun.sql
docker_pgdiff ${CONTAINER_UPDATED} ${CONTAINER_CLEAN_RERUN} /src/test/sql/updates/test-rerun.${TEST_VERSION}.sql

docker_exec ${CONTAINER_UPDATED} "pg_dump -h localhost -U postgres -Fc single > /tmp/single.sql"
docker cp ${CONTAINER_UPDATED}:/tmp/single.sql ${TEST_TMPDIR}/single.sql

echo "Restoring database on clean version"
docker cp ${TEST_TMPDIR}/single.sql ${CONTAINER_CLEAN_RESTORE}:/tmp/single.sql
Expand Down
2 changes: 1 addition & 1 deletion scripts/test_updates.sh
Expand Up @@ -30,7 +30,7 @@ do
docker rmi -f ${UPDATE_TO_IMAGE}:${UPDATE_TO_TAG}
;;
d)
echo "Keeping temporary directory"
echo "Keeping temporary directory ${TEST_TMPDIR}"
KEEP_TEMP_DIRS=true
TEST_UPDATE_FROM_TAGS_EXTRA_ARGS="-d"
;;
Expand Down
9 changes: 9 additions & 0 deletions sql/updates/latest-dev.sql
Expand Up @@ -303,6 +303,15 @@ SELECT pg_catalog.pg_extension_config_dump('_timescaledb_config.bgw_job', 'WHERE
GRANT SELECT ON _timescaledb_config.bgw_job TO PUBLIC;
GRANT SELECT ON _timescaledb_config.bgw_job_id_seq TO PUBLIC;

-- Add entry to materialization invalidation log to indicate that [watermark, +infinity) is invalid
INSERT INTO _timescaledb_catalog.continuous_aggs_materialization_invalidation_log
SELECT materialization_id, BIGINT '-9223372036854775808', watermark, 9223372036854775807
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we have a new cagg that has never been refreshed, do we need an entry in the cont agg materialization log that says [-infinity, +infinity) is invalid? (cc @erimatnor)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems like that's a pretty tight window. I believe the background job gets launched as soon as we create the materialized view in 1.7.

FROM _timescaledb_catalog.continuous_aggs_completed_threshold;
-- Also handle continuous aggs that have never been run: these are the
-- continuous_agg entries with no row in continuous_aggs_completed_threshold.
-- The set of such ids is used as a row source (FROM) rather than as a scalar
-- subquery in the select list, so that:
--   * no never-run aggregates  => no rows are inserted (instead of a row
--     with a NULL materialization id), and
--   * several never-run aggregates => one row is inserted per aggregate
--     (instead of failing with "more than one row returned by a subquery
--     used as an expression").
INSERT INTO _timescaledb_catalog.continuous_aggs_materialization_invalidation_log
SELECT unrefreshed.id, -9223372036854775808, -9223372036854775808, 9223372036854775807
FROM (
    SELECT mat_hypertable_id AS id FROM _timescaledb_catalog.continuous_agg
    EXCEPT
    SELECT materialization_id FROM _timescaledb_catalog.continuous_aggs_completed_threshold
) AS unrefreshed;

-- drop completed_threshold table, which is no longer used
ALTER EXTENSION timescaledb DROP TABLE _timescaledb_catalog.continuous_aggs_completed_threshold;
DROP TABLE IF EXISTS _timescaledb_catalog.continuous_aggs_completed_threshold;
Expand Down
47 changes: 47 additions & 0 deletions test/sql/updates/setup.continuous_aggs.v2.sql
Expand Up @@ -263,3 +263,50 @@ BEGIN
END $$;

REFRESH MATERIALIZED VIEW mat_ignoreinval;

-- Fixture for checking that new data beyond the invalidation threshold is
-- properly handled: a hypertable seeded with daily readings for two locations.
CREATE TABLE inval_test (
    time TIMESTAMPTZ,
    location TEXT,
    temperature DOUBLE PRECISION
);
SELECT create_hypertable('inval_test', 'time', chunk_time_interval => INTERVAL '1 week');

-- One reading per day for 'POR'.
INSERT INTO inval_test (time, location, temperature)
SELECT
    generate_series('2018-12-01 00:00'::timestamp, '2018-12-20 00:00'::timestamp, '1 day'),
    'POR',
    generate_series(40.5, 50.0, 0.5);

-- One reading per day for 'NYC'.
INSERT INTO inval_test (time, location, temperature)
SELECT
    generate_series('2018-12-01 00:00'::timestamp, '2018-12-20 00:00'::timestamp, '1 day'),
    'NYC',
    generate_series(31.0, 50.0, 1.0);

-- Create the continuous aggregate mat_inval over inval_test, using whichever
-- creation syntax matches the installed timescaledb extension version.
DO LANGUAGE PLPGSQL $$
DECLARE
    -- extversion of the installed timescaledb extension, as text.
    installed_version TEXT;
BEGIN
    SELECT extversion
      INTO installed_version
      FROM pg_extension
     WHERE extname = 'timescaledb';

    -- NOTE: this compares text values, not parsed version numbers.
    IF installed_version < '2.0.0' THEN
        -- Before 2.0.0: plain CREATE VIEW with the refresh settings given as
        -- WITH options.
        CREATE VIEW mat_inval
        WITH (
            timescaledb.continuous,
            timescaledb.materialized_only=true,
            timescaledb.refresh_lag='-20 days',
            timescaledb.refresh_interval='12 hours',
            timescaledb.max_interval_per_job='100000 days'
        )
        AS
            SELECT
                time_bucket('10 minute', time) as bucket,
                location,
                min(temperature) as min_temp,
                max(temperature) as max_temp,
                avg(temperature) as avg_temp
            FROM inval_test
            GROUP BY bucket, location;

    ELSE
        -- Otherwise: CREATE MATERIALIZED VIEW ... WITH NO DATA, followed by
        -- an explicit refresh policy.
        CREATE MATERIALIZED VIEW mat_inval
        WITH (
            timescaledb.continuous,
            timescaledb.materialized_only=true
        )
        AS
            SELECT
                time_bucket('10 minute', time) as bucket,
                location,
                min(temperature) as min_temp,
                max(temperature) as max_temp,
                avg(temperature) as avg_temp
            FROM inval_test
            GROUP BY bucket, location WITH NO DATA;

        PERFORM add_continuous_aggregate_policy('mat_inval', NULL, '-20 days'::interval, '12 hours');
    END IF;
END $$;

REFRESH MATERIALIZED VIEW mat_inval;

INSERT INTO inval_test
Copy link
Contributor

@gayyappan gayyappan Sep 18, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This time range is not in the original data set, so I don't think you have any invalidations for the last completed threshold range.
The inserts here should be of 2 kinds.

  1. generate invalidations: a subset of the range from the insert at line 271.
  2. New data (if you are testing this case as well) for a new range like at line 311.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is actually intended to all be new data. I believe we already have other tests for existing invalidations, what this test is intended to verify is that we generate a new invalidation that covers new data when updating to 2.0.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the refresh and query post upgrade done in test-rerun.v6.sql?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I needed to move that from the post script to the rerun script, as the post script isn't called on the rerun container (the post was just run on the upgraded container and a dump/restore of the upgraded container, both of which had the wrong data and hence no diff).

SELECT generate_series('2118-12-01 00:00'::timestamp, '2118-12-20 00:00'::timestamp, '1 day'), 'POR', generate_series(135.25, 140.0, 0.25);
INSERT INTO inval_test
SELECT generate_series('2118-12-01 00:00'::timestamp, '2118-12-20 00:00'::timestamp, '1 day'), 'NYC', generate_series(131.0, 150.0, 1.0);
File renamed without changes.
5 changes: 5 additions & 0 deletions test/sql/updates/test-rerun.v2.sql
@@ -0,0 +1,5 @@
-- This file and its contents are licensed under the Apache License 2.0.
-- Please see the included NOTICE for copyright information and
-- LICENSE-APACHE for a copy of the license.

\ir test-rerun.v1.sql
5 changes: 5 additions & 0 deletions test/sql/updates/test-rerun.v3.sql
@@ -0,0 +1,5 @@
-- This file and its contents are licensed under the Apache License 2.0.
-- Please see the included NOTICE for copyright information and
-- LICENSE-APACHE for a copy of the license.

\ir test-rerun.v1.sql
5 changes: 5 additions & 0 deletions test/sql/updates/test-rerun.v4.sql
@@ -0,0 +1,5 @@
-- This file and its contents are licensed under the Apache License 2.0.
-- Please see the included NOTICE for copyright information and
-- LICENSE-APACHE for a copy of the license.

\ir test-rerun.v1.sql
5 changes: 5 additions & 0 deletions test/sql/updates/test-rerun.v5.sql
@@ -0,0 +1,5 @@
-- This file and its contents are licensed under the Apache License 2.0.
-- Please see the included NOTICE for copyright information and
-- LICENSE-APACHE for a copy of the license.

\ir test-rerun.v1.sql
9 changes: 9 additions & 0 deletions test/sql/updates/test-rerun.v6.sql
@@ -0,0 +1,9 @@
-- This file and its contents are licensed under the Apache License 2.0.
-- Please see the included NOTICE for copyright information and
-- LICENSE-APACHE for a copy of the license.

\ir test-rerun.v1.sql

-- Row count of mat_inval before it is refreshed.
SELECT count(*) FROM mat_inval;
-- Refresh the aggregate, then count again so that the effect of the refresh
-- is visible in the output.
REFRESH MATERIALIZED VIEW mat_inval;
SELECT count(*) FROM mat_inval;