diff --git a/.unreleased/fix_6365 b/.unreleased/fix_6365 new file mode 100644 index 00000000000..f552f494169 --- /dev/null +++ b/.unreleased/fix_6365 @@ -0,0 +1 @@ +Fixes: #6365 Use numrows_pre_compression in approximate row count diff --git a/sql/size_utils.sql b/sql/size_utils.sql index 19645534094..a362ffb861b 100644 --- a/sql/size_utils.sql +++ b/sql/size_utils.sql @@ -453,14 +453,15 @@ BEGIN IF local_compressed_hypertable_id IS NOT NULL THEN uncompressed_row_count = _timescaledb_functions.get_approx_row_count(relation); - WITH compressed_hypertable AS (SELECT table_name, schema_name FROM _timescaledb_catalog.hypertable ht - WHERE ht.id = local_compressed_hypertable_id) - SELECT c.oid INTO compressed_hypertable_oid FROM pg_class c - INNER JOIN compressed_hypertable h ON (c.relname = h.table_name) - INNER JOIN pg_namespace n ON (n.nspname = h.schema_name); - - compressed_row_count = _timescaledb_functions.get_approx_row_count(compressed_hypertable_oid); - RETURN (uncompressed_row_count + (compressed_row_count * max_compressed_row_count)); + -- use the compression_chunk_size stats to fetch precompressed num rows + SELECT COALESCE(SUM(numrows_pre_compression), 0) FROM _timescaledb_catalog.chunk srcch, + _timescaledb_catalog.compression_chunk_size map, _timescaledb_catalog.hypertable srcht + INTO compressed_row_count + WHERE map.chunk_id = srcch.id + AND srcht.id = srcch.hypertable_id AND srcht.table_name = local_table_name + AND srcht.schema_name = local_schema_name; + + RETURN (uncompressed_row_count + compressed_row_count); ELSE uncompressed_row_count = _timescaledb_functions.get_approx_row_count(relation); RETURN uncompressed_row_count; @@ -477,22 +478,24 @@ BEGIN -- 'input is chunk #1'; IF is_compressed_chunk IS NULL AND local_compressed_chunk_id IS NOT NULL THEN -- 'Include both uncompressed and compressed chunk #2'; - WITH compressed_ns_oid AS ( SELECT table_name, oid FROM _timescaledb_catalog.chunk ch INNER JOIN pg_namespace ns ON - (ch.id = local_compressed_chunk_id and ch.schema_name = ns.nspname)) - SELECT c.oid FROM pg_class c INNER JOIN compressed_ns_oid - ON ( c.relnamespace = compressed_ns_oid.oid AND c.relname = compressed_ns_oid.table_name) - INTO local_compressed_chunk_oid; + -- use the compression_chunk_size stats to fetch precompressed num rows + SELECT COALESCE(numrows_pre_compression, 0) FROM _timescaledb_catalog.compression_chunk_size + INTO compressed_row_count + WHERE compressed_chunk_id = local_compressed_chunk_id; uncompressed_row_count = _timescaledb_functions.get_approx_row_count(relation); - compressed_row_count = _timescaledb_functions.get_approx_row_count(local_compressed_chunk_oid); - RETURN uncompressed_row_count + (compressed_row_count * max_compressed_row_count); + RETURN (uncompressed_row_count + compressed_row_count); ELSIF is_compressed_chunk IS NULL AND local_compressed_chunk_id IS NULL THEN -- 'input relation is uncompressed chunk #3'; uncompressed_row_count = _timescaledb_functions.get_approx_row_count(relation); RETURN uncompressed_row_count; ELSE -- 'compressed chunk only #4'; - compressed_row_count = _timescaledb_functions.get_approx_row_count(relation) * max_compressed_row_count; + -- use the compression_chunk_size stats to fetch precompressed num rows + SELECT COALESCE(SUM(numrows_pre_compression), 0) FROM _timescaledb_catalog.chunk srcch, + _timescaledb_catalog.compression_chunk_size map INTO compressed_row_count + WHERE map.compressed_chunk_id = srcch.id + AND srcch.table_name = local_table_name AND srcch.schema_name = local_schema_name; RETURN compressed_row_count; END IF; END IF; diff --git a/tsl/test/expected/compression.out b/tsl/test/expected/compression.out index 6ca32244a70..bea8c48e1c8 100644 --- a/tsl/test/expected/compression.out +++ b/tsl/test/expected/compression.out @@ -39,6 +39,20 @@ insert into foo values( 3 , 16 , 20, NULL); insert into foo values( 10 , 10 , 20, NULL); insert into foo values( 20 , 11 , 20, NULL); insert into foo values( 30 , 12 , 20, NULL); +analyze foo; +-- check that approximate_row_count works with a regular table +SELECT approximate_row_count('foo'); + approximate_row_count +----------------------- + 4 +(1 row) + +SELECT count(*) from foo; + count +------- + 4 +(1 row) + alter table foo set (timescaledb.compress, timescaledb.compress_segmentby = 'a,b', timescaledb.compress_orderby = 'c desc, d asc nulls last'); --test self-refencing updates SET timescaledb.enable_transparent_decompression to ON; @@ -1259,6 +1273,7 @@ SELECT * FROM pg_stats WHERE tablename = :statchunk; (0 rows) ALTER TABLE stattest SET (timescaledb.compress); +-- check that approximate_row_count works with all normal chunks SELECT approximate_row_count('stattest'); approximate_row_count ----------------------- @@ -1271,10 +1286,18 @@ SELECT compress_chunk(c) FROM show_chunks('stattest') c; _timescaledb_internal._hyper_27_55_chunk (1 row) +-- check that approximate_row_count works with all compressed chunks SELECT approximate_row_count('stattest'); approximate_row_count ----------------------- - 0 + 26 +(1 row) + +-- actual count should match with the above +SELECT count(*) from stattest; + count +------- + 26 (1 row) -- Uncompressed chunk table is empty since we just compressed the chunk and moved everything to compressed chunk table. @@ -1330,6 +1353,13 @@ SELECT relpages, reltuples FROM pg_class WHERE relname = :'STAT_COMP_CHUNK_NAME' 1 | 1 (1 row) +-- verify that approximate_row_count works fine on a chunk with compressed data +SELECT approximate_row_count('_timescaledb_internal.' || :'STAT_COMP_CHUNK_NAME'); + approximate_row_count +----------------------- + 26 +(1 row) + -- Verify partial chunk stats are handled correctly when analyzing -- for both uncompressed and compressed chunk tables INSERT INTO stattest SELECT '2020/02/20 01:00'::TIMESTAMPTZ + ('1 hour'::interval * v), 250 * v FROM generate_series(25,50) v; @@ -1362,6 +1392,28 @@ SELECT relpages, reltuples FROM pg_class WHERE relname = :'STAT_COMP_CHUNK_NAME' 1 | 1 (1 row) +-- verify that approximate_row_count works fine on a chunk with a mix of uncompressed +-- and compressed data +SELECT table_name as "STAT_CHUNK_NAME" from temptable \gset +SELECT approximate_row_count('_timescaledb_internal.' || :'STAT_CHUNK_NAME'); + approximate_row_count +----------------------- + 52 +(1 row) + +-- should match with the result via the hypertable post in-memory decompression +SELECT count(*) from stattest; + count +------- + 52 +(1 row) + +SELECT count(*) from show_chunks('stattest'); + count +------- + 1 +(1 row) + -- Verify that decompressing the chunk restores autoanalyze to the hypertable's setting SELECT reloptions FROM pg_class WHERE relname = :statchunk; reloptions @@ -1423,7 +1475,6 @@ SELECT relpages, reltuples FROM pg_class WHERE relname = :statchunk; 1 | 52 (1 row) -DROP TABLE stattest; --- Test that analyze on compression internal table updates stats on original chunks CREATE TABLE stattest2(time TIMESTAMPTZ NOT NULL, c1 int, c2 int); SELECT create_hypertable('stattest2', 'time', chunk_time_interval=>'1 day'::interval); @@ -2626,3 +2677,30 @@ SELECT * FROM sensor_data_compressed ORDER BY time DESC LIMIT 5; (70 rows) RESET timescaledb.enable_decompression_sorted_merge; +-- create another chunk +INSERT INTO stattest SELECT '2021/02/20 01:00'::TIMESTAMPTZ + ('1 hour'::interval * v), 250 * v FROM generate_series(125,140) v; +ANALYZE stattest; +SELECT count(*) from show_chunks('stattest'); + count +------- + 2 +(1 row) + +SELECT table_name INTO TEMPORARY temptable FROM _timescaledb_catalog.chunk WHERE hypertable_id = (SELECT id FROM _timescaledb_catalog.hypertable WHERE table_name = 'stattest') ORDER BY creation_time desc limit 1; +SELECT table_name as "STAT_CHUNK2_NAME" FROM temptable \gset +-- verify that approximate_row_count works ok on normal chunks +SELECT approximate_row_count('_timescaledb_internal.' || :'STAT_CHUNK2_NAME'); + approximate_row_count +----------------------- + 16 +(1 row) + +-- verify that approximate_row_count works fine on a hypertable with a mix of uncompressed +-- and compressed data +SELECT approximate_row_count('stattest'); + approximate_row_count +----------------------- + 68 +(1 row) + +DROP TABLE stattest; diff --git a/tsl/test/sql/compression.sql b/tsl/test/sql/compression.sql index b0412b58a53..8249c7d3396 100644 --- a/tsl/test/sql/compression.sql +++ b/tsl/test/sql/compression.sql @@ -19,6 +19,10 @@ insert into foo values( 3 , 16 , 20, NULL); insert into foo values( 10 , 10 , 20, NULL); insert into foo values( 20 , 11 , 20, NULL); insert into foo values( 30 , 12 , 20, NULL); +analyze foo; +-- check that approximate_row_count works with a regular table +SELECT approximate_row_count('foo'); +SELECT count(*) from foo; alter table foo set (timescaledb.compress, timescaledb.compress_segmentby = 'a,b', timescaledb.compress_orderby = 'c desc, d asc nulls last'); @@ -538,9 +542,13 @@ SELECT table_name INTO TEMPORARY temptable FROM _timescaledb_catalog.chunk WHERE SELECT * FROM pg_stats WHERE tablename = :statchunk; ALTER TABLE stattest SET (timescaledb.compress); +-- check that approximate_row_count works with all normal chunks SELECT approximate_row_count('stattest'); SELECT compress_chunk(c) FROM show_chunks('stattest') c; +-- check that approximate_row_count works with all compressed chunks SELECT approximate_row_count('stattest'); +-- actual count should match with the above +SELECT count(*) from stattest; -- Uncompressed chunk table is empty since we just compressed the chunk and moved everything to compressed chunk table. -- reltuples is initially -1 on PG14 before VACUUM/ANALYZE was run SELECT relpages, CASE WHEN reltuples > 0 THEN reltuples ELSE 0 END as reltuples FROM pg_class WHERE relname = :statchunk; @@ -564,6 +572,8 @@ SELECT relpages, reltuples FROM pg_class WHERE relname = :statchunk; -- verify that corresponding compressed chunk table stats is updated as well. SELECT relpages, reltuples FROM pg_class WHERE relname = :'STAT_COMP_CHUNK_NAME'; +-- verify that approximate_row_count works fine on a chunk with compressed data +SELECT approximate_row_count('_timescaledb_internal.' || :'STAT_COMP_CHUNK_NAME'); -- Verify partial chunk stats are handled correctly when analyzing -- for both uncompressed and compressed chunk tables @@ -577,7 +587,13 @@ SELECT relpages, reltuples FROM pg_class WHERE relname = :statchunk; -- verify that corresponding compressed chunk table stats have not changed since -- we didn't compress anything new. SELECT relpages, reltuples FROM pg_class WHERE relname = :'STAT_COMP_CHUNK_NAME'; - +-- verify that approximate_row_count works fine on a chunk with a mix of uncompressed +-- and compressed data +SELECT table_name as "STAT_CHUNK_NAME" from temptable \gset +SELECT approximate_row_count('_timescaledb_internal.' || :'STAT_CHUNK_NAME'); +-- should match with the result via the hypertable post in-memory decompression +SELECT count(*) from stattest; +SELECT count(*) from show_chunks('stattest'); -- Verify that decompressing the chunk restores autoanalyze to the hypertable's setting SELECT reloptions FROM pg_class WHERE relname = :statchunk; @@ -597,7 +613,6 @@ SET client_min_messages TO NOTICE; SELECT histogram_bounds FROM pg_stats WHERE tablename = :statchunk and attname = 'c1'; SELECT relpages, reltuples FROM pg_class WHERE relname = :statchunk; -DROP TABLE stattest; --- Test that analyze on compression internal table updates stats on original chunks CREATE TABLE stattest2(time TIMESTAMPTZ NOT NULL, c1 int, c2 int); @@ -1052,3 +1067,17 @@ SET timescaledb.enable_decompression_sorted_merge = FALSE; :PREFIX SELECT * FROM sensor_data_compressed ORDER BY time DESC LIMIT 5; RESET timescaledb.enable_decompression_sorted_merge; + +-- create another chunk +INSERT INTO stattest SELECT '2021/02/20 01:00'::TIMESTAMPTZ + ('1 hour'::interval * v), 250 * v FROM generate_series(125,140) v; +ANALYZE stattest; +SELECT count(*) from show_chunks('stattest'); +SELECT table_name INTO TEMPORARY temptable FROM _timescaledb_catalog.chunk WHERE hypertable_id = (SELECT id FROM _timescaledb_catalog.hypertable WHERE table_name = 'stattest') ORDER BY creation_time desc limit 1; +SELECT table_name as "STAT_CHUNK2_NAME" FROM temptable \gset +-- verify that approximate_row_count works ok on normal chunks +SELECT approximate_row_count('_timescaledb_internal.' || :'STAT_CHUNK2_NAME'); +-- verify that approximate_row_count works fine on a hypertable with a mix of uncompressed +-- and compressed data +SELECT approximate_row_count('stattest'); + +DROP TABLE stattest;