Improve compression datatype handling #6081

Merged: 1 commit, Sep 18, 2023
2 changes: 2 additions & 0 deletions .unreleased/PR_6081
@@ -0,0 +1,2 @@
Fixes: #6081 Improve compressed DML datatype handling

15 changes: 13 additions & 2 deletions tsl/src/compression/compression.c
@@ -30,6 +30,7 @@
 #include <nodes/pg_list.h>
 #include <nodes/print.h>
 #include <parser/parsetree.h>
+#include <parser/parse_coerce.h>
 #include <storage/lmgr.h>
 #include <storage/predicate.h>
 #include <utils/builtins.h>
@@ -1962,8 +1963,18 @@
 	elog(ERROR, "no btree opfamily for type \"%s\"", format_type_be(atttypid));
 
 	Oid opr = get_opfamily_member(tce->btree_opf, atttypid, atttypid, strategy);
-	Assert(OidIsValid(opr));
-	/* We should never end up here but: no operator, no optimization */
+
+	/*
+	 * Fall back to btree operator input type when it is binary compatible with
+	 * the column type and no operator for column type could be found.
+	 */
+	if (!OidIsValid(opr) && IsBinaryCoercible(atttypid, tce->btree_opintype))
+	{
+		opr =
+			get_opfamily_member(tce->btree_opf, tce->btree_opintype, tce->btree_opintype, strategy);
+	}
+
+	/* No operator could be found so we can't create the scankey. */
 	if (!OidIsValid(opr))
 		return num_scankeys;

Codecov / codecov/patch: added lines #L1973 and #L1975 in tsl/src/compression/compression.c were not covered by tests.

Contributor:
Codecov complains about missing coverage. However, the test case seems to cover this part of the code, and removing these lines triggers a test failure. So it does seem to be tested.

Contributor:
Codecov has been all over the place lately; not sure what's going on with it. It keeps reporting random lines as untested even though they are in the same branch where all other lines have been tested.
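
To make the new branch easier to read in isolation, here is a minimal sketch of the lookup as a standalone helper, built on the same PostgreSQL APIs the diff uses (lookup_type_cache, get_opfamily_member, IsBinaryCoercible). The helper name and the factoring into a function are hypothetical; in compression.c the logic is inline, and a missing btree opfamily raises an error rather than returning InvalidOid.

#include <postgres.h>
#include <access/stratnum.h>
#include <parser/parse_coerce.h>
#include <utils/lsyscache.h>
#include <utils/typcache.h>

/*
 * Hypothetical helper: find the btree operator used to build a scankey on a
 * column, e.g. equality (BTEqualStrategyNumber) on a segmentby column.
 * varchar has no opfamily entry of its own, but it is binary coercible to
 * text, the input type of the text btree opfamily, so the text operator can
 * be used for it.
 */
static Oid
scankey_operator_for_type(Oid atttypid, StrategyNumber strategy)
{
	TypeCacheEntry *tce = lookup_type_cache(atttypid, TYPECACHE_BTREE_OPFAMILY);

	if (!OidIsValid(tce->btree_opf))
		return InvalidOid;	/* no btree opfamily for this type at all */

	Oid opr = get_opfamily_member(tce->btree_opf, atttypid, atttypid, strategy);

	/* The fallback this PR adds: retry with the opfamily's own input type. */
	if (!OidIsValid(opr) && IsBinaryCoercible(atttypid, tce->btree_opintype))
		opr = get_opfamily_member(tce->btree_opf,
								  tce->btree_opintype,
								  tce->btree_opintype,
								  strategy);

	/* InvalidOid tells the caller to skip the scankey optimization. */
	return opr;
}

With this fallback, a varchar segmentby column resolves to the text equality operator instead of yielding no operator at all, which is what allows the ON CONFLICT tests below to locate the matching compressed batch.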
Contributor:
Nit: this might be out of the scope of this PR, but I think we should also remove the following Assert in line 1971/1982 (Assert(OidIsValid(opr));).

Again, we have an Assert followed by an if/else on the same condition. The Assert prevents proper testing of this branch in a debug build.

Contributor:
Makes sense to me.
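
Schematically, the pattern flagged here looks like the following (a made-up fragment for illustration, not the literal code around line 1971/1982):

#include <postgres.h>

/*
 * Hypothetical function showing why Assert-then-if on the same condition is
 * hard to test: with assertions enabled (--enable-cassert), the Assert aborts
 * whenever opr is invalid, so the early-return branch below is unreachable in
 * debug-build tests; only production builds can ever take it.
 */
static int
add_scankey_if_operator(Oid opr, int num_scankeys)
{
	Assert(OidIsValid(opr));	/* fires first in cassert builds */
	if (!OidIsValid(opr))		/* dead code under assertions */
		return num_scankeys;
	/* ... build the scankey and count it ... */
	return num_scankeys + 1;
}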


65 changes: 65 additions & 0 deletions tsl/test/shared/expected/compression_dml.out
@@ -104,3 +104,68 @@ QUERY PLAN

DROP TABLE mytab CASCADE;
NOTICE: drop cascades to table _timescaledb_internal.compress_hyper_X_X_chunk
-- test varchar segmentby
CREATE TABLE comp_seg_varchar (
time timestamptz NOT NULL,
source_id varchar(64) NOT NULL,
label varchar NOT NULL,
data jsonb
);
SELECT table_name FROM create_hypertable('comp_seg_varchar', 'time');
WARNING: column type "character varying" used for "source_id" does not follow best practices
WARNING: column type "character varying" used for "label" does not follow best practices
table_name
comp_seg_varchar
(1 row)

CREATE UNIQUE INDEX ON comp_seg_varchar(source_id, label, "time" DESC);
ALTER TABLE comp_seg_varchar SET(timescaledb.compress, timescaledb.compress_segmentby = 'source_id, label', timescaledb.compress_orderby = 'time');
INSERT INTO comp_seg_varchar
SELECT time, source_id, label, '{}' AS data
FROM
generate_series('1990-01-01'::timestamptz, '1990-01-10'::timestamptz, INTERVAL '1 day') AS g1(time),
generate_series(1, 3, 1 ) AS g2(source_id),
generate_series(1, 3, 1 ) AS g3(label);
SELECT compress_chunk(c) FROM show_chunks('comp_seg_varchar') c;
compress_chunk
_timescaledb_internal._hyper_X_X_chunk
_timescaledb_internal._hyper_X_X_chunk
(2 rows)

-- all tuples should come from compressed chunks
EXPLAIN (analyze,costs off, timing off, summary off) SELECT * FROM comp_seg_varchar;
QUERY PLAN
Append (actual rows=90 loops=1)
-> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=27 loops=1)
-> Seq Scan on compress_hyper_X_X_chunk (actual rows=9 loops=1)
-> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=63 loops=1)
-> Seq Scan on compress_hyper_X_X_chunk (actual rows=9 loops=1)
(5 rows)

INSERT INTO comp_seg_varchar(time, source_id, label, data) VALUES ('1990-01-02 00:00:00+00', 'test', 'test', '{}'::jsonb)
ON CONFLICT (source_id, label, time) DO UPDATE SET data = '{"update": true}';
-- no tuples should be moved into uncompressed
EXPLAIN (analyze,costs off, timing off, summary off) SELECT * FROM comp_seg_varchar;
QUERY PLAN
Append (actual rows=91 loops=1)
-> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=27 loops=1)
-> Seq Scan on compress_hyper_X_X_chunk (actual rows=9 loops=1)
-> Seq Scan on _hyper_X_X_chunk (actual rows=1 loops=1)
-> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=63 loops=1)
-> Seq Scan on compress_hyper_X_X_chunk (actual rows=9 loops=1)
(6 rows)

INSERT INTO comp_seg_varchar(time, source_id, label, data) VALUES ('1990-01-02 00:00:00+00', '1', '2', '{}'::jsonb)
ON CONFLICT (source_id, label, time) DO UPDATE SET data = '{"update": true}';
-- 1 batch should be moved into uncompressed
EXPLAIN (analyze,costs off, timing off, summary off) SELECT * FROM comp_seg_varchar;
QUERY PLAN
Append (actual rows=92 loops=1)
-> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=24 loops=1)
-> Seq Scan on compress_hyper_X_X_chunk (actual rows=8 loops=1)
-> Seq Scan on _hyper_X_X_chunk (actual rows=5 loops=1)
-> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=63 loops=1)
-> Seq Scan on compress_hyper_X_X_chunk (actual rows=9 loops=1)
(6 rows)

DROP TABLE comp_seg_varchar;
40 changes: 40 additions & 0 deletions tsl/test/shared/sql/compression_dml.sql
@@ -86,3 +86,43 @@ INSERT INTO mytab SELECT '2022-10-07 05:30:10+05:30'::timestamp with time zone,
EXPLAIN (costs off) SELECT * FROM :chunk_table;
DROP TABLE mytab CASCADE;

-- test varchar segmentby
CREATE TABLE comp_seg_varchar (
time timestamptz NOT NULL,
source_id varchar(64) NOT NULL,
label varchar NOT NULL,
data jsonb
);

Contributor:
Nit: suggested change on the "data jsonb" line (the suggestion differs from the original only in whitespace).

SELECT table_name FROM create_hypertable('comp_seg_varchar', 'time');

CREATE UNIQUE INDEX ON comp_seg_varchar(source_id, label, "time" DESC);

ALTER TABLE comp_seg_varchar SET(timescaledb.compress, timescaledb.compress_segmentby = 'source_id, label', timescaledb.compress_orderby = 'time');

INSERT INTO comp_seg_varchar
SELECT time, source_id, label, '{}' AS data
FROM
generate_series('1990-01-01'::timestamptz, '1990-01-10'::timestamptz, INTERVAL '1 day') AS g1(time),
generate_series(1, 3, 1 ) AS g2(source_id),
generate_series(1, 3, 1 ) AS g3(label);

SELECT compress_chunk(c) FROM show_chunks('comp_seg_varchar') c;


-- all tuples should come from compressed chunks
EXPLAIN (analyze,costs off, timing off, summary off) SELECT * FROM comp_seg_varchar;

INSERT INTO comp_seg_varchar(time, source_id, label, data) VALUES ('1990-01-02 00:00:00+00', 'test', 'test', '{}'::jsonb)
ON CONFLICT (source_id, label, time) DO UPDATE SET data = '{"update": true}';

-- no tuples should be moved into uncompressed
EXPLAIN (analyze,costs off, timing off, summary off) SELECT * FROM comp_seg_varchar;

INSERT INTO comp_seg_varchar(time, source_id, label, data) VALUES ('1990-01-02 00:00:00+00', '1', '2', '{}'::jsonb)
ON CONFLICT (source_id, label, time) DO UPDATE SET data = '{"update": true}';

-- 1 batch should be moved into uncompressed
EXPLAIN (analyze,costs off, timing off, summary off) SELECT * FROM comp_seg_varchar;

DROP TABLE comp_seg_varchar;