Skip to content

Commit

Permalink
Remove paths that reference parent relids for compressed chunks
Browse files Browse the repository at this point in the history
We explicitly filter paths for compressed chunks that
have spurious join clauses between the compressed chunk and
the original chunk or hypertable. However, there are other
cases where a chunk could be a child rel
(i.e. RELOPT_OTHER_MEMBER_REL) such as when the chunk is
referenced as part of a UNION ALL query. We remove all
paths that have spurious join clauses between the compressed
chunk and any implied parent for the chunk.

Fixes #2917
  • Loading branch information
gayyappan committed Jun 10, 2021
1 parent ac031c8 commit 318d2fd
Show file tree
Hide file tree
Showing 4 changed files with 356 additions and 4 deletions.
25 changes: 21 additions & 4 deletions tsl/src/nodes/decompress_chunk/decompress_chunk.c
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,8 @@ ts_decompress_chunk_generate_paths(PlannerInfo *root, RelOptInfo *chunk_rel, Hyp
info,
&sort_info);

/* compute parent relids of the chunk and use it to filter paths*/
Relids parent_relids = find_childrel_parents(root, chunk_rel);
/* create non-parallel paths */
foreach (lc, compressed_rel->pathlist)
{
Expand All @@ -379,11 +381,26 @@ ts_decompress_chunk_generate_paths(PlannerInfo *root, RelOptInfo *chunk_rel, Hyp
* Filter out all paths that try to JOIN the compressed chunk on the
* hypertable or the uncompressed chunk
* Ideally, we wouldn't create these paths in the first place.
* However, create_join_clause code is called by PG while generating paths for the
* compressed_rel via generate_implied_equalities_for_column.
* create_join_clause ends up creating rinfo's between compressed_rel and ht because
* PG does not know that compressed_rel is related to ht in anyway.
* The parent-child relationship between chunk_rel and ht is known
* to PG and so it does not try to create meaningless rinfos for that case.
*/
if (child_path->param_info != NULL &&
(bms_is_member(chunk_rel->relid, child_path->param_info->ppi_req_outer) ||
bms_is_member(ht_index, child_path->param_info->ppi_req_outer)))
continue;
if (child_path->param_info != NULL)
{
if (bms_is_member(chunk_rel->relid, child_path->param_info->ppi_req_outer))
continue;
/* check if this is path made with references between
* compressed_rel + hypertable or a nesting subquery.
* The latter can happen in the case of UNION queries. see github 2917. This
* happens since PG is not aware that the nesting
* subquery that references the hypertable is a parent of compressed_rel as well.
*/
if (bms_overlap(parent_relids, child_path->param_info->ppi_req_outer))
continue;
}

path = decompress_chunk_path_create(root, info, 0, child_path);

Expand Down
118 changes: 118 additions & 0 deletions tsl/test/expected/transparent_decompression_ordered_index-12.out
Original file line number Diff line number Diff line change
Expand Up @@ -983,3 +983,121 @@ ORDER BY m.v0;
(32 rows)

SET timescaledb.enable_chunk_append TO TRUE;
-- github bug 2917 with UNION ALL that references compressed ht
CREATE TABLE entity
(
oid bigint PRIMARY KEY,
type text,
name text
);
INSERT INTO entity values(10, 'VMEM', 'cpu');
CREATE TABLE entity_m2
(
timec timestamp with time zone NOT NULL,
entity_oid bigint ,
entity_hash bigint ,
type text ,
current double precision,
capacity double precision,
utilization double precision,
peak double precision
);
SELECT create_hypertable('entity_m2', 'timec', chunk_time_interval=>'30 days'::interval);
create_hypertable
------------------------
(5,public,entity_m2,t)
(1 row)

INSERT INTO entity_m2 values (
'2020-12-21 15:47:58.778-05' , 10 , -7792214420424674003 , 'VMEM' , 0, 2097152 , 0 , 100);
INSERT INTO entity_m2 values (
'2020-12-21 16:47:58.778-05' , 10 , -7792214420424674003 , 'VMEM' , 0, 2097152 , 0 , 100);
ALTER TABLE entity_m2 SET (timescaledb.compress,
timescaledb.compress_segmentby = 'entity_oid',
timescaledb.compress_orderby = 'type, timec');
SELECT compress_chunk(c) FROM show_chunks('entity_m2') c;
compress_chunk
-----------------------------------------
_timescaledb_internal._hyper_5_13_chunk
(1 row)

CREATE TABLE entity_m1
(
timec timestamp with time zone ,
entity_oid bigint ,
entity_hash bigint ,
type text ,
current double precision,
capacity double precision,
utilization double precision
);
SELECT create_hypertable('entity_m1', 'timec', chunk_time_interval=>'30 days'::interval);
NOTICE: adding not-null constraint to column "timec"
create_hypertable
------------------------
(7,public,entity_m1,t)
(1 row)

INSERT INTO entity_m1 values (
'2020-12-21 16:47:58.778-05' , 10 , -7792214420424674003 , 'VMEM' , 0, 100 , 0 );
create view metric_view as
SELECT m2.timec,
m2.entity_oid,
m2.entity_hash,
m2.type,
m2.current,
m2.capacity,
m2.utilization,
m2.peak
FROM entity_m2 m2
UNION ALL
SELECT m1.timec,
m1.entity_oid,
m1.entity_hash,
m1.type,
m1.current,
m1.capacity,
m1.utilization,
NULL::double precision AS peak
FROM entity_m1 m1;
SET enable_bitmapscan = false;
SET enable_hashjoin = false;
SET enable_mergejoin = false;
SELECT m.timec, avg(m.utilization) AS avg_util
FROM metric_view m, entity e
WHERE m.type = 'VMEM'
AND m.timec BETWEEN '2020-12-21T00:00:00'::timestamptz - interval '7 day' AND date_trunc('day', '2020-12-22T00:00:00'::timestamptz)
AND m.entity_oid = e.oid
GROUP BY 1 ORDER BY 1;
timec | avg_util
----------------------------------+----------
Mon Dec 21 12:47:58.778 2020 PST | 0
Mon Dec 21 13:47:58.778 2020 PST | 0
(2 rows)

--now compress the other table too and rerun the query --
ALTER TABLE entity_m1 SET (timescaledb.compress,
timescaledb.compress_segmentby = 'entity_oid',
timescaledb.compress_orderby = 'type, timec');
SELECT compress_chunk(c) FROM show_chunks('entity_m1') c;
compress_chunk
-----------------------------------------
_timescaledb_internal._hyper_7_15_chunk
(1 row)

SELECT m.timec, avg(m.utilization) AS avg_util
FROM metric_view m, entity e
WHERE m.type = 'VMEM'
AND m.timec BETWEEN '2020-12-21T00:00:00'::timestamptz - interval '7 day' AND date_trunc('day', '2020-12-22T00:00:00'::timestamptz)
AND m.entity_oid = e.oid
GROUP BY 1 ORDER BY 1;
timec | avg_util
----------------------------------+----------
Mon Dec 21 12:47:58.778 2020 PST | 0
Mon Dec 21 13:47:58.778 2020 PST | 0
(2 rows)

RESET enable_bitmapscan ;
RESET enable_hashjoin ;
RESET enable_mergejoin;
-- end github bug 2917
118 changes: 118 additions & 0 deletions tsl/test/expected/transparent_decompression_ordered_index-13.out
Original file line number Diff line number Diff line change
Expand Up @@ -985,3 +985,121 @@ ORDER BY m.v0;
(32 rows)

SET timescaledb.enable_chunk_append TO TRUE;
-- github bug 2917 with UNION ALL that references compressed ht
CREATE TABLE entity
(
oid bigint PRIMARY KEY,
type text,
name text
);
INSERT INTO entity values(10, 'VMEM', 'cpu');
CREATE TABLE entity_m2
(
timec timestamp with time zone NOT NULL,
entity_oid bigint ,
entity_hash bigint ,
type text ,
current double precision,
capacity double precision,
utilization double precision,
peak double precision
);
SELECT create_hypertable('entity_m2', 'timec', chunk_time_interval=>'30 days'::interval);
create_hypertable
------------------------
(5,public,entity_m2,t)
(1 row)

INSERT INTO entity_m2 values (
'2020-12-21 15:47:58.778-05' , 10 , -7792214420424674003 , 'VMEM' , 0, 2097152 , 0 , 100);
INSERT INTO entity_m2 values (
'2020-12-21 16:47:58.778-05' , 10 , -7792214420424674003 , 'VMEM' , 0, 2097152 , 0 , 100);
ALTER TABLE entity_m2 SET (timescaledb.compress,
timescaledb.compress_segmentby = 'entity_oid',
timescaledb.compress_orderby = 'type, timec');
SELECT compress_chunk(c) FROM show_chunks('entity_m2') c;
compress_chunk
-----------------------------------------
_timescaledb_internal._hyper_5_13_chunk
(1 row)

CREATE TABLE entity_m1
(
timec timestamp with time zone ,
entity_oid bigint ,
entity_hash bigint ,
type text ,
current double precision,
capacity double precision,
utilization double precision
);
SELECT create_hypertable('entity_m1', 'timec', chunk_time_interval=>'30 days'::interval);
NOTICE: adding not-null constraint to column "timec"
create_hypertable
------------------------
(7,public,entity_m1,t)
(1 row)

INSERT INTO entity_m1 values (
'2020-12-21 16:47:58.778-05' , 10 , -7792214420424674003 , 'VMEM' , 0, 100 , 0 );
create view metric_view as
SELECT m2.timec,
m2.entity_oid,
m2.entity_hash,
m2.type,
m2.current,
m2.capacity,
m2.utilization,
m2.peak
FROM entity_m2 m2
UNION ALL
SELECT m1.timec,
m1.entity_oid,
m1.entity_hash,
m1.type,
m1.current,
m1.capacity,
m1.utilization,
NULL::double precision AS peak
FROM entity_m1 m1;
SET enable_bitmapscan = false;
SET enable_hashjoin = false;
SET enable_mergejoin = false;
SELECT m.timec, avg(m.utilization) AS avg_util
FROM metric_view m, entity e
WHERE m.type = 'VMEM'
AND m.timec BETWEEN '2020-12-21T00:00:00'::timestamptz - interval '7 day' AND date_trunc('day', '2020-12-22T00:00:00'::timestamptz)
AND m.entity_oid = e.oid
GROUP BY 1 ORDER BY 1;
timec | avg_util
----------------------------------+----------
Mon Dec 21 12:47:58.778 2020 PST | 0
Mon Dec 21 13:47:58.778 2020 PST | 0
(2 rows)

--now compress the other table too and rerun the query --
ALTER TABLE entity_m1 SET (timescaledb.compress,
timescaledb.compress_segmentby = 'entity_oid',
timescaledb.compress_orderby = 'type, timec');
SELECT compress_chunk(c) FROM show_chunks('entity_m1') c;
compress_chunk
-----------------------------------------
_timescaledb_internal._hyper_7_15_chunk
(1 row)

SELECT m.timec, avg(m.utilization) AS avg_util
FROM metric_view m, entity e
WHERE m.type = 'VMEM'
AND m.timec BETWEEN '2020-12-21T00:00:00'::timestamptz - interval '7 day' AND date_trunc('day', '2020-12-22T00:00:00'::timestamptz)
AND m.entity_oid = e.oid
GROUP BY 1 ORDER BY 1;
timec | avg_util
----------------------------------+----------
Mon Dec 21 12:47:58.778 2020 PST | 0
Mon Dec 21 13:47:58.778 2020 PST | 0
(2 rows)

RESET enable_bitmapscan ;
RESET enable_hashjoin ;
RESET enable_mergejoin;
-- end github bug 2917
99 changes: 99 additions & 0 deletions tsl/test/sql/transparent_decompression_ordered_index.sql.in
Original file line number Diff line number Diff line change
Expand Up @@ -132,3 +132,102 @@ SET enable_seqscan = FALSE;
SET enable_seqscan = TRUE;

\ir include/transparent_decompression_constraintaware.sql

-- github bug 2917 with UNION ALL that references compressed ht
-- Regression test for the planner fix: a UNION ALL view over a compressed
-- hypertable used to yield spurious join paths between the compressed chunk
-- and the nesting subquery (an implied parent rel). See issue #2917.
-- NOTE(review): this script is echoed verbatim into the paired expected-output
-- (.out) files, so any textual change here must be mirrored there.

-- lookup table joined against the hypertable view below
CREATE TABLE entity
(
oid bigint PRIMARY KEY,
type text,
name text
);

INSERT INTO entity values(10, 'VMEM', 'cpu');

-- first metric hypertable; has a "peak" column the second one lacks,
-- which forces the UNION ALL branches to differ
CREATE TABLE entity_m2
(
timec timestamp with time zone NOT NULL,
entity_oid bigint ,
entity_hash bigint ,
type text ,
current double precision,
capacity double precision,
utilization double precision,
peak double precision
);

SELECT create_hypertable('entity_m2', 'timec', chunk_time_interval=>'30 days'::interval);

-- two rows in the same chunk so compression produces a single compressed chunk
INSERT INTO entity_m2 values (
'2020-12-21 15:47:58.778-05' , 10 , -7792214420424674003 , 'VMEM' , 0, 2097152 , 0 , 100);
INSERT INTO entity_m2 values (
'2020-12-21 16:47:58.778-05' , 10 , -7792214420424674003 , 'VMEM' , 0, 2097152 , 0 , 100);

-- segmentby on entity_oid: the join column of the query under test,
-- so implied-equality join clauses get generated for the compressed rel
ALTER TABLE entity_m2 SET (timescaledb.compress,
timescaledb.compress_segmentby = 'entity_oid',
timescaledb.compress_orderby = 'type, timec');

SELECT compress_chunk(c) FROM show_chunks('entity_m2') c;

-- second metric hypertable; initially left uncompressed so the first query
-- exercises a mix of compressed and uncompressed UNION ALL branches
CREATE TABLE entity_m1
(
timec timestamp with time zone ,
entity_oid bigint ,
entity_hash bigint ,
type text ,
current double precision,
capacity double precision,
utilization double precision
);

SELECT create_hypertable('entity_m1', 'timec', chunk_time_interval=>'30 days'::interval);
INSERT INTO entity_m1 values (
'2020-12-21 16:47:58.778-05' , 10 , -7792214420424674003 , 'VMEM' , 0, 100 , 0 );


-- UNION ALL view: this makes each chunk a RELOPT_OTHER_MEMBER_REL under the
-- appendrel of the setop, which is the scenario that triggered the bug
create view metric_view as
SELECT m2.timec,
m2.entity_oid,
m2.entity_hash,
m2.type,
m2.current,
m2.capacity,
m2.utilization,
m2.peak
FROM entity_m2 m2
UNION ALL
SELECT m1.timec,
m1.entity_oid,
m1.entity_hash,
m1.type,
m1.current,
m1.capacity,
m1.utilization,
NULL::double precision AS peak
FROM entity_m1 m1;

-- disable other join strategies, presumably to force a nested-loop plan with
-- parameterized paths on the compressed chunk — the path shape the fix filters
SET enable_bitmapscan = false;
SET enable_hashjoin = false;
SET enable_mergejoin = false;
SELECT m.timec, avg(m.utilization) AS avg_util
FROM metric_view m, entity e
WHERE m.type = 'VMEM'
AND m.timec BETWEEN '2020-12-21T00:00:00'::timestamptz - interval '7 day' AND date_trunc('day', '2020-12-22T00:00:00'::timestamptz)
AND m.entity_oid = e.oid
GROUP BY 1 ORDER BY 1;

--now compress the other table too and rerun the query --
ALTER TABLE entity_m1 SET (timescaledb.compress,
timescaledb.compress_segmentby = 'entity_oid',
timescaledb.compress_orderby = 'type, timec');
SELECT compress_chunk(c) FROM show_chunks('entity_m1') c;
-- same query as above; both UNION ALL branches are now compressed and the
-- result must be unchanged
SELECT m.timec, avg(m.utilization) AS avg_util
FROM metric_view m, entity e
WHERE m.type = 'VMEM'
AND m.timec BETWEEN '2020-12-21T00:00:00'::timestamptz - interval '7 day' AND date_trunc('day', '2020-12-22T00:00:00'::timestamptz)
AND m.entity_oid = e.oid
GROUP BY 1 ORDER BY 1;
-- restore planner settings changed above
RESET enable_bitmapscan ;
RESET enable_hashjoin ;
RESET enable_mergejoin;

-- end github bug 2917

0 comments on commit 318d2fd

Please sign in to comment.