Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix nested loop joins that involve compressed chunks #2851

Merged
merged 1 commit into from Jan 27, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Expand Up @@ -9,6 +9,7 @@ accidentally triggering the load of a previous DB version.**
**Bugfixes**
* #2842 Do not mark job as started when setting next_start field
* #2845 Fix continuous aggregate privileges during upgrade
* #2851 Fix nested loop joins that involve compressed chunks
* #2865 Apply volatile function quals at decompresschunk node

**Minor features**
Expand Down
14 changes: 5 additions & 9 deletions tsl/src/nodes/decompress_chunk/decompress_chunk.c
Expand Up @@ -681,11 +681,13 @@ typedef struct EMCreationContext
FormData_hypertable_compression *current_col_info;
} EMCreationContext;

/* get the segmentby compression info for an EquivalenceMember (EM) expr,
/* get the compression info for an EquivalenceMember (EM) expr,
* or return NULL if it's not one we can create an EM for.
* This is applicable to segmentby and compressed columns
* of the compressed table.
*/
static FormData_hypertable_compression *
segmentby_compression_info_for_em(Node *node, EMCreationContext *context)
get_compression_info_for_em(Node *node, EMCreationContext *context)
{
/* based on adjust_appendrel_attrs_mutator */
if (node == NULL)
Expand Down Expand Up @@ -714,10 +716,6 @@ segmentby_compression_info_for_em(Node *node, EMCreationContext *context)
if (col_info == NULL)
return NULL;

/* we can only add EMs for segmentby columns */
if (col_info->segmentby_column_index <= 0)
return NULL;

return col_info;
}

Expand All @@ -734,7 +732,6 @@ create_var_for_compressed_equivalence_member(Var *var, const EMCreationContext *
{
/* based on adjust_appendrel_attrs_mutator */
Assert(context->current_col_info != NULL);
Assert(context->current_col_info->segmentby_column_index > 0);
Assert(var->varno == context->uncompressed_relid_idx);
Assert(var->varattno > 0);

Expand Down Expand Up @@ -787,8 +784,7 @@ add_segmentby_to_equivalence_class(EquivalenceClass *cur_ec, CompressionInfo *in
* be set on the em */
Assert(bms_overlap(cur_em->em_relids, uncompressed_chunk_relids));

context->current_col_info =
segmentby_compression_info_for_em((Node *) cur_em->em_expr, context);
context->current_col_info = get_compression_info_for_em((Node *) cur_em->em_expr, context);
if (context->current_col_info == NULL)
continue;

Expand Down
145 changes: 145 additions & 0 deletions tsl/test/expected/transparent_decompression_ordered_index-11.out
Expand Up @@ -567,6 +567,151 @@ ORDER BY time;
SET enable_mergejoin = TRUE;
SET enable_hashjoin = TRUE;
--end github issue 1558
-- github issue 2673
-- nested loop join with parameterized path
-- join condition has a segment by column and another column.
SET enable_hashjoin = false;
SET enable_mergejoin=false;
SET enable_material = false;
SET enable_seqscan = false;
-- restrict so that we select only 1 chunk.
:PREFIX
WITH lookup as ( SELECT * from (values( 3, 5) , (3, 4) ) as lu( did, version) )
SELECT met.*, lookup.*
FROM metrics_ordered_idx met join lookup
ON met.device_id = lookup.did and met.v0 = lookup.version
WHERE met.time > '2000-01-19 19:00:00-05'
and met.time < '2000-01-20 20:00:00-05';
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Nested Loop (actual rows=2 loops=1)
CTE lookup
-> Values Scan on "*VALUES*" (actual rows=2 loops=1)
-> CTE Scan on lookup (actual rows=2 loops=1)
-> Append (actual rows=1 loops=2)
-> Custom Scan (DecompressChunk) on _hyper_1_3_chunk met (actual rows=1 loops=2)
Filter: (("time" > 'Wed Jan 19 16:00:00 2000 PST'::timestamp with time zone) AND ("time" < 'Thu Jan 20 17:00:00 2000 PST'::timestamp with time zone) AND (lookup.version = v0))
Rows Removed by Filter: 47
-> Index Scan using compress_hyper_2_8_chunk__compressed_hypertable_2_device_id__ts on compress_hyper_2_8_chunk (actual rows=1 loops=2)
Index Cond: (device_id = lookup.did)
Filter: ((_ts_meta_max_1 > 'Wed Jan 19 16:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_min_1 < 'Thu Jan 20 17:00:00 2000 PST'::timestamp with time zone))
(11 rows)

--add filter to segment by (device_id) and compressed attr column (v0)
:PREFIX
WITH lookup as ( SELECT * from (values( 3, 5) , (3, 4) ) as lu( did, version) )
SELECT met.*, lookup.*
FROM metrics_ordered_idx met join lookup
ON met.device_id = lookup.did and met.v0 = lookup.version
WHERE met.time > '2000-01-19 19:00:00-05'
and met.time < '2000-01-20 20:00:00-05'
and met.device_id = 3 and met.v0 = 5;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Nested Loop (actual rows=1 loops=1)
CTE lookup
-> Values Scan on "*VALUES*" (actual rows=2 loops=1)
-> CTE Scan on lookup (actual rows=1 loops=1)
Filter: ((did = 3) AND (version = 5))
Rows Removed by Filter: 1
-> Append (actual rows=1 loops=1)
-> Custom Scan (DecompressChunk) on _hyper_1_3_chunk met (actual rows=1 loops=1)
Filter: (("time" > 'Wed Jan 19 16:00:00 2000 PST'::timestamp with time zone) AND ("time" < 'Thu Jan 20 17:00:00 2000 PST'::timestamp with time zone) AND (v0 = 5))
Rows Removed by Filter: 47
-> Index Scan using compress_hyper_2_8_chunk__compressed_hypertable_2_device_id__ts on compress_hyper_2_8_chunk (actual rows=1 loops=1)
Index Cond: (device_id = 3)
Filter: ((_ts_meta_max_1 > 'Wed Jan 19 16:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_min_1 < 'Thu Jan 20 17:00:00 2000 PST'::timestamp with time zone))
(13 rows)

:PREFIX
WITH lookup as ( SELECT * from (values( 3, 5) , (3, 4) ) as lu( did, version) )
SELECT met.*, lookup.*
FROM metrics_ordered_idx met join lookup
ON met.device_id = lookup.did and met.v0 = lookup.version
WHERE met.time = '2000-01-19 19:00:00-05'
and met.device_id = 3
and met.device_id_peer = 3 and met.v0 = 5;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Nested Loop (actual rows=0 loops=1)
CTE lookup
-> Values Scan on "*VALUES*" (actual rows=2 loops=1)
-> CTE Scan on lookup (actual rows=1 loops=1)
Filter: ((did = 3) AND (version = 5))
Rows Removed by Filter: 1
-> Append (actual rows=0 loops=1)
-> Custom Scan (DecompressChunk) on _hyper_1_3_chunk met (actual rows=0 loops=1)
Filter: ((v0 = 5) AND ("time" = 'Wed Jan 19 16:00:00 2000 PST'::timestamp with time zone))
Rows Removed by Filter: 48
-> Index Scan using compress_hyper_2_8_chunk__compressed_hypertable_2_device_id_pee on compress_hyper_2_8_chunk (actual rows=1 loops=1)
Index Cond: (device_id_peer = 3)
Filter: ((device_id = 3) AND (_ts_meta_min_1 <= 'Wed Jan 19 16:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_max_1 >= 'Wed Jan 19 16:00:00 2000 PST'::timestamp with time zone))
(13 rows)

-- lateral subquery
:PREFIX
WITH f1 as ( SELECT * from (values( 7, 5, 4) , (4, 5, 5) ) as lu( device_id, device_id_peer, v0) )
SELECT * FROM metrics_ordered_idx met
JOIN LATERAL
( SELECT node, f1.* from nodetime , f1
WHERE node = f1.device_id) q
ON met.device_id = q.node and met.device_id_peer = q.device_id_peer
and met.v0 = q.v0 and met.v0 > 2 and time = '2018-01-19 20:00:00-05';
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Nested Loop (actual rows=1 loops=1)
Join Filter: (nodetime.node = met.device_id)
CTE f1
-> Values Scan on "*VALUES*" (actual rows=2 loops=1)
-> Nested Loop (actual rows=1 loops=1)
Join Filter: (nodetime.node = f1.device_id)
Rows Removed by Join Filter: 1
-> Seq Scan on nodetime (actual rows=1 loops=1)
-> CTE Scan on f1 (actual rows=2 loops=1)
-> Append (actual rows=1 loops=1)
-> Custom Scan (DecompressChunk) on _hyper_1_4_chunk met (actual rows=1 loops=1)
Filter: ((v0 > 2) AND ("time" = 'Fri Jan 19 17:00:00 2018 PST'::timestamp with time zone) AND (f1.device_id = device_id) AND (f1.v0 = v0))
Rows Removed by Filter: 47
-> Index Scan using compress_hyper_2_9_chunk__compressed_hypertable_2_device_id_pee on compress_hyper_2_9_chunk (actual rows=1 loops=1)
Index Cond: (device_id_peer = f1.device_id_peer)
Filter: ((_ts_meta_min_1 <= 'Fri Jan 19 17:00:00 2018 PST'::timestamp with time zone) AND (_ts_meta_max_1 >= 'Fri Jan 19 17:00:00 2018 PST'::timestamp with time zone) AND (f1.device_id = device_id))
(16 rows)

-- filter on compressed attr (v0) with seqscan enabled and indexscan
-- disabled. filters on compressed attr should be above the seq scan.
SET enable_seqscan = true;
SET enable_indexscan = false;
:PREFIX
WITH lookup as ( SELECT * from (values( 3, 5) , (3, 4) ) as lu( did, version) )
SELECT met.*, lookup.*
FROM metrics_ordered_idx met join lookup
ON met.device_id = lookup.did and met.v0 = lookup.version
and met.device_id = 3
WHERE met.time > '2000-01-19 19:00:00-05'
and met.time < '2000-01-20 20:00:00-05'
and met.device_id = 3
and met.device_id_peer = 3 and met.v0 = 5;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Nested Loop (actual rows=1 loops=1)
CTE lookup
-> Values Scan on "*VALUES*" (actual rows=2 loops=1)
-> CTE Scan on lookup (actual rows=1 loops=1)
Filter: ((did = 3) AND (version = 5))
Rows Removed by Filter: 1
-> Append (actual rows=1 loops=1)
-> Custom Scan (DecompressChunk) on _hyper_1_3_chunk met (actual rows=1 loops=1)
Filter: (("time" > 'Wed Jan 19 16:00:00 2000 PST'::timestamp with time zone) AND ("time" < 'Thu Jan 20 17:00:00 2000 PST'::timestamp with time zone) AND (v0 = 5))
Rows Removed by Filter: 47
-> Seq Scan on compress_hyper_2_8_chunk (actual rows=1 loops=1)
Filter: ((_ts_meta_max_1 > 'Wed Jan 19 16:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_min_1 < 'Thu Jan 20 17:00:00 2000 PST'::timestamp with time zone) AND (device_id = 3) AND (device_id_peer = 3))
(12 rows)

RESET enable_hashjoin ;
RESET enable_mergejoin;
RESET enable_material ;
RESET enable_indexscan ;
--end github issue 2673
SET enable_seqscan = TRUE;
\ir include/transparent_decompression_constraintaware.sql
-- This file and its contents are licensed under the Timescale License.
Expand Down