Skip to content

Commit

Permalink
Fix SkipScan path generation with constant DISTINCT column
Browse files Browse the repository at this point in the history
When a DISTINCT query has a WHERE clause that constifies the
DISTINCT column the query might use an index that does not have
include the DISTINCT column even though it is referenced in the
ORDER BY clause. The skipscan path generation would error on any
path with such a configuration. This patch changes the path
generation code to skip generating SkipScan path under these
circumstances.

Fixes #3629
  • Loading branch information
svenklemm committed Oct 7, 2021
1 parent 74ca546 commit 43aebc3
Show file tree
Hide file tree
Showing 6 changed files with 123 additions and 3 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ accidentally triggering the load of a previous DB version.**
**Bugfixes**
* #3580 Fix memory context bug executing TRUNCATE
* #3654 Fix index attnum mapping in reorder_chunk
* #3661 Fix SkipScan path generation with constant DISTINCT column

**Thanks**
* @binakot and @sebvett for reporting an issue with DISTINCT queries

**Thanks**
* @hardikm10, @DavidPavlicek and @pafiti for reporting bugs on TRUNCATE
Expand Down
18 changes: 15 additions & 3 deletions tsl/src/nodes/skip_scan/planner.c
Original file line number Diff line number Diff line change
Expand Up @@ -536,7 +536,21 @@ build_skip_qual(PlannerInfo *root, SkipScanPath *skip_scan_path, IndexPath *inde
Oid column_type = exprType((Node *) var);
Oid column_collation = get_typcollation(column_type);
TypeCacheEntry *tce = lookup_type_cache(column_type, 0);

/*
* If we have an order by clause with columns that have constraints in the WHERE clause
* removing them from the pathkeys we might have a path with an index that produces the correct
* pathkeys for the target ordering without actually including all the columns of the ORDER BY
* in that case skipscan path generation is pointless.
*
* Example query:
* SELECT DISTINCT ON (a) * FROM test WHERE a in (2) ORDER BY a ASC, time DESC;
* Since a is always 2 due to the WHERE clause we can create the correct ordering for the
* ORDER BY with an index that does not include the a column and only includes the time column.
*/
int idx_key = get_idx_key(info, var->varattno);
if (idx_key < 0)
return false;

skip_scan_path->distinct_attno = var->varattno;
skip_scan_path->distinct_by_val = tce->typbyval;
Expand Down Expand Up @@ -586,9 +600,7 @@ get_idx_key(IndexOptInfo *idxinfo, AttrNumber attno)
if (attno == idxinfo->indexkeys[i])
return i;
}

elog(ERROR, "column not present in index: %d", attno);
pg_unreachable();
return -1;
}

/* Sort quals according to index column order.
Expand Down
32 changes: 32 additions & 0 deletions tsl/test/expected/plan_skip_scan-12.out
Original file line number Diff line number Diff line change
Expand Up @@ -3917,3 +3917,35 @@ EXPLAIN (costs off, timing off, summary off) SELECT DISTINCT ON (dev_name) dev_n
Index Cond: (dev_name > NULL::text)
(15 rows)

-- #3629 skipscan with constant skipscan column in where clause
CREATE TABLE i3629(a int, time timestamptz NOT NULL);
SELECT table_name FROM create_hypertable('i3629', 'time');
table_name
------------
i3629
(1 row)

INSERT INTO i3629 SELECT i, '2020-04-01'::date-10-i from generate_series(1,20) i;
EXPLAIN (SUMMARY OFF, COSTS OFF) SELECT DISTINCT ON (a) * FROM i3629 WHERE a in (2) ORDER BY a ASC, time DESC;
QUERY PLAN
------------------------------------------------
Unique
-> Sort
Sort Key: _hyper_3_6_chunk."time" DESC
-> Append
-> Seq Scan on _hyper_3_6_chunk
Filter: (a = 2)
-> Seq Scan on _hyper_3_7_chunk
Filter: (a = 2)
-> Seq Scan on _hyper_3_8_chunk
Filter: (a = 2)
-> Seq Scan on _hyper_3_9_chunk
Filter: (a = 2)
(12 rows)

SELECT DISTINCT ON (a) * FROM i3629 WHERE a in (2) ORDER BY a ASC, time DESC;
a | time
---+------------------------------
2 | Fri Mar 20 00:00:00 2020 PDT
(1 row)

32 changes: 32 additions & 0 deletions tsl/test/expected/plan_skip_scan-13.out
Original file line number Diff line number Diff line change
Expand Up @@ -3909,3 +3909,35 @@ EXPLAIN (costs off, timing off, summary off) SELECT DISTINCT ON (dev_name) dev_n
Index Cond: (dev_name > NULL::text)
(15 rows)

-- #3629 skipscan with constant skipscan column in where clause
CREATE TABLE i3629(a int, time timestamptz NOT NULL);
SELECT table_name FROM create_hypertable('i3629', 'time');
table_name
------------
i3629
(1 row)

INSERT INTO i3629 SELECT i, '2020-04-01'::date-10-i from generate_series(1,20) i;
EXPLAIN (SUMMARY OFF, COSTS OFF) SELECT DISTINCT ON (a) * FROM i3629 WHERE a in (2) ORDER BY a ASC, time DESC;
QUERY PLAN
------------------------------------------------
Unique
-> Sort
Sort Key: _hyper_3_6_chunk."time" DESC
-> Append
-> Seq Scan on _hyper_3_6_chunk
Filter: (a = 2)
-> Seq Scan on _hyper_3_7_chunk
Filter: (a = 2)
-> Seq Scan on _hyper_3_8_chunk
Filter: (a = 2)
-> Seq Scan on _hyper_3_9_chunk
Filter: (a = 2)
(12 rows)

SELECT DISTINCT ON (a) * FROM i3629 WHERE a in (2) ORDER BY a ASC, time DESC;
a | time
---+------------------------------
2 | Fri Mar 20 00:00:00 2020 PDT
(1 row)

32 changes: 32 additions & 0 deletions tsl/test/expected/plan_skip_scan-14.out
Original file line number Diff line number Diff line change
Expand Up @@ -3909,3 +3909,35 @@ EXPLAIN (costs off, timing off, summary off) SELECT DISTINCT ON (dev_name) dev_n
Index Cond: (dev_name > NULL::text)
(15 rows)

-- #3629 skipscan with constant skipscan column in where clause
CREATE TABLE i3629(a int, time timestamptz NOT NULL);
SELECT table_name FROM create_hypertable('i3629', 'time');
table_name
------------
i3629
(1 row)

INSERT INTO i3629 SELECT i, '2020-04-01'::date-10-i from generate_series(1,20) i;
EXPLAIN (SUMMARY OFF, COSTS OFF) SELECT DISTINCT ON (a) * FROM i3629 WHERE a in (2) ORDER BY a ASC, time DESC;
QUERY PLAN
------------------------------------------------
Unique
-> Sort
Sort Key: _hyper_3_6_chunk."time" DESC
-> Append
-> Seq Scan on _hyper_3_6_chunk
Filter: (a = 2)
-> Seq Scan on _hyper_3_7_chunk
Filter: (a = 2)
-> Seq Scan on _hyper_3_8_chunk
Filter: (a = 2)
-> Seq Scan on _hyper_3_9_chunk
Filter: (a = 2)
(12 rows)

SELECT DISTINCT ON (a) * FROM i3629 WHERE a in (2) ORDER BY a ASC, time DESC;
a | time
---+------------------------------
2 | Fri Mar 20 00:00:00 2020 PDT
(1 row)

8 changes: 8 additions & 0 deletions tsl/test/sql/plan_skip_scan.sql.in
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,11 @@
-- try one query with EXPLAIN only for coverage
EXPLAIN (costs off, timing off, summary off) SELECT DISTINCT ON (dev_name) dev_name FROM skip_scan;
EXPLAIN (costs off, timing off, summary off) SELECT DISTINCT ON (dev_name) dev_name FROM skip_scan_ht;

-- #3629 skipscan with constant skipscan column in where clause
CREATE TABLE i3629(a int, time timestamptz NOT NULL);
SELECT table_name FROM create_hypertable('i3629', 'time');
INSERT INTO i3629 SELECT i, '2020-04-01'::date-10-i from generate_series(1,20) i;
EXPLAIN (SUMMARY OFF, COSTS OFF) SELECT DISTINCT ON (a) * FROM i3629 WHERE a in (2) ORDER BY a ASC, time DESC;
SELECT DISTINCT ON (a) * FROM i3629 WHERE a in (2) ORDER BY a ASC, time DESC;

0 comments on commit 43aebc3

Please sign in to comment.