Fix poor performance in maintenance tasks

The maintenance tasks were not performing well because of
issue timescale/timescaledb#3404: the qualifiers on series_id
were not being pushed down correctly to the compressed chunks
of the metric table. For example, the old plan for the
delete_expired_series query was:

```
Update on container_memory_failures_total  (cost=0.00..39538450.88 rows=1 width=210)
   ->  Nested Loop Semi Join  (cost=0.00..39538450.88 rows=1 width=210)
         Join Filter: (container_memory_failures_total.id = potentially_drop_series.series_id)
         ->  Nested Loop  (cost=0.00..50249.71 rows=1148685 width=168)
               ->  Seq Scan on ids_epoch  (cost=0.00..1.01 rows=1 width=14)
               ->  Seq Scan on container_memory_failures_total  (cost=0.00..38761.85 rows=1148685 width=154)
                     Filter: (delete_epoch IS NULL)
         ->  Materialize  (cost=0.00..39470970.90 rows=1 width=50)
               ->  Nested Loop Anti Join  (cost=0.00..39470970.89 rows=1 width=50)
                     Join Filter: (data_exists.series_id = potentially_drop_series.series_id)
                     ->  Subquery Scan on potentially_drop_series  (cost=0.00..0.07 rows=1 width=40)
                           ->  Limit  (cost=0.00..0.06 rows=1 width=12)
                                 ->  HashSetOp Except  (cost=0.00..0.06 rows=1 width=12)
                                       ->  Append  (cost=0.00..0.05 rows=2 width=12)
                                             ->  Subquery Scan on "*SELECT* 1"  (cost=0.00..0.02 rows=1 width=12)
                                                   ->  HashAggregate  (cost=0.00..0.01 rows=1 width=0)
                                                         Group Key: series_id
                                                         ->  Result  (cost=0.00..0.00 rows=0 width=0)
                                                               One-Time Filter: false
                                             ->  Subquery Scan on "*SELECT* 2"  (cost=0.00..0.02 rows=1 width=12)
                                                   ->  HashAggregate  (cost=0.00..0.01 rows=1 width=0)
                                                         Group Key: series_id
                                                         ->  Result  (cost=0.00..0.00 rows=0 width=0)
                                                               One-Time Filter: false
                     ->  Append  (cost=0.00..11565989.70 rows=2232398490 width=18)
                           ->  Seq Scan on container_memory_failures_total data_exists_1  (cost=0.00..0.00 rows=1 width=18)
                                 Filter: ("time" >= '2021-02-11 13:11:07.779863+00'::timestamp with time zone)
                           ->  Custom Scan (DecompressChunk) on _hyper_318_62_chunk data_exists_2  (cost=0.04..2660.36 rows=67068000 width=18)
                                 Filter: ("time" >= '2021-02-11 13:11:07.779863+00'::timestamp with time zone)
                                 ->  Seq Scan on compress_hyper_333_9206_chunk  (cost=0.00..2660.36 rows=67068 width=136)
                                       Filter: (_ts_meta_max_1 >= '2021-02-11 13:11:07.779863+00'::timestamp with time zone)
                           ->  Custom Scan (DecompressChunk) on _hyper_318_4497_chunk data_exists_3  (cost=0.04..2698.91 rows=67432000 width=18)
                                 Filter: ("time" >= '2021-02-11 13:11:07.779863+00'::timestamp with time zone)
                                 ->  Seq Scan on compress_hyper_333_14717_chunk  (cost=0.00..2698.91 rows=67432 width=136)
                                       Filter: (_ts_meta_max_1 >= '2021-02-11 13:11:07.779863+00'::timestamp with time zone)
....
```

Notice the lack of series_id quals on the compressed chunks.

The new plan is:
```
Update on container_memory_failures_total  (cost=3.03..6.28 rows=1 width=228)
   ->  Nested Loop  (cost=3.03..6.28 rows=1 width=228)
         ->  Nested Loop  (cost=3.03..5.26 rows=1 width=214)
               ->  HashAggregate  (cost=2.60..2.61 rows=1 width=68)
                     Group Key: potentially_drop_series.series_id
                     ->  Nested Loop Left Join  (cost=2.51..2.60 rows=1 width=68)
                           Filter: (ex.indicator IS NULL)
                           ->  Subquery Scan on potentially_drop_series  (cost=0.00..0.07 rows=1 width=40)
                                 ->  Limit  (cost=0.00..0.06 rows=1 width=12)
                                       ->  HashSetOp Except  (cost=0.00..0.06 rows=1 width=12)
                                             ->  Append  (cost=0.00..0.05 rows=2 width=12)
                                                   ->  Subquery Scan on "*SELECT* 1"  (cost=0.00..0.02 rows=1 width=12)
                                                         ->  HashAggregate  (cost=0.00..0.01 rows=1 width=0)
                                                               Group Key: series_id
                                                               ->  Result  (cost=0.00..0.00 rows=0 width=0)
                                                                     One-Time Filter: false
                                                   ->  Subquery Scan on "*SELECT* 2"  (cost=0.00..0.02 rows=1 width=12)
                                                         ->  HashAggregate  (cost=0.00..0.01 rows=1 width=0)
                                                               Group Key: series_id
                                                               ->  Result  (cost=0.00..0.00 rows=0 width=0)
                                                                     One-Time Filter: false
                           ->  Subquery Scan on ex  (cost=2.51..2.52 rows=1 width=32)
                                 ->  Limit  (cost=2.51..2.51 rows=1 width=12)
                                       ->  Custom Scan (ChunkAppend) on container_memory_failures_total data_exists  (cost=2.51..2.51 rows=1000 width=12)
                                             Order: data_exists."time"
                                             ->  Custom Scan (DecompressChunk) on _hyper_318_62_chunk data_exists_1  (cost=2.51..2.51 rows=1000 width=8)
                                                   Filter: ("time" >= '2021-02-11 13:11:07.779863+00'::timestamp with time zone)
                                                   ->  Index Scan using compress_hyper_333_9206_chunk__compressed_hypertable_333_series on compress_hyper_333_9206_chunk  (cost=0.29..2.51 rows=1 width=56)
                                                         Index Cond: (series_id = potentially_drop_series.series_id)
                                                         Filter: (_ts_meta_max_1 >= '2021-02-11 13:11:07.779863+00'::timestamp with time zone)
                                             ->  Custom Scan (DecompressChunk) on _hyper_318_4497_chunk data_exists_2  (cost=2.51..2.51 rows=1000 width=8)
                                                   Filter: ("time" >= '2021-02-11 13:11:07.779863+00'::timestamp with time zone)
                                                   ->  Index Scan using compress_hyper_333_14717_chunk__compressed_hypertable_333_serie on compress_hyper_333_14717_chunk  (cost=0.29..2.51 rows=1 width=56)
                                                         Index Cond: (series_id = potentially_drop_series.series_id)
                                                         Filter: (_ts_meta_max_1 >= '2021-02-11 13:11:07.779863+00'::timestamp with time zone)
....
```
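
The improvement comes from rewriting the existence check as a lateral probe
(see the base.sql diff below): probing each candidate series with
ORDER BY time ASC / LIMIT 1 lets the planner use ChunkAppend and push the
series_id qualifier down to the compressed chunks' indexes. A simplified
sketch of the before/after query shape, where "metric" stands in for the
SCHEMA_DATA.%1$I table and a literal timestamp replaces the %3$L parameter:

```
-- Before: anti-join via NOT EXISTS; the series_id qual never reached the
-- compressed chunks (timescale/timescaledb#3404), so they were scanned
-- without it.
SELECT series_id
FROM potentially_drop_series
WHERE NOT EXISTS (
    SELECT 1
    FROM metric data_exists
    WHERE data_exists.series_id = potentially_drop_series.series_id
      AND time >= '2021-02-11 13:11:07+00'
    ORDER BY time ASC
    LIMIT 1
);

-- After: one lateral probe per candidate series; a NULL indicator means the
-- series has no remaining data. The series_id condition now lands on the
-- compressed chunks' index, as seen in the plan above.
SELECT series_id
FROM potentially_drop_series
LEFT JOIN LATERAL (
    SELECT 1
    FROM metric data_exists
    WHERE data_exists.series_id = potentially_drop_series.series_id
      AND time >= '2021-02-11 13:11:07+00'
    ORDER BY time ASC
    LIMIT 1
) lateral_exists(indicator) ON (TRUE)
WHERE lateral_exists.indicator IS NULL;
```

The same LEFT JOIN LATERAL ... WHERE indicator IS NULL pattern is applied to
the dead-series recheck in the second hunk of the diff.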

We also include a small optimization: skip the label-delete query
entirely when the array of candidate labels is empty.
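
That guard is just an IF around the label-delete EXECUTE; a minimal sketch of
the idea (the full block is in the base.sql diff below, with the query body
elided here):

```
-- array_length() returns NULL for an empty array, so the comparison is
-- only true when at least one candidate label id was collected
IF array_length(label_array, 1) > 0 THEN
    SET LOCAL jit = 'off';
    EXECUTE format($query$ ... $query$, metric_table) USING label_array;
    SET LOCAL jit = DEFAULT;
END IF;
```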

I think this will speed up the maintenance tasks quite a bit and thus
fixes quite a few issues.

Fixes #663, #506, #596
cevian committed Jul 16, 2021
1 parent 82f64c9 commit 08261e9
Showing 2 changed files with 52 additions and 43 deletions.
4 changes: 2 additions & 2 deletions pkg/migrations/migration_files_generated.go

Large diffs are not rendered by default.

91 changes: 50 additions & 41 deletions pkg/migrations/sql/idempotent/base.sql
```
@@ -1449,14 +1449,15 @@ BEGIN
     ), confirmed_drop_series AS (
         SELECT series_id
         FROM potentially_drop_series
-        WHERE NOT EXISTS (
+        LEFT JOIN LATERAL (
             SELECT 1
             FROM SCHEMA_DATA.%1$I data_exists
             WHERE data_exists.series_id = potentially_drop_series.series_id AND time >= %3$L
             --use chunk append + more likely to find something starting at earliest time
             ORDER BY time ASC
             LIMIT 1
-        )
+        ) lateral_exists(indicator) ON (TRUE)
+        WHERE lateral_exists.indicator IS NULL
     ) -- we want this next statement to be the last one in the txn since it could block series fetch (both of them update delete_epoch)
     UPDATE SCHEMA_DATA_SERIES.%1$I SET delete_epoch = current_epoch+1
     FROM SCHEMA_CATALOG.ids_epoch
@@ -1495,13 +1496,20 @@ BEGIN
     EXECUTE format($query$
     -- recheck that the series IDs we might delete are actually dead
     WITH dead_series AS (
-        SELECT id FROM SCHEMA_DATA_SERIES.%1$I
+        SELECT potential.id
+        FROM
+        (
+            SELECT id
+            FROM SCHEMA_DATA_SERIES.%1$I
             WHERE delete_epoch <= %2$L
-        AND NOT EXISTS (
-            SELECT 1 FROM SCHEMA_DATA.%1$I
-            WHERE id = series_id
-            LIMIT 1
-        )
+        ) as potential
+        LEFT JOIN LATERAL (
+            SELECT 1
+            FROM SCHEMA_DATA.%1$I metric_data
+            WHERE metric_data.series_id = potential.id
+            LIMIT 1
+        ) as lateral_exists(indicator) ON (TRUE)
+        WHERE indicator IS NULL
     ), deleted_series AS (
         DELETE FROM SCHEMA_DATA_SERIES.%1$I
         WHERE delete_epoch <= %2$L
@@ -1518,41 +1526,42 @@ BEGIN
     $query$, metric_table, deletion_epoch) INTO label_array;
 
-    --jit interacts poorly why the multi-partition query below
-    SET LOCAL jit = 'off';
-    --needs to be a separate query and not a CTE since this needs to "see"
-    --the series rows deleted above as deleted.
-    --Note: we never delete metric name keys since there are check constraints that
-    --rely on those ids not changing.
-    EXECUTE format($query$
-    WITH check_local_series AS (
-        --the series table from which we just deleted is much more likely to have the label, so check that first to exclude most labels.
-        SELECT label_id
-        FROM unnest($1) as labels(label_id)
-        WHERE NOT EXISTS (
-            SELECT 1
-            FROM SCHEMA_DATA_SERIES.%1$I series_exists_local
-            WHERE series_exists_local.labels && ARRAY[labels.label_id]
-            LIMIT 1
-        )
-    ),
-    confirmed_drop_labels AS (
-        --do the global check to confirm
-        SELECT label_id
-        FROM check_local_series
-        WHERE NOT EXISTS (
-            SELECT 1
-            FROM SCHEMA_CATALOG.series series_exists
-            WHERE series_exists.labels && ARRAY[label_id]
-            LIMIT 1
-        )
-    )
-    DELETE FROM SCHEMA_CATALOG.label
-    WHERE id IN (SELECT * FROM confirmed_drop_labels) AND key != '__name__';
-    $query$, metric_table) USING label_array;
-
-    SET LOCAL jit = DEFAULT;
+    IF array_length(label_array, 1) > 0 THEN
+        --jit interacts poorly why the multi-partition query below
+        SET LOCAL jit = 'off';
+        --needs to be a separate query and not a CTE since this needs to "see"
+        --the series rows deleted above as deleted.
+        --Note: we never delete metric name keys since there are check constraints that
+        --rely on those ids not changing.
+        EXECUTE format($query$
+        WITH check_local_series AS (
+            --the series table from which we just deleted is much more likely to have the label, so check that first to exclude most labels.
+            SELECT label_id
+            FROM unnest($1) as labels(label_id)
+            WHERE NOT EXISTS (
+                SELECT 1
+                FROM SCHEMA_DATA_SERIES.%1$I series_exists_local
+                WHERE series_exists_local.labels && ARRAY[labels.label_id]
+                LIMIT 1
+            )
+        ),
+        confirmed_drop_labels AS (
+            --do the global check to confirm
+            SELECT label_id
+            FROM check_local_series
+            WHERE NOT EXISTS (
+                SELECT 1
+                FROM SCHEMA_CATALOG.series series_exists
+                WHERE series_exists.labels && ARRAY[label_id]
+                LIMIT 1
+            )
+        )
+        DELETE FROM SCHEMA_CATALOG.label
+        WHERE id IN (SELECT * FROM confirmed_drop_labels) AND key != '__name__';
+        $query$, metric_table) USING label_array;
+
+        SET LOCAL jit = DEFAULT;
+    END IF;
 
     UPDATE SCHEMA_CATALOG.ids_epoch
     SET (current_epoch, last_update_time) = (next_epoch, now())
```
