Skip to content

Commit

Permalink
Improve cagg query chunk exclusion
Browse files Browse the repository at this point in the history
This patch changes the time_bucket exclusion in cagg queries to
distinguish between < and <=. Previously those were treated the
same, leading to a failure to exclude chunks when the constraints
were exactly at the bucket boundary.
  • Loading branch information
svenklemm committed Jun 28, 2023
1 parent e2e7e5f commit 4bb126b
Show file tree
Hide file tree
Showing 6 changed files with 204 additions and 282 deletions.
2 changes: 2 additions & 0 deletions .unreleased/bugfix_5824
@@ -0,0 +1,2 @@
Fixes: #5824 Improve cagg query chunk exclusion

164 changes: 70 additions & 94 deletions src/planner/expand_hypertable.c
Expand Up @@ -154,6 +154,30 @@ is_timestamptz_op_interval(Expr *expr)
(c1->consttype == INTERVALOID && c2->consttype == TIMESTAMPTZOID);
}

/*
 * Wrap a 64-bit integer value in a Datum of the requested type.
 *
 * value: the integer to convert.
 * type:  type Oid selecting the conversion. INT2OID, INT4OID, INT8OID,
 *        TIMESTAMPOID and TIMESTAMPTZOID are handled.
 *
 * Returns the result of the matching *GetDatum macro. Any other type Oid
 * falls out of the switch and is reported with ereport(ERROR) using
 * ERRCODE_INVALID_PARAMETER_VALUE.
 *
 * NOTE(review): the error text mentions only "integer types" although the
 * switch also accepts the two timestamp Oids.
 */
static Datum
int_get_datum(int64 value, Oid type)
{
switch (type)
{
case INT2OID:
return Int16GetDatum(value);
case INT4OID:
return Int32GetDatum(value);
case INT8OID:
return Int64GetDatum(value);
case TIMESTAMPOID:
return TimestampGetDatum(value);
case TIMESTAMPTZOID:
return TimestampTzGetDatum(value);
}

/* No case above matched: the type Oid is not supported by this helper. */
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("can only use int_get_datum with integer types")));

/* Presumably marks the point after the ERROR report as never reached, so no return value is needed here. */
pg_unreachable();

Check warning on line 178 in src/planner/expand_hypertable.c

View check run for this annotation

Codecov / codecov/patch

src/planner/expand_hypertable.c#L178

Added line #L178 was not covered by tests
}

static int64
const_datum_get_int(Const *cnst)
{
Expand All @@ -167,6 +191,12 @@ const_datum_get_int(Const *cnst)
return (int64) (DatumGetInt32(cnst->constvalue));
case INT8OID:
return DatumGetInt64(cnst->constvalue);
case DATEOID:
return DatumGetDateADT(cnst->constvalue);
case TIMESTAMPOID:
return DatumGetTimestamp(cnst->constvalue);
case TIMESTAMPTZOID:
return DatumGetTimestampTz(cnst->constvalue);
}

ereport(ERROR,
Expand Down Expand Up @@ -423,55 +453,32 @@ transform_time_bucket_comparison(PlannerInfo *root, OpExpr *op)
switch (tce->type_id)
{
case INT2OID:
integralValue = const_datum_get_int(castNode(Const, value));
integralWidth = const_datum_get_int(width);

if (integralValue >= PG_INT16_MAX - integralWidth)
return op;

datum = Int16GetDatum(integralValue + integralWidth);
subst = (Expr *) makeConst(tce->type_id,
-1,
InvalidOid,
tce->typlen,
datum,
false,
tce->typbyval);
break;

case INT4OID:
integralValue = const_datum_get_int(castNode(Const, value));
integralWidth = const_datum_get_int(width);

if (integralValue >= PG_INT32_MAX - integralWidth)
return op;

datum = Int32GetDatum(integralValue + integralWidth);
subst = (Expr *) makeConst(tce->type_id,
-1,
InvalidOid,
tce->typlen,
datum,
false,
tce->typbyval);
break;
case INT8OID:
integralValue = const_datum_get_int(castNode(Const, value));
integralWidth = const_datum_get_int(width);

if (integralValue >= PG_INT64_MAX - integralWidth)
if (integralValue >= ts_time_get_max(tce->type_id) - integralWidth)
return op;

datum = Int64GetDatum(integralValue + integralWidth);
/* When the time_bucket comparison is at the bucket boundary and we have a less-than
* constraint, we can skip adding the additional bucket; in all other cases we need
* to add the interval.
*/
if (strategy == BTLessStrategyNumber && integralValue % integralWidth == 0)
datum = int_get_datum(integralValue, tce->type_id);
else
datum = int_get_datum(integralValue + integralWidth, tce->type_id);

subst = (Expr *) makeConst(tce->type_id,
-1,
InvalidOid,
tce->typlen,
datum,
false,
tce->typbyval);

break;

case DATEOID:
{
Interval *interval = DatumGetIntervalP(width->constvalue);
Expand All @@ -486,14 +493,22 @@ transform_time_bucket_comparison(PlannerInfo *root, OpExpr *op)
if (interval->time >= 0x3FFFFFFFFFFFFFll)
return op;

if (DatumGetDateADT(castNode(Const, value)->constvalue) >=
(TS_DATE_END - interval->day +
ceil((double) interval->time / (double) USECS_PER_DAY)))
integralValue = const_datum_get_int(castNode(Const, value));
integralWidth =
interval->day + ceil((double) interval->time / (double) USECS_PER_DAY);

if (integralValue >= (TS_DATE_END - integralWidth))
return op;

datum = DateADTGetDatum(DatumGetDateADT(castNode(Const, value)->constvalue) +
interval->day +
ceil((double) interval->time / (double) USECS_PER_DAY));
/* When the time_bucket comparison is at the bucket boundary and we have a less-than
* constraint, we can skip adding the additional bucket; in all other cases we need
* to add the interval.
*/
if (strategy == BTLessStrategyNumber && integralValue % integralWidth == 0)
datum = DateADTGetDatum(integralValue);
else
datum = DateADTGetDatum(integralValue + integralWidth);

subst = (Expr *) makeConst(tce->type_id,
-1,
InvalidOid,
Expand All @@ -504,6 +519,7 @@ transform_time_bucket_comparison(PlannerInfo *root, OpExpr *op)

break;
}
case TIMESTAMPOID:
case TIMESTAMPTZOID:
{
Interval *interval = DatumGetIntervalP(width->constvalue);
Expand All @@ -519,74 +535,33 @@ transform_time_bucket_comparison(PlannerInfo *root, OpExpr *op)
/*
* If width interval has day component we merge it with time component
*/

integralWidth = interval->time;
if (interval->day != 0)
{
width = copyObject(width);
interval = DatumGetIntervalP(width->constvalue);

/*
* if our transformed restriction would overflow we skip adding it
*/
if (interval->time >= PG_INT64_MAX - interval->day * USECS_PER_DAY)
if (interval->time >= TS_TIMESTAMP_END - interval->day * USECS_PER_DAY)
return op;

interval->time += interval->day * USECS_PER_DAY;
interval->day = 0;
integralWidth += interval->day * USECS_PER_DAY;
}

if (DatumGetTimestampTz(castNode(Const, value)->constvalue) >=
(TS_TIMESTAMP_END - interval->time))
return op;

datum = TimestampTzGetDatum(
DatumGetTimestampTz(castNode(Const, value)->constvalue) + interval->time);
subst = (Expr *) makeConst(tce->type_id,
-1,
InvalidOid,
tce->typlen,
datum,
false,
tce->typbyval);

break;
}

case TIMESTAMPOID:
{
Interval *interval = DatumGetIntervalP(width->constvalue);

Assert(width->consttype == INTERVALOID);
integralValue = const_datum_get_int(castNode(Const, value));

/*
* Optimization can't be applied when interval has month component.
*/
if (interval->month != 0)
if (integralValue >= (TS_TIMESTAMP_END - integralWidth))
return op;

/*
* If width interval has day component we merge it with time component
/* When the time_bucket comparison is at the bucket boundary and we have a less-than
* constraint, we can skip adding the additional bucket; in all other cases we need
* to add the interval.
*/
if (interval->day != 0)
{
width = copyObject(width);
interval = DatumGetIntervalP(width->constvalue);

/*
* if our merged value overflows we skip adding it
*/
if (interval->time >= PG_INT64_MAX - interval->day * USECS_PER_DAY)
return op;

interval->time += interval->day * USECS_PER_DAY;
interval->day = 0;
}

if (DatumGetTimestamp(castNode(Const, value)->constvalue) >=
(TS_TIMESTAMP_END - interval->time))
return op;
if (strategy == BTLessStrategyNumber && integralValue % integralWidth == 0)
datum = int_get_datum(integralValue, tce->type_id);
else
datum = int_get_datum(integralValue + integralWidth, tce->type_id);

datum = TimestampGetDatum(DatumGetTimestamp(castNode(Const, value)->constvalue) +
interval->time);
subst = (Expr *) makeConst(tce->type_id,
-1,
InvalidOid,
Expand All @@ -597,6 +572,7 @@ transform_time_bucket_comparison(PlannerInfo *root, OpExpr *op)

break;
}

default:
return op;
break;
Expand Down
80 changes: 33 additions & 47 deletions test/expected/plan_expand_hypertable-12.out
Expand Up @@ -1054,16 +1054,13 @@ must not exclude chunks on m2
\qecho time_bucket exclusion
time_bucket exclusion
:PREFIX SELECT * FROM hyper WHERE time_bucket(10, time) < 10::bigint ORDER BY time;
QUERY PLAN
--------------------------------------------------------------------------------------------------------
QUERY PLAN
--------------------------------------------------------------------------------------------------
Sort
Sort Key: _hyper_1_1_chunk."time"
-> Append
-> Seq Scan on _hyper_1_1_chunk
Filter: (("time" < '20'::bigint) AND (time_bucket('10'::bigint, "time") < '10'::bigint))
-> Seq Scan on _hyper_1_2_chunk
Filter: (("time" < '20'::bigint) AND (time_bucket('10'::bigint, "time") < '10'::bigint))
(7 rows)
-> Seq Scan on _hyper_1_1_chunk
Filter: (("time" < '10'::bigint) AND (time_bucket('10'::bigint, "time") < '10'::bigint))
(4 rows)

:PREFIX SELECT * FROM hyper WHERE time_bucket(10, time) < 11::bigint ORDER BY time;
QUERY PLAN
Expand Down Expand Up @@ -1094,16 +1091,13 @@ time_bucket exclusion
(9 rows)

:PREFIX SELECT * FROM hyper WHERE 10::bigint > time_bucket(10, time) ORDER BY time;
QUERY PLAN
--------------------------------------------------------------------------------------------------------
QUERY PLAN
--------------------------------------------------------------------------------------------------
Sort
Sort Key: _hyper_1_1_chunk."time"
-> Append
-> Seq Scan on _hyper_1_1_chunk
Filter: (("time" < '20'::bigint) AND ('10'::bigint > time_bucket('10'::bigint, "time")))
-> Seq Scan on _hyper_1_2_chunk
Filter: (("time" < '20'::bigint) AND ('10'::bigint > time_bucket('10'::bigint, "time")))
(7 rows)
-> Seq Scan on _hyper_1_1_chunk
Filter: (("time" < '10'::bigint) AND ('10'::bigint > time_bucket('10'::bigint, "time")))
(4 rows)

:PREFIX SELECT * FROM hyper WHERE 11::bigint > time_bucket(10, time) ORDER BY time;
QUERY PLAN
Expand Down Expand Up @@ -1230,67 +1224,59 @@ transformation would be out of range
Sort Key: _hyper_1_2_chunk."time"
-> Append
-> Seq Scan on _hyper_1_2_chunk
Filter: (("time" > 10) AND ("time" < '110'::bigint) AND (time_bucket('10'::bigint, "time") > 10) AND (time_bucket('10'::bigint, "time") < 100))
Filter: (("time" > 10) AND ("time" < '100'::bigint) AND (time_bucket('10'::bigint, "time") > 10) AND (time_bucket('10'::bigint, "time") < 100))
-> Seq Scan on _hyper_1_3_chunk
Filter: (("time" > 10) AND ("time" < '110'::bigint) AND (time_bucket('10'::bigint, "time") > 10) AND (time_bucket('10'::bigint, "time") < 100))
Filter: (("time" > 10) AND ("time" < '100'::bigint) AND (time_bucket('10'::bigint, "time") > 10) AND (time_bucket('10'::bigint, "time") < 100))
-> Seq Scan on _hyper_1_4_chunk
Filter: (("time" > 10) AND ("time" < '110'::bigint) AND (time_bucket('10'::bigint, "time") > 10) AND (time_bucket('10'::bigint, "time") < 100))
Filter: (("time" > 10) AND ("time" < '100'::bigint) AND (time_bucket('10'::bigint, "time") > 10) AND (time_bucket('10'::bigint, "time") < 100))
-> Seq Scan on _hyper_1_5_chunk
Filter: (("time" > 10) AND ("time" < '110'::bigint) AND (time_bucket('10'::bigint, "time") > 10) AND (time_bucket('10'::bigint, "time") < 100))
Filter: (("time" > 10) AND ("time" < '100'::bigint) AND (time_bucket('10'::bigint, "time") > 10) AND (time_bucket('10'::bigint, "time") < 100))
-> Seq Scan on _hyper_1_6_chunk
Filter: (("time" > 10) AND ("time" < '110'::bigint) AND (time_bucket('10'::bigint, "time") > 10) AND (time_bucket('10'::bigint, "time") < 100))
Filter: (("time" > 10) AND ("time" < '100'::bigint) AND (time_bucket('10'::bigint, "time") > 10) AND (time_bucket('10'::bigint, "time") < 100))
-> Seq Scan on _hyper_1_7_chunk
Filter: (("time" > 10) AND ("time" < '110'::bigint) AND (time_bucket('10'::bigint, "time") > 10) AND (time_bucket('10'::bigint, "time") < 100))
Filter: (("time" > 10) AND ("time" < '100'::bigint) AND (time_bucket('10'::bigint, "time") > 10) AND (time_bucket('10'::bigint, "time") < 100))
-> Seq Scan on _hyper_1_8_chunk
Filter: (("time" > 10) AND ("time" < '110'::bigint) AND (time_bucket('10'::bigint, "time") > 10) AND (time_bucket('10'::bigint, "time") < 100))
Filter: (("time" > 10) AND ("time" < '100'::bigint) AND (time_bucket('10'::bigint, "time") > 10) AND (time_bucket('10'::bigint, "time") < 100))
-> Seq Scan on _hyper_1_9_chunk
Filter: (("time" > 10) AND ("time" < '110'::bigint) AND (time_bucket('10'::bigint, "time") > 10) AND (time_bucket('10'::bigint, "time") < 100))
Filter: (("time" > 10) AND ("time" < '100'::bigint) AND (time_bucket('10'::bigint, "time") > 10) AND (time_bucket('10'::bigint, "time") < 100))
-> Seq Scan on _hyper_1_10_chunk
Filter: (("time" > 10) AND ("time" < '110'::bigint) AND (time_bucket('10'::bigint, "time") > 10) AND (time_bucket('10'::bigint, "time") < 100))
-> Seq Scan on _hyper_1_11_chunk
Filter: (("time" > 10) AND ("time" < '110'::bigint) AND (time_bucket('10'::bigint, "time") > 10) AND (time_bucket('10'::bigint, "time") < 100))
(23 rows)
Filter: (("time" > 10) AND ("time" < '100'::bigint) AND (time_bucket('10'::bigint, "time") > 10) AND (time_bucket('10'::bigint, "time") < 100))
(21 rows)

:PREFIX SELECT * FROM hyper WHERE time_bucket(10, time) > 10 AND time_bucket(10, time) < 20 ORDER BY time;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------
Sort
Sort Key: _hyper_1_2_chunk."time"
-> Append
-> Seq Scan on _hyper_1_2_chunk
Filter: (("time" > 10) AND ("time" < '30'::bigint) AND (time_bucket('10'::bigint, "time") > 10) AND (time_bucket('10'::bigint, "time") < 20))
-> Seq Scan on _hyper_1_3_chunk
Filter: (("time" > 10) AND ("time" < '30'::bigint) AND (time_bucket('10'::bigint, "time") > 10) AND (time_bucket('10'::bigint, "time") < 20))
(7 rows)
-> Seq Scan on _hyper_1_2_chunk
Filter: (("time" > 10) AND ("time" < '20'::bigint) AND (time_bucket('10'::bigint, "time") > 10) AND (time_bucket('10'::bigint, "time") < 20))
(4 rows)

:PREFIX SELECT * FROM hyper WHERE time_bucket(1, time) > 11 AND time_bucket(1, time) < 19 ORDER BY time;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------
Sort
Sort Key: _hyper_1_2_chunk."time"
-> Seq Scan on _hyper_1_2_chunk
Filter: (("time" > 11) AND ("time" < '20'::bigint) AND (time_bucket('1'::bigint, "time") > 11) AND (time_bucket('1'::bigint, "time") < 19))
Filter: (("time" > 11) AND ("time" < '19'::bigint) AND (time_bucket('1'::bigint, "time") > 11) AND (time_bucket('1'::bigint, "time") < 19))
(4 rows)

:PREFIX SELECT * FROM hyper WHERE 10 < time_bucket(10, time) AND 20 > time_bucket(10,time) ORDER BY time;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------
Sort
Sort Key: _hyper_1_2_chunk."time"
-> Append
-> Seq Scan on _hyper_1_2_chunk
Filter: (("time" > 10) AND ("time" < '30'::bigint) AND (10 < time_bucket('10'::bigint, "time")) AND (20 > time_bucket('10'::bigint, "time")))
-> Seq Scan on _hyper_1_3_chunk
Filter: (("time" > 10) AND ("time" < '30'::bigint) AND (10 < time_bucket('10'::bigint, "time")) AND (20 > time_bucket('10'::bigint, "time")))
(7 rows)
-> Seq Scan on _hyper_1_2_chunk
Filter: (("time" > 10) AND ("time" < '20'::bigint) AND (10 < time_bucket('10'::bigint, "time")) AND (20 > time_bucket('10'::bigint, "time")))
(4 rows)

\qecho time_bucket exclusion with date
time_bucket exclusion with date
:PREFIX SELECT * FROM metrics_date WHERE time_bucket('1d',time) < '2000-01-03' ORDER BY time;
QUERY PLAN
-----------------------------------------------------------------------------------------------
Index Only Scan Backward using _hyper_8_171_chunk_metrics_date_time_idx on _hyper_8_171_chunk
Index Cond: ("time" < '01-04-2000'::date)
Index Cond: ("time" < '01-03-2000'::date)
Filter: (time_bucket('@ 1 day'::interval, "time") < '01-03-2000'::date)
(3 rows)

Expand All @@ -1313,7 +1299,7 @@ time_bucket exclusion with timestamp
QUERY PLAN
----------------------------------------------------------------------------------------------------------------
Index Only Scan Backward using _hyper_5_155_chunk_metrics_timestamp_time_idx on _hyper_5_155_chunk
Index Cond: ("time" < 'Tue Jan 04 00:00:00 2000'::timestamp without time zone)
Index Cond: ("time" < 'Mon Jan 03 00:00:00 2000'::timestamp without time zone)
Filter: (time_bucket('@ 1 day'::interval, "time") < 'Mon Jan 03 00:00:00 2000'::timestamp without time zone)
(3 rows)

Expand Down

0 comments on commit 4bb126b

Please sign in to comment.