Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve infering start and stop arguments from gapfill query #2059

Merged
merged 1 commit into from Jul 10, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
112 changes: 103 additions & 9 deletions tsl/src/nodes/gapfill/exec.c
Expand Up @@ -374,6 +374,99 @@ get_boundary_expr_value(GapFillState *state, GapFillBoundary boundary, Expr *exp
return gapfill_datum_get_internal(arg_value, state->gapfill_typid);
}

typedef struct CollectBoundaryContext
{
List *quals;
Var *ts_var;
} CollectBoundaryContext;

/*
* expression references our gapfill time column and could be
* a boundary expression, more thorough check is in
* infer_gapfill_boundary
*/
static bool
is_boundary_expr(Node *node, CollectBoundaryContext *context)
{
OpExpr *op;
Node *left, *right;

if (!IsA(node, OpExpr))
return false;

op = castNode(OpExpr, node);

if (op->args->length != 2)
return false;

left = linitial(op->args);
right = llast(op->args);

/* Var OP Var is not useful here because we are not yet at a point
* where we could evaluate them */
if (IsA(left, Var) && IsA(right, Var))
return false;

if (IsA(left, Var) && var_equal(castNode(Var, left), context->ts_var))
return true;

if (IsA(right, Var) && var_equal(castNode(Var, right), context->ts_var))
return true;

return false;
}

static bool
collect_boundary_walker(Node *node, CollectBoundaryContext *context)
{
Node *quals = NULL;

if (node == NULL)
return false;

if (IsA(node, FromExpr))
{
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: elide braces, like above.

quals = castNode(FromExpr, node)->quals;
}
else if (IsA(node, JoinExpr))
{
JoinExpr *j = castNode(JoinExpr, node);

/* don't descend into outer join */
if (IS_OUTER_JOIN(j->jointype))
return false;

quals = j->quals;
}

if (quals)
{
ListCell *lc;

foreach (lc, castNode(List, quals))
{
if (is_boundary_expr(lfirst(lc), context))
context->quals = lappend(context->quals, lfirst(lc));
}
}

return expression_tree_walker(node, collect_boundary_walker, context);
}

/*
* traverse jointree to look for expressions referencing
* the time column of our gapfill call
*/
static List *
collect_boundary_expressions(Node *node, Var *ts_var)
{
CollectBoundaryContext context = { .quals = NIL, .ts_var = ts_var };

collect_boundary_walker(node, &context);

return context.quals;
}

static int64
infer_gapfill_boundary(GapFillState *state, GapFillBoundary boundary)
{
Expand All @@ -385,6 +478,7 @@ infer_gapfill_boundary(GapFillState *state, GapFillBoundary boundary)
TypeCacheEntry *tce = lookup_type_cache(state->gapfill_typid, TYPECACHE_BTREE_OPFAMILY);
int strategy;
Oid lefttype, righttype;
List *quals;

int64 boundary_value = 0;
bool boundary_found = false;
Expand All @@ -403,19 +497,16 @@ infer_gapfill_boundary(GapFillState *state, GapFillBoundary boundary)

ts_var = castNode(Var, lsecond(func->args));

foreach (lc, (List *) jt->quals)
quals = collect_boundary_expressions((Node *) jt, ts_var);

foreach (lc, quals)
{
OpExpr *opexpr;
OpExpr *opexpr = lfirst_node(OpExpr, lc);
Var *var;
Expr *expr;
Oid op;
int64 value;

if (!IsA(lfirst(lc), OpExpr))
continue;

opexpr = lfirst(lc);

if (IsA(linitial(opexpr->args), Var))
{
var = linitial(opexpr->args);
Expand All @@ -429,7 +520,11 @@ infer_gapfill_boundary(GapFillState *state, GapFillBoundary boundary)
op = get_commutator(opexpr->opno);
}
else
{
/* collect_boundary_expressions has filtered those out already */
Assert(false);
continue;
}

if (!op_in_opfamily(op, tce->btree_opf))
continue;
Expand Down Expand Up @@ -483,8 +578,7 @@ infer_gapfill_boundary(GapFillState *state, GapFillBoundary boundary)

ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid time_bucket_gapfill argument: could not infer %s boundary from WHERE "
"clause",
errmsg("missing time_bucket_gapfill argument: could not infer %s from WHERE clause",
boundary == GAPFILL_START ? "start" : "finish"),
errhint("You can either pass start and finish as arguments or in the WHERE clause")));
pg_unreachable();
Expand Down
55 changes: 40 additions & 15 deletions tsl/test/expected/gapfill.out
Expand Up @@ -217,12 +217,12 @@ SELECT
time_bucket_gapfill(1,time,NULL,11)
FROM (VALUES (1),(2)) v(time)
GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause
ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause
SELECT
time_bucket_gapfill(1,time,1,NULL)
FROM (VALUES (1),(2)) v(time)
GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer finish boundary from WHERE clause
ERROR: missing time_bucket_gapfill argument: could not infer finish from WHERE clause
-- test 0 bucket_width
SELECT
time_bucket_gapfill(0,time,1,11)
Expand Down Expand Up @@ -1701,21 +1701,21 @@ SELECT
FROM (VALUES (1),(2)) v(t)
WHERE true AND true
GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause
ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause
-- NULL start/finish and no usable time constraints
SELECT
time_bucket_gapfill(1,t,NULL,NULL)
FROM (VALUES (1),(2)) v(t)
WHERE true AND true
GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause
ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause
-- no start and no usable time constraints
SELECT
time_bucket_gapfill(1,t,finish:=1)
FROM (VALUES (1),(2)) v(t)
WHERE true AND true
GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause
ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause
-- NULL start expression and no usable time constraints
SELECT
time_bucket_gapfill(1,t,CASE WHEN length(version())>0 THEN NULL::int ELSE NULL::int END,1)
Expand All @@ -1736,7 +1736,7 @@ SELECT
FROM (VALUES (1),(2)) v(t)
WHERE true AND true
GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause
ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause
-- NULL finish expression and no usable time constraints
SELECT
time_bucket_gapfill(1,t,1,CASE WHEN length(version())>0 THEN NULL::int ELSE NULL::int END)
Expand All @@ -1757,21 +1757,21 @@ SELECT
FROM (VALUES (1),(2)) v(t)
WHERE true AND true
GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer finish boundary from WHERE clause
ERROR: missing time_bucket_gapfill argument: could not infer finish from WHERE clause
-- NULL finish and no usable time constraints
SELECT
time_bucket_gapfill(1,t,1,NULL)
FROM (VALUES (1),(2)) v(t)
WHERE true AND true
GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer finish boundary from WHERE clause
ERROR: missing time_bucket_gapfill argument: could not infer finish from WHERE clause
-- expression with column reference on right side
SELECT
time_bucket_gapfill(1,t)
FROM (VALUES (1),(2)) v(t)
WHERE t > t AND t < 2
GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause
ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause
-- expression with cast
SELECT
time_bucket_gapfill(1,t1::int8)
Expand Down Expand Up @@ -1820,7 +1820,7 @@ SELECT
FROM metrics_int
WHERE time =0 AND time < 2
GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause
ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause
-- time_bucket_gapfill with constraints ORed
SELECT
time_bucket_gapfill(1::int8,t::int8)
Expand All @@ -1835,22 +1835,47 @@ SELECT
FROM metrics_int m, metrics_int m2
WHERE m.time >=0 AND m.time < 2
GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause
ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause
-- test inner join and where clause doesnt match gapfill argument
SELECT
time_bucket_gapfill(1,m2.time)
FROM metrics_int m1 INNER JOIN metrics_int m2 ON m1.time=m2.time
WHERE m1.time >=0 AND m1.time < 2
GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause
ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause
-- test outer join with constraints in join condition
-- not usable as start/stop
SELECT
time_bucket_gapfill(1,m1.time)
FROM metrics_int m1 LEFT OUTER JOIN metrics_int m2 ON m1.time=m2.time AND m1.time >=0 AND m1.time < 2
GROUP BY 1;
ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause
\set ON_ERROR_STOP 1
-- test subqueries
-- subqueries will alter the shape of the plan and top-level constraints
-- might not end up in top-level of jointree
SELECT
time_bucket_gapfill(1,m1.time)
FROM metrics_int m1
WHERE m1.time >=0 AND m1.time < 2 AND device_id IN (SELECT device_id FROM metrics_int)
GROUP BY 1;
time_bucket_gapfill
---------------------
0
1
(2 rows)

-- test inner join with constraints in join condition
-- only toplevel where clause constraints are supported atm
SELECT
time_bucket_gapfill(1,m2.time)
FROM metrics_int m1 INNER JOIN metrics_int m2 ON m1.time=m2.time AND m2.time >=0 AND m2.time < 2
GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause
\set ON_ERROR_STOP 1
time_bucket_gapfill
---------------------
0
1
(2 rows)

-- int32 time_bucket_gapfill with no start/finish
SELECT
time_bucket_gapfill(1,t)
Expand Down
21 changes: 18 additions & 3 deletions tsl/test/sql/gapfill.sql
Expand Up @@ -1150,15 +1150,30 @@ FROM metrics_int m1 INNER JOIN metrics_int m2 ON m1.time=m2.time
WHERE m1.time >=0 AND m1.time < 2
GROUP BY 1;

-- test outer join with constraints in join condition
-- not usable as start/stop
SELECT
time_bucket_gapfill(1,m1.time)
FROM metrics_int m1 LEFT OUTER JOIN metrics_int m2 ON m1.time=m2.time AND m1.time >=0 AND m1.time < 2
GROUP BY 1;

\set ON_ERROR_STOP 1

-- test subqueries
-- subqueries will alter the shape of the plan and top-level constraints
-- might not end up in top-level of jointree
SELECT
time_bucket_gapfill(1,m1.time)
FROM metrics_int m1
WHERE m1.time >=0 AND m1.time < 2 AND device_id IN (SELECT device_id FROM metrics_int)
GROUP BY 1;

-- test inner join with constraints in join condition
-- only toplevel where clause constraints are supported atm
SELECT
time_bucket_gapfill(1,m2.time)
FROM metrics_int m1 INNER JOIN metrics_int m2 ON m1.time=m2.time AND m2.time >=0 AND m2.time < 2
GROUP BY 1;

\set ON_ERROR_STOP 1

-- int32 time_bucket_gapfill with no start/finish
SELECT
time_bucket_gapfill(1,t)
Expand Down