Skip to content

Commit

Permalink
Fix crash when partializing agg with HAVING
Browse files Browse the repository at this point in the history
This change fixes an assertion-based crash that happened when using
the `partialize_agg` function together with HAVING clauses. For instance,

```
SELECT time_bucket('1 day', time), device,
__timescaledb_internal.partialize_agg(avg(temp))
GROUP BY 1, 2
HAVING avg(temp) > 3;
```

would crash because the HAVING clause's aggregate didn't have its
`Aggref` node set to partial aggregate mode.

Regular partial aggregations executed by the planner (i.e., those not
induced by the `partialize_agg` function) have their HAVING aggs
transparently moved to the target list during planning so that the
finalize node can use it when applying the final filter on the
resulting groups. However, it doesn't make much sense to transparently
do that when partializing with `partialize_agg` since it would be odd
to return more columns than requested by the user. Therefore, the
caller would have to do that manually. This, in fact, is also done
when materializing continuous aggregates.

For this reason, HAVING clauses with `partialize_agg` are blocked,
except in cases where the planner transparently reduces the HAVING
expression to a simple filter (e.g., `HAVING device > 3`).

Apart from fixing this issue, this change also refectors some of the
related code and adds tests for some error cases.
  • Loading branch information
erimatnor committed Nov 28, 2019
1 parent e82b94a commit 96f9efe
Show file tree
Hide file tree
Showing 4 changed files with 179 additions and 56 deletions.
186 changes: 134 additions & 52 deletions src/plan_partialize.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,13 @@
#include <postgres.h>
#include <catalog/pg_type.h>
#include <nodes/nodeFuncs.h>
#include <nodes/nodes.h>
#include <nodes/pg_list.h>
#include <nodes/relation.h>
#include <optimizer/planner.h>
#include <optimizer/cost.h>
#include <optimizer/var.h>
#include <optimizer/tlist.h>
#include <parser/parse_func.h>
#include <utils/lsyscache.h>

Expand All @@ -15,15 +21,30 @@
#include "utils.h"

#define TS_PARTIALFN "partialize_agg"

typedef struct PartializeWalkerState
{
bool found_partialize;
bool found_non_partial_agg;
bool looking_for_agg;
Oid fnoid;
} PartializeWalkerState;

/*
* Look for the partialize function in a target list and mark the wrapped
* aggregate as a partial aggregate.
*
* The partialize function is an expression of the form:
*
* _timescaledb_internal.partialize_agg(avg(temp))
*
* where avg(temp) can be replaced by any aggregate that can be partialized.
*
* When such an expression is found, this function will mark the Aggref node
* for the aggregate as partial.
*/
static bool
partialize_function_call_walker(Node *node, PartializeWalkerState *state)
check_for_partialize_function_call(Node *node, PartializeWalkerState *state)
{
if (node == NULL)
return false;
Expand All @@ -32,86 +53,147 @@ partialize_function_call_walker(Node *node, PartializeWalkerState *state)
* If the last node we saw was partialize, the next one must be aggregate
* we're partializing
*/
if (state->looking_for_agg)
{
Aggref *agg_ref;
if (state->looking_for_agg && !IsA(node, Aggref))
elog(ERROR, "the input to partialize must be an aggregate");

if (!IsA(node, Aggref))
elog(ERROR, "The input to partialize must be an aggregate");
if (IsA(node, Aggref))
{
Aggref *aggref = castNode(Aggref, node);

agg_ref = castNode(Aggref, node);
agg_ref->aggsplit = AGGSPLIT_INITIAL_SERIAL;
if (agg_ref->aggtranstype == INTERNALOID && DO_AGGSPLIT_SERIALIZE(AGGSPLIT_INITIAL_SERIAL))
agg_ref->aggtype = BYTEAOID;
else
agg_ref->aggtype = agg_ref->aggtranstype;
if (state->looking_for_agg)
{
state->looking_for_agg = false;
aggref->aggsplit = AGGSPLIT_INITIAL_SERIAL;

if (aggref->aggtranstype == INTERNALOID &&
DO_AGGSPLIT_SERIALIZE(AGGSPLIT_INITIAL_SERIAL))
aggref->aggtype = BYTEAOID;
else
aggref->aggtype = aggref->aggtranstype;
}

state->looking_for_agg = false;
/* We currently cannot handle cases like
* SELECT sum(i), partialize(sum(i)) ...
*
* We check for non-partial aggs to ensure that if any of the aggregates
* in a statement are partialized, all of them have to be.
*/
else if (aggref->aggsplit != AGGSPLIT_INITIAL_SERIAL)
state->found_non_partial_agg = true;
}
else if (IsA(node, FuncExpr) && ((FuncExpr *) node)->funcid == state->fnoid)
{
state->found_partialize = true;
state->looking_for_agg = true;
}

return expression_tree_walker((Node *) node, partialize_function_call_walker, state);
return expression_tree_walker(node, check_for_partialize_function_call, state);
}

/* We currently cannot handle cases like
* SELECT sum(i), partialize(sum(i)) ...
* instead we use this function to ensure that if any of the aggregates in a statement are
* partialized, all of them are
*/
static bool
ensure_only_partials(Node *node, void *state)
{
if (node == NULL)
return false;

if (IsA(node, Aggref) && castNode(Aggref, node)->aggsplit != AGGSPLIT_INITIAL_SERIAL)
elog(ERROR, "Cannot mix partialized and non-partialized aggregates in the same statement");

return expression_tree_walker((Node *) node, ensure_only_partials, state);
}

void
ts_plan_process_partialize_agg(PlannerInfo *root, RelOptInfo *input_rel, RelOptInfo *output_rel)
has_partialize_function(Query *parse)
{
Oid partialfnoid = InvalidOid;
Oid argtyp[] = { ANYELEMENTOID };
Query *parse = root->parse;

PartializeWalkerState state = { .found_partialize = false,
.found_non_partial_agg = false,
.looking_for_agg = false,
.fnoid = InvalidOid };
List *name = list_make2(makeString(INTERNAL_SCHEMA_NAME), makeString(TS_PARTIALFN));
ListCell *lc;

if (CMD_SELECT != parse->commandType)
return;
partialfnoid = LookupFuncName(name, lengthof(argtyp), argtyp, false);
Assert(partialfnoid != InvalidOid);

state.fnoid = partialfnoid;
partialize_function_call_walker((Node *) parse->targetList, &state);
check_for_partialize_function_call((Node *) parse->targetList, &state);

if (state.found_partialize && state.found_non_partial_agg)
elog(ERROR, "cannot mix partialized and non-partialized aggregates in the same statement");

if (state.found_partialize)
return state.found_partialize;
}

/*
* Modify all AggPaths in relation to use partial aggregation.
*/
static void
partialize_agg_paths(RelOptInfo *rel)
{
ListCell *lc;

foreach (lc, rel->pathlist)
{
ensure_only_partials((Node *) parse->targetList, NULL);
Path *path = lfirst(lc);

foreach (lc, input_rel->pathlist)
{
Path *path = lfirst(lc);
if (IsA(path, AggPath))
castNode(AggPath, path)->aggsplit = AGGSPLIT_INITIAL_SERIAL;
}
}

if (IsA(path, AggPath))
((AggPath *) path)->aggsplit = AGGSPLIT_INITIAL_SERIAL;
}
/*
* Turn an aggregate relation into a partial aggregate relation if aggregates
* are enclosed by the partialize_agg function.
*
* The partialize_agg function can "manually" partialize an aggregate. For
* instance:
*
* SELECT time_bucket('1 day', time), device,
* __timescaledb_internal.partialize_agg(avg(temp))
* GROUP BY 1, 2;
*
* Would compute the partial aggregate of avg(temp).
*
* The plan to compute the relation must be either entirely non-partial or
* entirely partial, so it is not possible to mix partials and
* non-partials. Note that aggregates can appear in both the target list and the
* HAVING clause, for instance:
*
* SELECT time_bucket('1 day', time), device, avg(temp)
* GROUP BY 1, 2
* HAVING avg(temp) > 3;
*
* Regular partial aggregations executed by the planner (i.e., those not induced
* by the partialize_agg function) have their HAVING aggregates transparently
* moved to the target list during planning so that the finalize node can use it
* when applying the final filter on the resulting groups, obviously omitting
* the extra columns in the final output/projection. However, it doesn't make
* much sense to transparently do that when partializing with partialize_agg
* since it would be odd to return more columns than requested by the
* user. Therefore, the caller would have to do that manually. This, in fact, is
* also done when materializing continuous aggregates.
*
* For this reason, HAVING clauses with partialize_agg are blocked, except in
* cases where the planner transparently reduces the having expression to a
* simple filter (e.g., HAVING device > 3). In such cases, the HAVING clause is
* removed and replaced by a filter on the input.
*/
void
ts_plan_process_partialize_agg(PlannerInfo *root, RelOptInfo *input_rel, RelOptInfo *output_rel)
{
Query *parse = root->parse;

foreach (lc, output_rel->pathlist)
{
Path *path = lfirst(lc);
#if PG10_GE
Assert(IS_UPPER_REL(output_rel));
#endif

if (IsA(path, AggPath))
((AggPath *) path)->aggsplit = AGGSPLIT_INITIAL_SERIAL;
}
if (CMD_SELECT != parse->commandType || !parse->hasAggs)
return;

if (has_partialize_function(parse))
{
/* We cannot check root->hasHavingqual here because sometimes the
* planner can replace the HAVING clause with a simple filter. But
* root->hashavingqual stays true to remember that the query had a
* HAVING clause initially. */
if (NULL != parse->havingQual)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot partialize aggregate with HAVING clause"),
errhint(
"Any aggregates in a HAVING clause need to be partialized in the output "
"target list.")));

partialize_agg_paths(input_rel);
partialize_agg_paths(output_rel);
}
}
8 changes: 6 additions & 2 deletions src/planner.c
Original file line number Diff line number Diff line change
Expand Up @@ -704,8 +704,11 @@ timescale_create_upper_paths_hook(PlannerInfo *root, UpperRelationKind stage, Re
if (output_rel->pathlist != NIL)
output_rel->pathlist = replace_hypertable_insert_paths(root, output_rel->pathlist);

/* modify aggregates that need to be partialized */
ts_plan_process_partialize_agg(root, input_rel, output_rel);
if (parse->hasAggs)
{
/* modify aggregates that need to be partialized */
ts_plan_process_partialize_agg(root, input_rel, output_rel);
}
}

if (ts_guc_disable_optimizations || input_rel == NULL || IS_DUMMY_REL(input_rel))
Expand All @@ -717,6 +720,7 @@ timescale_create_upper_paths_hook(PlannerInfo *root, UpperRelationKind stage, Re
if (UPPERREL_GROUP_AGG == stage && output_rel != NULL)
{
ts_plan_add_hashagg(root, input_rel, output_rel);

if (parse->hasAggs)
ts_preprocess_first_last_aggregates(root, root->processed_tlist);
}
Expand Down
25 changes: 23 additions & 2 deletions tsl/test/expected/partialize_finalize.out
Original file line number Diff line number Diff line change
Expand Up @@ -243,10 +243,31 @@ select length(_timescaledb_internal.partialize_agg(min(a+1))) from foo;

\set ON_ERROR_STOP 0
select length(_timescaledb_internal.partialize_agg(1+min(a))) from foo;
ERROR: The input to partialize must be an aggregate
ERROR: the input to partialize must be an aggregate
select length(_timescaledb_internal.partialize_agg(min(a)+min(a))) from foo;
ERROR: The input to partialize must be an aggregate
ERROR: the input to partialize must be an aggregate
--non-trivial HAVING clause not allowed with partialize_agg
select time_bucket('1 hour', b) as b, _timescaledb_internal.partialize_agg(avg(a))
from foo
group by 1
having avg(a) > 3;
ERROR: cannot partialize aggregate with HAVING clause
--mixing partialized and non-partialized aggs is not allowed
select time_bucket('1 hour', b) as b, _timescaledb_internal.partialize_agg(avg(a)), sum(a)
from foo
group by 1;
ERROR: cannot mix partialized and non-partialized aggregates in the same statement
\set ON_ERROR_STOP 1
--partializing works with HAVING when the planner can effectively
--reduce it. In this case to a simple filter.
select time_bucket('1 hour', b) as b, toastval, _timescaledb_internal.partialize_agg(avg(a))
from foo
group by b, toastval
having toastval LIKE 'does not exist';
b | toastval | partialize_agg
---+----------+----------------
(0 rows)

--
-- TEST FINALIZEFUNC_EXTRA
--
Expand Down
16 changes: 16 additions & 0 deletions tsl/test/sql/partialize_finalize.sql
Original file line number Diff line number Diff line change
Expand Up @@ -182,8 +182,24 @@ select length(_timescaledb_internal.partialize_agg(min(a+1))) from foo;
\set ON_ERROR_STOP 0
select length(_timescaledb_internal.partialize_agg(1+min(a))) from foo;
select length(_timescaledb_internal.partialize_agg(min(a)+min(a))) from foo;
--non-trivial HAVING clause not allowed with partialize_agg
select time_bucket('1 hour', b) as b, _timescaledb_internal.partialize_agg(avg(a))
from foo
group by 1
having avg(a) > 3;
--mixing partialized and non-partialized aggs is not allowed
select time_bucket('1 hour', b) as b, _timescaledb_internal.partialize_agg(avg(a)), sum(a)
from foo
group by 1;
\set ON_ERROR_STOP 1

--partializing works with HAVING when the planner can effectively
--reduce it. In this case to a simple filter.
select time_bucket('1 hour', b) as b, toastval, _timescaledb_internal.partialize_agg(avg(a))
from foo
group by b, toastval
having toastval LIKE 'does not exist';

--
-- TEST FINALIZEFUNC_EXTRA
--
Expand Down

0 comments on commit 96f9efe

Please sign in to comment.