From 28a11594cf94c59f41956bc66a5353474936c690 Mon Sep 17 00:00:00 2001 From: Marko Grujic Date: Tue, 11 Jan 2022 14:58:07 +0000 Subject: [PATCH 1/8] Add const, list and op_expr node parsing cases to be able to classify remote quals properly --- src/deparse.c | 101 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) diff --git a/src/deparse.c b/src/deparse.c index e813201d..cfa8e59b 100644 --- a/src/deparse.c +++ b/src/deparse.c @@ -218,6 +218,107 @@ multicorn_foreign_expr_walker(Node *node, } } break; + case T_Const: + { + Const *c = (Const *) node; + + /* TODO: see if Python FDW instance can handle interval type */ + if (c->consttype == INTERVALOID) + return false; + + /* + * If the constant has nondefault collation, either it's of a + * non-builtin type, or it reflects folding of a CollateExpr; + * either way, it's unsafe to send to the remote. + */ + if (c->constcollid != InvalidOid && + c->constcollid != DEFAULT_COLLATION_OID) + return false; + + /* Otherwise, we can consider that it doesn't set collation */ + collation = InvalidOid; + state = FDW_COLLATE_NONE; + } + break; + case T_OpExpr: + { + OpExpr *oe = (OpExpr *) node; + const char *operatorName = get_opname(oe->opno); + + /* + * TODO: Consult Python FDW instance on portability of various + * operators such as Factorial (!), Bitwise XOR (^), ILIKE, etc. + */ + if (strcmp(operatorName, "!") == 0 + || strcmp(operatorName, "^") == 0 + || strcmp(operatorName, "~~*") == 0 + || strcmp(operatorName, "!~~*") == 0) + { + return false; + } + + /* + * Similarly, only built-in operators can be sent to remote. + * (If the operator is, surely its underlying function is + * too.) + */ + if (!multicorn_is_builtin(oe->opno)) + return false; + + /* + * Recurse to input subexpressions. + */ + if (!multicorn_foreign_expr_walker((Node *) oe->args, + glob_cxt, &inner_cxt)) + return false; + + /* + * If operator's input collation is not derived from a foreign + * Var, it can't be sent to remote. + */ + if (oe->inputcollid == InvalidOid) + /* OK, inputs are all noncollatable */ ; + else if (inner_cxt.state != FDW_COLLATE_SAFE || + oe->inputcollid != inner_cxt.collation) + return false; + + /* Result-collation handling is same as for functions */ + collation = oe->opcollid; + if (collation == InvalidOid) + state = FDW_COLLATE_NONE; + else if (inner_cxt.state == FDW_COLLATE_SAFE && + collation == inner_cxt.collation) + state = FDW_COLLATE_SAFE; + else + state = FDW_COLLATE_UNSAFE; + } + break; + case T_List: + { + List *l = (List *) node; + ListCell *lc; + + /* + * Recurse to component subexpressions. + */ + foreach(lc, l) + { + if (!multicorn_foreign_expr_walker((Node *) lfirst(lc), + glob_cxt, &inner_cxt)) + return false; + } + + /* + * When processing a list, collation state just bubbles up + * from the list elements. + */ + collation = inner_cxt.collation; + state = inner_cxt.state; + + /* Don't apply exprType() to the list. */ + check_type = false; + } + break; case T_Aggref: { Aggref *agg = (Aggref *) node; From 6e8759a8fdc6eb26aedaa4d95c1a0e1047565e2f Mon Sep 17 00:00:00 2001 From: Marko Grujic Date: Thu, 13 Jan 2022 14:43:54 +0000 Subject: [PATCH 2/8] Working implementation of upper rel + qual clause pushdown combo --- src/multicorn.c | 70 +++++++++++++++++++++++++++++++++++++++++++------ src/multicorn.h | 11 ++++++++ src/python.c | 2 +- src/query.c | 8 +++--- 4 files changed, 78 insertions(+), 13 deletions(-) diff --git a/src/multicorn.c b/src/multicorn.c index 410e3f71..9f1de8d2 100644 --- a/src/multicorn.c +++ b/src/multicorn.c @@ -529,7 +529,7 @@ multicornGetForeignPlan(PlannerInfo *root, #endif ) { - MulticornPlanState *planstate = (MulticornPlanState *) foreignrel->fdw_private; + MulticornPlanState *ofpinfo, *planstate = (MulticornPlanState *) foreignrel->fdw_private; Index scan_relid; List *fdw_scan_tlist = NIL; ListCell *lc; @@ -581,7 +581,26 @@ multicornGetForeignPlan(PlannerInfo *root, /* Extract data needed for aggregations on the Python side */ if (IS_UPPER_REL(foreignrel)) { + /* + * TODO: fdw_scan_tlist is present in the execute phase as well, via + * node->ss.ps.plan.fdw_scan_tlist, and instead of root, one can employ + * rte = exec_rt_fetch(rtindex, estate) to fetch the column name through + * get_attname. + * + * Migrating the below function into multicornBeginForeignScan would thus + * reduce the duplication of plan and execute fields that are now being + * serialized. + */ multicorn_extract_upper_rel_info(root, fdw_scan_tlist, planstate); + + /* + * We have already identified remote conditions in MulticornGetForeignRelSize + * via multicorn_classify_conditions and need to use those, since scan_clauses + * are empty for upper relations for some reason. We pass the extracted clauses + * to the scan phase via serializePlanState. + */ + ofpinfo = (MulticornPlanState *) planstate->outerrel->fdw_private; + planstate->remote_conds = extract_actual_clauses(ofpinfo->remote_conds, false); } return make_foreignscan(tlist, @@ -632,6 +651,8 @@ multicornBeginForeignScan(ForeignScanState *node, int eflags) ForeignScan *fscan = (ForeignScan *) node->ss.ps.plan; MulticornExecState *execstate; ListCell *lc; + int rtindex; + List *clauses; execstate = initializeExecState(fscan->fdw_private); @@ -641,12 +662,24 @@ multicornBeginForeignScan(ForeignScanState *node, int eflags) */ if (fscan->scan.scanrelid > 0) { + /* + * Simple/base relation + */ + execstate->rel = node->ss.ss_currentRelation; execstate->tupdesc = RelationGetDescr(execstate->rel); initConversioninfo(execstate->cinfos, TupleDescGetAttInMetadata(execstate->tupdesc), NULL); + + // Needed for parsing quals + rtindex = fscan->scan.scanrelid; + clauses = fscan->fdw_exprs; } else { + /* + * Upper (aggregation) relation + */ + execstate->rel = NULL; #if (PG_VERSION_NUM >= 140000) execstate->tupdesc = multicorn_get_tupdesc_for_join_scan_tuples(node); @@ -654,17 +687,25 @@ multicornBeginForeignScan(ForeignScanState *node, int eflags) execstate->tupdesc = node->ss.ss_ScanTupleSlot->tts_tupleDescriptor; #endif initConversioninfo(execstate->cinfos, TupleDescGetAttInMetadata(execstate->tupdesc), execstate->upper_rel_targets); + + /* + * In case of a join or aggregate use the lowest-numbered member RTE out + * of all all the base relations participating in the underlying scan. + * NB: This may not work well in case of joins, keep an eye out for it. + */ + rtindex = bms_next_member(fscan->fs_relids, -1); + clauses = execstate->remote_conds; } execstate->values = palloc(sizeof(Datum) * execstate->tupdesc->natts); execstate->nulls = palloc(sizeof(bool) * execstate->tupdesc->natts); - execstate->qual_list = NULL; - foreach(lc, fscan->fdw_exprs) - { - extractRestrictions(bms_make_singleton(fscan->scan.scanrelid), - ((Expr *) lfirst(lc)), - &execstate->qual_list); - } + execstate->qual_list = NULL; + foreach(lc, clauses) + { + extractRestrictions(bms_make_singleton(rtindex), + ((Expr *) lfirst(lc)), + &execstate->qual_list); + } execstate->subscanCxt = AllocSetContextCreate( node->ss.ps.state->es_query_cxt, @@ -1980,6 +2021,8 @@ serializePlanState(MulticornPlanState * state) result = lappend(result, state->group_clauses); + result = lappend(result, state->remote_conds); + return result; } @@ -1996,6 +2039,16 @@ initializeExecState(void *internalstate) Oid foreigntableid = ((Const *) lsecond(values))->constvalue; List *pathkeys; + ForeignTable *ftable = GetForeignTable(foreigntableid); + Relation rel = RelationIdGetRelation(ftable->relid); + AttInMetadata *attinmeta; + + attinmeta = TupleDescGetAttInMetadata(rel->rd_att); + execstate->qual_cinfos = palloc0(sizeof(ConversionInfo *) * + attinmeta->tupdesc->natts); + initConversioninfo(execstate->qual_cinfos, attinmeta, NULL); + RelationClose(rel); + /* Those list must be copied, because their memory context can become */ /* invalid during the execution (in particular with the cursor interface) */ execstate->target_list = copyObject(lthird(values)); @@ -2011,5 +2064,6 @@ initializeExecState(void *internalstate) execstate->upper_rel_targets = list_nth(values, 4); execstate->aggs = list_nth(values, 5); execstate->group_clauses = list_nth(values, 6); + execstate->remote_conds = list_nth(values, 7); return execstate; } diff --git a/src/multicorn.h b/src/multicorn.h index 9f74eeb8..a4d1bc83 100644 --- a/src/multicorn.h +++ b/src/multicorn.h @@ -169,6 +169,12 @@ typedef struct MulticornExecState Datum *values; bool *nulls; ConversionInfo **cinfos; + /* + * In case of aggregations the upper rel target list does not correspond to + * the base table target list, so separate conversion information must be + * provided when converting the quals in the execute method. + */ + ConversionInfo **qual_cinfos; /* * List containing targets to be returned from Python in case of aggregations. * List elements are aggregation keys or group_clauses elements. @@ -187,6 +193,11 @@ typedef struct MulticornExecState * List elements are column names for grouping. */ List *group_clauses; + /* + * Remote conditions parsed in the MulticornGetForeignRelSize + */ + List *remote_conds; + /* Common buffer to avoid repeated allocations */ StringInfo buffer; AttrNumber rowidAttno; diff --git a/src/python.c b/src/python.c index 47a2c3b1..2046f5df 100644 --- a/src/python.c +++ b/src/python.c @@ -944,7 +944,7 @@ execute(ForeignScanState *node, ExplainState *es) } if (newqual != NULL) { - PyObject *python_qual = qualdefToPython((MulticornConstQual *) newqual, state->cinfos); + PyObject *python_qual = qualdefToPython((MulticornConstQual *) newqual, state->qual_cinfos); if (python_qual != NULL) { diff --git a/src/query.c b/src/query.c index 60423ff3..e54e99d0 100644 --- a/src/query.c +++ b/src/query.c @@ -122,10 +122,10 @@ initConversioninfo(ConversionInfo ** cinfos, AttInMetadata *attinmeta, List *upp if (upper_rel_targets) { /* - * For aggregations/groupings the targets lack attname, so we instead - * refer to the targets through references generated in - * multicorn_extract_upper_rel_info(). - */ + * For aggregations/groupings the targets lack attname, so we instead + * refer to the targets through references generated in + * multicorn_extract_upper_rel_info(). + */ attrname = strVal(list_nth(upper_rel_targets, i)); } From 3f3d4969ec22cb7553fe5e118b2596f959c3a5c9 Mon Sep 17 00:00:00 2001 From: Marko Grujic Date: Fri, 14 Jan 2022 11:36:17 +0000 Subject: [PATCH 3/8] Simplify aggregation qual pushdown implementation --- src/deparse.c | 129 ------------------------------------------------ src/multicorn.c | 25 +++------- src/multicorn.h | 15 ++---- 3 files changed, 12 insertions(+), 157 deletions(-) diff --git a/src/deparse.c b/src/deparse.c index cfa8e59b..3357c015 100644 --- a/src/deparse.c +++ b/src/deparse.c @@ -76,34 +76,6 @@ typedef struct foreign_loc_cxt static Value *multicorn_deparse_function_name(Oid funcid); -/* - * Examine each qual clause in input_conds, and classify them into two groups, - * which are returned as two lists: - * - remote_conds contains expressions that can be evaluated remotely - * - local_conds contains expressions that can't be evaluated remotely - */ -void -multicorn_classify_conditions(PlannerInfo *root, - RelOptInfo *baserel, - List *input_conds, - List **remote_conds, - List **local_conds) -{ - ListCell *lc; - - *remote_conds = NIL; - *local_conds = NIL; - - foreach(lc, input_conds) - { - RestrictInfo *ri = lfirst_node(RestrictInfo, lc); - - if (multicorn_is_foreign_expr(root, baserel, ri->clause)) - *remote_conds = lappend(*remote_conds, ri); - else - *local_conds = lappend(*local_conds, ri); - } -} /* * Return true if given object is one of PostgreSQL's built-in objects. @@ -218,107 +190,6 @@ multicorn_foreign_expr_walker(Node *node, } } break; - case T_Const: - { - Const *c = (Const *) node; - - /* TODO: see if Python FDW instance can handle interval type */ - if (c->consttype == INTERVALOID) - return false; - - /* - * If the constant has nondefault collation, either it's of a - * non-builtin type, or it reflects folding of a CollateExpr; - * either way, it's unsafe to send to the remote. - */ - if (c->constcollid != InvalidOid && - c->constcollid != DEFAULT_COLLATION_OID) - return false; - - /* Otherwise, we can consider that it doesn't set collation */ - collation = InvalidOid; - state = FDW_COLLATE_NONE; - } - break; - case T_OpExpr: - { - OpExpr *oe = (OpExpr *) node; - const char *operatorName = get_opname(oe->opno); - - /* - * TODO: Consult Python FDW instance on portability of various - * operators such as Factorial (!), Bitwise XOR (^), ILIKE, etc. - */ - if (strcmp(operatorName, "!") == 0 - || strcmp(operatorName, "^") == 0 - || strcmp(operatorName, "~~*") == 0 - || strcmp(operatorName, "!~~*") == 0) - { - return false; - } - - /* - * Similarly, only built-in operators can be sent to remote. - * (If the operator is, surely its underlying function is - * too.) - */ - if (!multicorn_is_builtin(oe->opno)) - return false; - - /* - * Recurse to input subexpressions. - */ - if (!multicorn_foreign_expr_walker((Node *) oe->args, - glob_cxt, &inner_cxt)) - return false; - - /* - * If operator's input collation is not derived from a foreign - * Var, it can't be sent to remote. - */ - if (oe->inputcollid == InvalidOid) - /* OK, inputs are all noncollatable */ ; - else if (inner_cxt.state != FDW_COLLATE_SAFE || - oe->inputcollid != inner_cxt.collation) - return false; - - /* Result-collation handling is same as for functions */ - collation = oe->opcollid; - if (collation == InvalidOid) - state = FDW_COLLATE_NONE; - else if (inner_cxt.state == FDW_COLLATE_SAFE && - collation == inner_cxt.collation) - state = FDW_COLLATE_SAFE; - else - state = FDW_COLLATE_UNSAFE; - } - break; - case T_List: - { - List *l = (List *) node; - ListCell *lc; - - /* - * Recurse to component subexpressions. - */ - foreach(lc, l) - { - if (!multicorn_foreign_expr_walker((Node *) lfirst(lc), - glob_cxt, &inner_cxt)) - return false; - } - - /* - * When processing a list, collation state just bubbles up - * from the list elements. - */ - collation = inner_cxt.collation; - state = inner_cxt.state; - - /* Don't apply exprType() to the list. */ - check_type = false; - } - break; case T_Aggref: { Aggref *agg = (Aggref *) node; diff --git a/src/multicorn.c b/src/multicorn.c index 9f1de8d2..761bac4a 100644 --- a/src/multicorn.c +++ b/src/multicorn.c @@ -402,16 +402,7 @@ multicornGetForeignRelSize(PlannerInfo *root, } - /* - * Identify which baserestrictinfo clauses can be sent to the remote - * server and which can't. - * TODO: for now all WHERE clauses will be classified as local conditions - * since multicorn_foreign_expr_walker lacks T_OpExpr and T_Const cases, - * thus preventing pushdown. When adding support for this make sure to align - * the code in multicorn_extract_upper_rel_info as well. - */ - multicorn_classify_conditions(root, baserel, baserel->baserestrictinfo, - &planstate->remote_conds, &planstate->local_conds); + planstate->baserestrictinfo = baserel->baserestrictinfo; /* Inject the "rows" and "width" attribute into the baserel */ #if PG_VERSION_NUM >= 90600 @@ -594,13 +585,11 @@ multicornGetForeignPlan(PlannerInfo *root, multicorn_extract_upper_rel_info(root, fdw_scan_tlist, planstate); /* - * We have already identified remote conditions in MulticornGetForeignRelSize - * via multicorn_classify_conditions and need to use those, since scan_clauses - * are empty for upper relations for some reason. We pass the extracted clauses - * to the scan phase via serializePlanState. + * Since scan_clauses are empty in case of upper relations for some + * reason. We pass the clauses from the base relation obtained in MulticornGetForeignRelSize. */ ofpinfo = (MulticornPlanState *) planstate->outerrel->fdw_private; - planstate->remote_conds = extract_actual_clauses(ofpinfo->remote_conds, false); + planstate->baserestrictinfo = extract_actual_clauses(ofpinfo->baserestrictinfo, false); } return make_foreignscan(tlist, @@ -694,7 +683,7 @@ multicornBeginForeignScan(ForeignScanState *node, int eflags) * NB: This may not work well in case of joins, keep an eye out for it. */ rtindex = bms_next_member(fscan->fs_relids, -1); - clauses = execstate->remote_conds; + clauses = execstate->baserestrictinfo; } execstate->values = palloc(sizeof(Datum) * execstate->tupdesc->natts); @@ -2021,7 +2010,7 @@ serializePlanState(MulticornPlanState * state) result = lappend(result, state->group_clauses); - result = lappend(result, state->remote_conds); + result = lappend(result, state->baserestrictinfo); return result; } @@ -2064,6 +2053,6 @@ initializeExecState(void *internalstate) execstate->upper_rel_targets = list_nth(values, 4); execstate->aggs = list_nth(values, 5); execstate->group_clauses = list_nth(values, 6); - execstate->remote_conds = list_nth(values, 7); + execstate->baserestrictinfo = list_nth(values, 7); return execstate; } diff --git a/src/multicorn.h b/src/multicorn.h index a4d1bc83..6d624ec6 100644 --- a/src/multicorn.h +++ b/src/multicorn.h @@ -118,9 +118,9 @@ typedef struct MulticornPlanState */ bool pushdown_safe; - /* baserestrictinfo clauses, broken down into safe and unsafe subsets. */ - List *remote_conds; - List *local_conds; + /* qual clauses */ + List *baserestrictinfo; + List *local_conds; /* Actual remote restriction clauses for scan (sans RestrictInfos) */ List *final_remote_exprs; @@ -194,9 +194,9 @@ typedef struct MulticornExecState */ List *group_clauses; /* - * Remote conditions parsed in the MulticornGetForeignRelSize + * Qual conditions parsed in the MulticornGetForeignRelSize */ - List *remote_conds; + List *baserestrictinfo; /* Common buffer to avoid repeated allocations */ StringInfo buffer; @@ -270,11 +270,6 @@ typedef struct MulticornDeparsedSortGroup } MulticornDeparsedSortGroup; /* deparse.c */ -extern void multicorn_classify_conditions(PlannerInfo *root, - RelOptInfo *baserel, - List *input_conds, - List **remote_conds, - List **local_conds); extern bool multicorn_is_foreign_expr(PlannerInfo *root, RelOptInfo *baserel, Expr *expr); From cbe45ed682dde713aa46a08205ee794c1d6f948a Mon Sep 17 00:00:00 2001 From: Marko Grujic Date: Mon, 17 Jan 2022 11:10:31 +0000 Subject: [PATCH 4/8] Enable pushdown of composite function names --- src/deparse.c | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/src/deparse.c b/src/deparse.c index 3357c015..091b095a 100644 --- a/src/deparse.c +++ b/src/deparse.c @@ -195,6 +195,7 @@ multicorn_foreign_expr_walker(Node *node, Aggref *agg = (Aggref *) node; ListCell *lc; char *opername = NULL; + StringInfo opername_composite = makeStringInfo(); Oid schema; /* get function name and schema */ @@ -211,11 +212,20 @@ multicorn_foreign_expr_walker(Node *node, if (schema != PG_CATALOG_NAMESPACE) return false; - /* Make sure the specific function at hand is shippable + /* Make sure the specific function at hand is shippable * NB: here we deviate from standard FDW code, since the allowed * function list is fetched from the Python FDW instance */ - if (!list_member(fpinfo->agg_functions, makeString(opername))) + if (agg->aggstar) + { + initStringInfo(opername_composite); + appendStringInfoString(opername_composite, opername); + appendStringInfoString(opername_composite, ".*"); + + if (!list_member(fpinfo->agg_functions, makeString(opername_composite->data))) + return false; + } + else if (!list_member(fpinfo->agg_functions, makeString(opername))) return false; /* Not safe to pushdown when not in grouping context */ @@ -227,10 +237,10 @@ multicorn_foreign_expr_walker(Node *node, return false; /* - * For now we don't push down DISTINCT or COUNT(*) aggregations. + * For now we don't push down DISTINCT aggregations. * TODO: Enable this */ - if (agg->aggdistinct || agg->aggstar) + if (agg->aggdistinct) return false; /* @@ -536,10 +546,17 @@ multicorn_extract_upper_rel_info(PlannerInfo *root, List *tlist, MulticornPlanSt aggref = (Aggref *) tle->expr; function = multicorn_deparse_function_name(aggref->aggfnoid); - var = linitial(pull_var_clause((Node *) aggref, + if (aggref->aggstar) + { + colname = makeString("*"); + } + else + { + var = linitial(pull_var_clause((Node *) aggref, PVC_RECURSE_AGGREGATES | PVC_RECURSE_PLACEHOLDERS)); - colname = colnameFromVar(var, root); + colname = colnameFromVar(var, root); + } initStringInfo(agg_key); appendStringInfoString(agg_key, strVal(function)); From 849bd2ec754830fc7ae4b20d1c2f5f43c95ce627 Mon Sep 17 00:00:00 2001 From: Marko Grujic Date: Wed, 19 Jan 2022 14:17:43 +0000 Subject: [PATCH 5/8] Add cases for more node types in deparse walker This allows Multicorn to correctly classify the quals it supports into remote and local conditions. In turn, this allows us to decide whether pushdown is achievable or not. Additionally, inquire Python FDW instance about the qual operators it supports. --- python/multicorn/__init__.py | 5 + src/deparse.c | 198 +++++++++++++++++++++++++++++++++++ src/multicorn.c | 27 +++-- src/multicorn.h | 19 ++-- src/python.c | 34 +++++- 5 files changed, 262 insertions(+), 21 deletions(-) diff --git a/python/multicorn/__init__.py b/python/multicorn/__init__.py index 0ebcd28f..81ff3648 100755 --- a/python/multicorn/__init__.py +++ b/python/multicorn/__init__.py @@ -228,12 +228,17 @@ def can_pushdown_upperrel(self): : , ... }, + "supported_operators": [">", "<", "=", ...] } Each entry in `agg_functions` dict corresponds to a maping between the name of a aggregation function in PostgreSQL, and the equivalent foreign function. If no mapping exists for an aggregate function any queries containing it won't be pushed down. + + The `supported_operators` entry lists all operators that can be used + in qual (WHERE) clauses so that the aggregation pushdown will still + be supported. """ return None diff --git a/src/deparse.c b/src/deparse.c index 091b095a..b26ce41d 100644 --- a/src/deparse.c +++ b/src/deparse.c @@ -77,6 +77,36 @@ typedef struct foreign_loc_cxt static Value *multicorn_deparse_function_name(Oid funcid); +/* + * Examine each qual clause in input_conds, and classify them into two groups, + * which are returned as two lists: + * - remote_conds contains expressions that can be evaluated remotely + * - local_conds contains expressions that can't be evaluated remotely + */ +void +multicorn_classify_conditions(PlannerInfo *root, + RelOptInfo *baserel, + List *input_conds, + List **remote_conds, + List **local_conds) +{ + ListCell *lc; + + *remote_conds = NIL; + *local_conds = NIL; + + foreach(lc, input_conds) + { + RestrictInfo *ri = lfirst_node(RestrictInfo, lc); + + if (multicorn_is_foreign_expr(root, baserel, ri->clause)) + *remote_conds = lappend(*remote_conds, ri); + else + *local_conds = lappend(*local_conds, ri); + } +} + + /* * Return true if given object is one of PostgreSQL's built-in objects. * @@ -190,6 +220,174 @@ multicorn_foreign_expr_walker(Node *node, } } break; + case T_Const: + { + Const *c = (Const *) node; + + /* TODO: see if Python FDW instance can handle interval type */ + if (c->consttype == INTERVALOID) + return false; + + /* + * If the constant has nondefault collation, either it's of a + * non-builtin type, or it reflects folding of a CollateExpr; + * either way, it's unsafe to send to the remote. + */ + if (c->constcollid != InvalidOid && + c->constcollid != DEFAULT_COLLATION_OID) + return false; + + /* Otherwise, we can consider that it doesn't set collation */ + collation = InvalidOid; + state = FDW_COLLATE_NONE; + } + break; + case T_OpExpr: + { + OpExpr *oe = (OpExpr *) node; + char *operatorName = get_opname(oe->opno); + + /* + * Consult Python FDW instance on portability of the operator. + */ + if (!list_member(fpinfo->operators_supported, makeString(operatorName))) + return false; + + /* + * Similarly, only built-in operators can be sent to remote. + * (If the operator is, surely its underlying function is + * too.) + */ + if (!multicorn_is_builtin(oe->opno)) + return false; + + /* + * Recurse to input subexpressions. + */ + if (!multicorn_foreign_expr_walker((Node *) oe->args, + glob_cxt, &inner_cxt)) + return false; + + /* + * If operator's input collation is not derived from a foreign + * Var, it can't be sent to remote. + */ + if (oe->inputcollid == InvalidOid) + /* OK, inputs are all noncollatable */ ; + else if (inner_cxt.state != FDW_COLLATE_SAFE || + oe->inputcollid != inner_cxt.collation) + return false; + + /* Result-collation handling is same as for functions */ + collation = oe->opcollid; + if (collation == InvalidOid) + state = FDW_COLLATE_NONE; + else if (inner_cxt.state == FDW_COLLATE_SAFE && + collation == inner_cxt.collation) + state = FDW_COLLATE_SAFE; + else + state = FDW_COLLATE_UNSAFE; + } + break; + case T_ScalarArrayOpExpr: + { + ScalarArrayOpExpr *oe = (ScalarArrayOpExpr *) node; + + /* + * Again, only built-in operators can be sent to remote. + */ + if (!multicorn_is_builtin(oe->opno)) + return false; + + /* + * Recurse to input subexpressions. + */ + if (!multicorn_foreign_expr_walker((Node *) oe->args, + glob_cxt, &inner_cxt)) + return false; + + /* + * If operator's input collation is not derived from a foreign + * Var, it can't be sent to remote. + */ + if (oe->inputcollid == InvalidOid) + /* OK, inputs are all noncollatable */ ; + else if (inner_cxt.state != FDW_COLLATE_SAFE || + oe->inputcollid != inner_cxt.collation) + return false; + + /* Output is always boolean and so noncollatable. */ + collation = InvalidOid; + state = FDW_COLLATE_NONE; + } + break; + case T_RelabelType: + { + RelabelType *r = (RelabelType *) node; + + /* + * Recurse to input subexpression. + */ + if (!multicorn_foreign_expr_walker((Node *) r->arg, + glob_cxt, &inner_cxt)) + return false; + + /* + * RelabelType must not introduce a collation not derived from + * an input foreign Var. + */ + collation = r->resultcollid; + if (collation == InvalidOid) + state = FDW_COLLATE_NONE; + else if (inner_cxt.state == FDW_COLLATE_SAFE && + collation == inner_cxt.collation) + state = FDW_COLLATE_SAFE; + else + state = FDW_COLLATE_UNSAFE; + } + break; + case T_NullTest: + { + NullTest *nt = (NullTest *) node; + + /* + * Recurse to input subexpressions. + */ + if (!multicorn_foreign_expr_walker((Node *) nt->arg, + glob_cxt, &inner_cxt)) + return false; + + /* Output is always boolean and so noncollatable. */ + collation = InvalidOid; + state = FDW_COLLATE_NONE; + } + break; + case T_List: + { + List *l = (List *) node; + ListCell *lc; + + /* + * Recurse to component subexpressions. + */ + foreach(lc, l) + { + if (!multicorn_foreign_expr_walker((Node *) lfirst(lc), + glob_cxt, &inner_cxt)) + return false; + } + + /* + * When processing a list, collation state just bubbles up + * from the list elements. + */ + collation = inner_cxt.collation; + state = inner_cxt.state; + + /* Don't apply exprType() to the list. */ + check_type = false; + } + break; case T_Aggref: { Aggref *agg = (Aggref *) node; diff --git a/src/multicorn.c b/src/multicorn.c index 761bac4a..789fc64d 100644 --- a/src/multicorn.c +++ b/src/multicorn.c @@ -308,12 +308,12 @@ multicornGetForeignRelSize(PlannerInfo *root, /* Base foreign tables need to be push down always */ planstate->pushdown_safe = true; - /* Initialize upperrel pushdown info */ - planstate->groupby_supported = false; - planstate->agg_functions = NULL; - planstate->fdw_instance = getInstance(foreigntableid); planstate->foreigntableid = foreigntableid; + + /* Initialize upperrel pushdown info */ + initUpperrelPushdownInfo(planstate); + /* Initialize the conversion info array */ { Relation rel = RelationIdGetRelation(ftable->relid); @@ -402,7 +402,12 @@ multicornGetForeignRelSize(PlannerInfo *root, } - planstate->baserestrictinfo = baserel->baserestrictinfo; + /* + * Identify which baserestrictinfo clauses can be sent to the remote server + * and which can't. + */ + multicorn_classify_conditions(root, baserel, baserel->baserestrictinfo, + &planstate->remote_conds, &planstate->local_conds); /* Inject the "rows" and "width" attribute into the baserel */ #if PG_VERSION_NUM >= 90600 @@ -589,7 +594,7 @@ multicornGetForeignPlan(PlannerInfo *root, * reason. We pass the clauses from the base relation obtained in MulticornGetForeignRelSize. */ ofpinfo = (MulticornPlanState *) planstate->outerrel->fdw_private; - planstate->baserestrictinfo = extract_actual_clauses(ofpinfo->baserestrictinfo, false); + planstate->remote_conds = extract_actual_clauses(ofpinfo->remote_conds, false); } return make_foreignscan(tlist, @@ -683,7 +688,7 @@ multicornBeginForeignScan(ForeignScanState *node, int eflags) * NB: This may not work well in case of joins, keep an eye out for it. */ rtindex = bms_next_member(fscan->fs_relids, -1); - clauses = execstate->baserestrictinfo; + clauses = execstate->remote_conds; } execstate->values = palloc(sizeof(Datum) * execstate->tupdesc->natts); @@ -1882,12 +1887,12 @@ multicornGetForeignUpperPaths(PlannerInfo *root, UpperRelationKind stage, /* * Check with the Python FDW instance whether it supports pushdown at all - * NB: Here we deviate from other FDWs, in that we don't know whether the + * NB: Here we deviate from other FDWs, in that we don't know whether * something can be pushed down without consulting the corresponding Python * FDW instance. */ - if (!canPushdownUpperrel((MulticornPlanState *) input_rel->fdw_private)) + if (!((MulticornPlanState *) input_rel->fdw_private)->upperrel_pushdown_supported) { return; } @@ -2010,7 +2015,7 @@ serializePlanState(MulticornPlanState * state) result = lappend(result, state->group_clauses); - result = lappend(result, state->baserestrictinfo); + result = lappend(result, state->remote_conds); return result; } @@ -2053,6 +2058,6 @@ initializeExecState(void *internalstate) execstate->upper_rel_targets = list_nth(values, 4); execstate->aggs = list_nth(values, 5); execstate->group_clauses = list_nth(values, 6); - execstate->baserestrictinfo = list_nth(values, 7); + execstate->remote_conds = list_nth(values, 7); return execstate; } diff --git a/src/multicorn.h b/src/multicorn.h index 6d624ec6..846beeff 100644 --- a/src/multicorn.h +++ b/src/multicorn.h @@ -101,8 +101,10 @@ typedef struct MulticornPlanState int width; /* Details about upperrel pushdown fetched from the Python FDW instance */ + bool upperrel_pushdown_supported; bool groupby_supported; List *agg_functions; + List *operators_supported; /* * Aggregation and grouping data to be passed to the execution phase. @@ -118,9 +120,9 @@ typedef struct MulticornPlanState */ bool pushdown_safe; - /* qual clauses */ - List *baserestrictinfo; - List *local_conds; + /* baserestrictinfo clauses, broken down into safe and unsafe subsets. */ + List *remote_conds; + List *local_conds; /* Actual remote restriction clauses for scan (sans RestrictInfos) */ List *final_remote_exprs; @@ -194,9 +196,9 @@ typedef struct MulticornExecState */ List *group_clauses; /* - * Qual conditions parsed in the MulticornGetForeignRelSize + * Remote conditions parsed in the MulticornGetForeignRelSize */ - List *baserestrictinfo; + List *remote_conds; /* Common buffer to avoid repeated allocations */ StringInfo buffer; @@ -270,6 +272,11 @@ typedef struct MulticornDeparsedSortGroup } MulticornDeparsedSortGroup; /* deparse.c */ +extern void multicorn_classify_conditions(PlannerInfo *root, + RelOptInfo *baserel, + List *input_conds, + List **remote_conds, + List **local_conds); extern bool multicorn_is_foreign_expr(PlannerInfo *root, RelOptInfo *baserel, Expr *expr); @@ -298,7 +305,7 @@ PyObject *tupleTableSlotToPyObject(TupleTableSlot *slot, ConversionInfo ** cin char *getRowIdColumn(PyObject *fdw_instance); PyObject *optionsListToPyDict(List *options); const char *getPythonEncodingName(void); -bool canPushdownUpperrel(MulticornPlanState * state); +void initUpperrelPushdownInfo(MulticornPlanState * state); void getRelSize(MulticornPlanState * state, PlannerInfo *root, diff --git a/src/python.c b/src/python.c index 2046f5df..211b97f7 100644 --- a/src/python.c +++ b/src/python.c @@ -1713,20 +1713,21 @@ canSort(MulticornPlanState * state, List *deparsed) * more granular conditional logic for assesing whether the particular query * is suitable for pushdown. */ -bool -canPushdownUpperrel(MulticornPlanState * state) +void +initUpperrelPushdownInfo(MulticornPlanState * state) { PyObject *fdw_instance = state->fdw_instance, *p_upperrel_pushdown, *p_object, *p_agg_funcs, *p_agg_func, + *p_ops, *p_item; Py_ssize_t i, size, strlength; char *tempbuffer; - StringInfo agg_func; + StringInfo agg_func, operator; bool pushdown_upperrel = false; p_upperrel_pushdown = PyObject_CallMethod(fdw_instance, "can_pushdown_upperrel", "()"); @@ -1769,11 +1770,36 @@ canPushdownUpperrel(MulticornPlanState * state) } Py_XDECREF(p_object); + /* Determine which operators are supported */ + p_ops = PyMapping_GetItemString(p_upperrel_pushdown, "operators_supported"); + if (p_ops != NULL && p_ops != Py_None) + { + size = PySequence_Size(p_ops); + + for (i = 0; i < size; i++) + { + operator = makeStringInfo(); + strlength = 0; + + p_item = PySequence_GetItem(p_ops, i); + p_object = PyUnicode_AsEncodedString(p_item, getPythonEncodingName(), NULL); + errorCheck(); + PyBytes_AsStringAndSize(p_object, &tempbuffer, &strlength); + appendBinaryStringInfo(operator, tempbuffer, strlength); + + state->operators_supported = lappend(state->operators_supported, makeString(operator->data)); + + Py_DECREF(p_object); + Py_DECREF(p_item); + } + } + Py_XDECREF(p_ops); + pushdown_upperrel = true; } Py_XDECREF(p_upperrel_pushdown); - return pushdown_upperrel; + state->upperrel_pushdown_supported = pushdown_upperrel; } PyObject * From ddee154a0ba99a24d5087131e04cc02c187c87bf Mon Sep 17 00:00:00 2001 From: Marko Grujic Date: Thu, 20 Jan 2022 08:38:37 +0000 Subject: [PATCH 6/8] Revert "Add cases for more node types in deparse walker" This reverts commit 849bd2ec754830fc7ae4b20d1c2f5f43c95ce627. --- python/multicorn/__init__.py | 5 - src/deparse.c | 198 ----------------------------------- src/multicorn.c | 27 ++--- src/multicorn.h | 19 ++-- src/python.c | 34 +----- 5 files changed, 21 insertions(+), 262 deletions(-) diff --git a/python/multicorn/__init__.py b/python/multicorn/__init__.py index 81ff3648..0ebcd28f 100755 --- a/python/multicorn/__init__.py +++ b/python/multicorn/__init__.py @@ -228,17 +228,12 @@ def can_pushdown_upperrel(self): : , ... }, - "supported_operators": [">", "<", "=", ...] } Each entry in `agg_functions` dict corresponds to a maping between the name of a aggregation function in PostgreSQL, and the equivalent foreign function. If no mapping exists for an aggregate function any queries containing it won't be pushed down. - - The `supported_operators` entry lists all operators that can be used - in qual (WHERE) clauses so that the aggregation pushdown will still - be supported. """ return None diff --git a/src/deparse.c b/src/deparse.c index b26ce41d..091b095a 100644 --- a/src/deparse.c +++ b/src/deparse.c @@ -77,36 +77,6 @@ typedef struct foreign_loc_cxt static Value *multicorn_deparse_function_name(Oid funcid); -/* - * Examine each qual clause in input_conds, and classify them into two groups, - * which are returned as two lists: - * - remote_conds contains expressions that can be evaluated remotely - * - local_conds contains expressions that can't be evaluated remotely - */ -void -multicorn_classify_conditions(PlannerInfo *root, - RelOptInfo *baserel, - List *input_conds, - List **remote_conds, - List **local_conds) -{ - ListCell *lc; - - *remote_conds = NIL; - *local_conds = NIL; - - foreach(lc, input_conds) - { - RestrictInfo *ri = lfirst_node(RestrictInfo, lc); - - if (multicorn_is_foreign_expr(root, baserel, ri->clause)) - *remote_conds = lappend(*remote_conds, ri); - else - *local_conds = lappend(*local_conds, ri); - } -} - - /* * Return true if given object is one of PostgreSQL's built-in objects. * @@ -220,174 +190,6 @@ multicorn_foreign_expr_walker(Node *node, } } break; - case T_Const: - { - Const *c = (Const *) node; - - /* TODO: see if Python FDW instance can handle interval type */ - if (c->consttype == INTERVALOID) - return false; - - /* - * If the constant has nondefault collation, either it's of a - * non-builtin type, or it reflects folding of a CollateExpr; - * either way, it's unsafe to send to the remote. - */ - if (c->constcollid != InvalidOid && - c->constcollid != DEFAULT_COLLATION_OID) - return false; - - /* Otherwise, we can consider that it doesn't set collation */ - collation = InvalidOid; - state = FDW_COLLATE_NONE; - } - break; - case T_OpExpr: - { - OpExpr *oe = (OpExpr *) node; - char *operatorName = get_opname(oe->opno); - - /* - * Consult Python FDW instance on portability of the operator. - */ - if (!list_member(fpinfo->operators_supported, makeString(operatorName))) - return false; - - /* - * Similarly, only built-in operators can be sent to remote. - * (If the operator is, surely its underlying function is - * too.) - */ - if (!multicorn_is_builtin(oe->opno)) - return false; - - /* - * Recurse to input subexpressions. - */ - if (!multicorn_foreign_expr_walker((Node *) oe->args, - glob_cxt, &inner_cxt)) - return false; - - /* - * If operator's input collation is not derived from a foreign - * Var, it can't be sent to remote. - */ - if (oe->inputcollid == InvalidOid) - /* OK, inputs are all noncollatable */ ; - else if (inner_cxt.state != FDW_COLLATE_SAFE || - oe->inputcollid != inner_cxt.collation) - return false; - - /* Result-collation handling is same as for functions */ - collation = oe->opcollid; - if (collation == InvalidOid) - state = FDW_COLLATE_NONE; - else if (inner_cxt.state == FDW_COLLATE_SAFE && - collation == inner_cxt.collation) - state = FDW_COLLATE_SAFE; - else - state = FDW_COLLATE_UNSAFE; - } - break; - case T_ScalarArrayOpExpr: - { - ScalarArrayOpExpr *oe = (ScalarArrayOpExpr *) node; - - /* - * Again, only built-in operators can be sent to remote. - */ - if (!multicorn_is_builtin(oe->opno)) - return false; - - /* - * Recurse to input subexpressions. - */ - if (!multicorn_foreign_expr_walker((Node *) oe->args, - glob_cxt, &inner_cxt)) - return false; - - /* - * If operator's input collation is not derived from a foreign - * Var, it can't be sent to remote. - */ - if (oe->inputcollid == InvalidOid) - /* OK, inputs are all noncollatable */ ; - else if (inner_cxt.state != FDW_COLLATE_SAFE || - oe->inputcollid != inner_cxt.collation) - return false; - - /* Output is always boolean and so noncollatable. */ - collation = InvalidOid; - state = FDW_COLLATE_NONE; - } - break; - case T_RelabelType: - { - RelabelType *r = (RelabelType *) node; - - /* - * Recurse to input subexpression. - */ - if (!multicorn_foreign_expr_walker((Node *) r->arg, - glob_cxt, &inner_cxt)) - return false; - - /* - * RelabelType must not introduce a collation not derived from - * an input foreign Var. - */ - collation = r->resultcollid; - if (collation == InvalidOid) - state = FDW_COLLATE_NONE; - else if (inner_cxt.state == FDW_COLLATE_SAFE && - collation == inner_cxt.collation) - state = FDW_COLLATE_SAFE; - else - state = FDW_COLLATE_UNSAFE; - } - break; - case T_NullTest: - { - NullTest *nt = (NullTest *) node; - - /* - * Recurse to input subexpressions. - */ - if (!multicorn_foreign_expr_walker((Node *) nt->arg, - glob_cxt, &inner_cxt)) - return false; - - /* Output is always boolean and so noncollatable. */ - collation = InvalidOid; - state = FDW_COLLATE_NONE; - } - break; - case T_List: - { - List *l = (List *) node; - ListCell *lc; - - /* - * Recurse to component subexpressions. - */ - foreach(lc, l) - { - if (!multicorn_foreign_expr_walker((Node *) lfirst(lc), - glob_cxt, &inner_cxt)) - return false; - } - - /* - * When processing a list, collation state just bubbles up - * from the list elements. - */ - collation = inner_cxt.collation; - state = inner_cxt.state; - - /* Don't apply exprType() to the list. */ - check_type = false; - } - break; case T_Aggref: { Aggref *agg = (Aggref *) node; diff --git a/src/multicorn.c b/src/multicorn.c index 789fc64d..761bac4a 100644 --- a/src/multicorn.c +++ b/src/multicorn.c @@ -308,12 +308,12 @@ multicornGetForeignRelSize(PlannerInfo *root, /* Base foreign tables need to be push down always */ planstate->pushdown_safe = true; + /* Initialize upperrel pushdown info */ + planstate->groupby_supported = false; + planstate->agg_functions = NULL; + planstate->fdw_instance = getInstance(foreigntableid); planstate->foreigntableid = foreigntableid; - - /* Initialize upperrel pushdown info */ - initUpperrelPushdownInfo(planstate); - /* Initialize the conversion info array */ { Relation rel = RelationIdGetRelation(ftable->relid); @@ -402,12 +402,7 @@ multicornGetForeignRelSize(PlannerInfo *root, } - /* - * Identify which baserestrictinfo clauses can be sent to the remote server - * and which can't. - */ - multicorn_classify_conditions(root, baserel, baserel->baserestrictinfo, - &planstate->remote_conds, &planstate->local_conds); + planstate->baserestrictinfo = baserel->baserestrictinfo; /* Inject the "rows" and "width" attribute into the baserel */ #if PG_VERSION_NUM >= 90600 @@ -594,7 +589,7 @@ multicornGetForeignPlan(PlannerInfo *root, * reason. We pass the clauses from the base relation obtained in MulticornGetForeignRelSize. */ ofpinfo = (MulticornPlanState *) planstate->outerrel->fdw_private; - planstate->remote_conds = extract_actual_clauses(ofpinfo->remote_conds, false); + planstate->baserestrictinfo = extract_actual_clauses(ofpinfo->baserestrictinfo, false); } return make_foreignscan(tlist, @@ -688,7 +683,7 @@ multicornBeginForeignScan(ForeignScanState *node, int eflags) * NB: This may not work well in case of joins, keep an eye out for it. */ rtindex = bms_next_member(fscan->fs_relids, -1); - clauses = execstate->remote_conds; + clauses = execstate->baserestrictinfo; } execstate->values = palloc(sizeof(Datum) * execstate->tupdesc->natts); @@ -1887,12 +1882,12 @@ multicornGetForeignUpperPaths(PlannerInfo *root, UpperRelationKind stage, /* * Check with the Python FDW instance whether it supports pushdown at all - * NB: Here we deviate from other FDWs, in that we don't know whether + * NB: Here we deviate from other FDWs, in that we don't know whether the * something can be pushed down without consulting the corresponding Python * FDW instance. */ - if (!((MulticornPlanState *) input_rel->fdw_private)->upperrel_pushdown_supported) + if (!canPushdownUpperrel((MulticornPlanState *) input_rel->fdw_private)) { return; } @@ -2015,7 +2010,7 @@ serializePlanState(MulticornPlanState * state) result = lappend(result, state->group_clauses); - result = lappend(result, state->remote_conds); + result = lappend(result, state->baserestrictinfo); return result; } @@ -2058,6 +2053,6 @@ initializeExecState(void *internalstate) execstate->upper_rel_targets = list_nth(values, 4); execstate->aggs = list_nth(values, 5); execstate->group_clauses = list_nth(values, 6); - execstate->remote_conds = list_nth(values, 7); + execstate->baserestrictinfo = list_nth(values, 7); return execstate; } diff --git a/src/multicorn.h b/src/multicorn.h index 846beeff..6d624ec6 100644 --- a/src/multicorn.h +++ b/src/multicorn.h @@ -101,10 +101,8 @@ typedef struct MulticornPlanState int width; /* Details about upperrel pushdown fetched from the Python FDW instance */ - bool upperrel_pushdown_supported; bool groupby_supported; List *agg_functions; - List *operators_supported; /* * Aggregation and grouping data to be passed to the execution phase. @@ -120,9 +118,9 @@ typedef struct MulticornPlanState */ bool pushdown_safe; - /* baserestrictinfo clauses, broken down into safe and unsafe subsets. */ - List *remote_conds; - List *local_conds; + /* qual clauses */ + List *baserestrictinfo; + List *local_conds; /* Actual remote restriction clauses for scan (sans RestrictInfos) */ List *final_remote_exprs; @@ -196,9 +194,9 @@ typedef struct MulticornExecState */ List *group_clauses; /* - * Remote conditions parsed in the MulticornGetForeignRelSize + * Qual conditions parsed in the MulticornGetForeignRelSize */ - List *remote_conds; + List *baserestrictinfo; /* Common buffer to avoid repeated allocations */ StringInfo buffer; @@ -272,11 +270,6 @@ typedef struct MulticornDeparsedSortGroup } MulticornDeparsedSortGroup; /* deparse.c */ -extern void multicorn_classify_conditions(PlannerInfo *root, - RelOptInfo *baserel, - List *input_conds, - List **remote_conds, - List **local_conds); extern bool multicorn_is_foreign_expr(PlannerInfo *root, RelOptInfo *baserel, Expr *expr); @@ -305,7 +298,7 @@ PyObject *tupleTableSlotToPyObject(TupleTableSlot *slot, ConversionInfo ** cin char *getRowIdColumn(PyObject *fdw_instance); PyObject *optionsListToPyDict(List *options); const char *getPythonEncodingName(void); -void initUpperrelPushdownInfo(MulticornPlanState * state); +bool canPushdownUpperrel(MulticornPlanState * state); void getRelSize(MulticornPlanState * state, PlannerInfo *root, diff --git a/src/python.c b/src/python.c index 211b97f7..2046f5df 100644 --- a/src/python.c +++ b/src/python.c @@ -1713,21 +1713,20 @@ canSort(MulticornPlanState * state, List *deparsed) * more granular conditional logic for assesing whether the particular query * is suitable for pushdown. */ -void -initUpperrelPushdownInfo(MulticornPlanState * state) +bool +canPushdownUpperrel(MulticornPlanState * state) { PyObject *fdw_instance = state->fdw_instance, *p_upperrel_pushdown, *p_object, *p_agg_funcs, *p_agg_func, - *p_ops, *p_item; Py_ssize_t i, size, strlength; char *tempbuffer; - StringInfo agg_func, operator; + StringInfo agg_func; bool pushdown_upperrel = false; p_upperrel_pushdown = PyObject_CallMethod(fdw_instance, "can_pushdown_upperrel", "()"); @@ -1770,36 +1769,11 @@ initUpperrelPushdownInfo(MulticornPlanState * state) } Py_XDECREF(p_object); - /* Determine which operators are supported */ - p_ops = PyMapping_GetItemString(p_upperrel_pushdown, "operators_supported"); - if (p_ops != NULL && p_ops != Py_None) - { - size = PySequence_Size(p_ops); - - for (i = 0; i < size; i++) - { - operator = makeStringInfo(); - strlength = 0; - - p_item = PySequence_GetItem(p_ops, i); - p_object = PyUnicode_AsEncodedString(p_item, getPythonEncodingName(), NULL); - errorCheck(); - PyBytes_AsStringAndSize(p_object, &tempbuffer, &strlength); - appendBinaryStringInfo(operator, tempbuffer, strlength); - - state->operators_supported = lappend(state->operators_supported, makeString(operator->data)); - - Py_DECREF(p_object); - Py_DECREF(p_item); - } - } - Py_XDECREF(p_ops); - pushdown_upperrel = true; } Py_XDECREF(p_upperrel_pushdown); - state->upperrel_pushdown_supported = pushdown_upperrel; + return pushdown_upperrel; } PyObject * From e87358e5525fbeba0bd7c702e167aa0eedd19ec7 Mon Sep 17 00:00:00 2001 From: Marko Grujic Date: Thu, 20 Jan 2022 09:09:14 +0000 Subject: [PATCH 7/8] Implement a simpler way of checking pushability of quals under aggregations --- python/multicorn/__init__.py | 5 +++++ src/deparse.c | 16 ---------------- src/multicorn.c | 14 +++++++++----- src/multicorn.h | 2 +- src/python.c | 28 +++++++++++++++++++++++++++- 5 files changed, 42 insertions(+), 23 deletions(-) diff --git a/python/multicorn/__init__.py b/python/multicorn/__init__.py index 0ebcd28f..81ff3648 100755 --- a/python/multicorn/__init__.py +++ b/python/multicorn/__init__.py @@ -228,12 +228,17 @@ def can_pushdown_upperrel(self): : , ... }, + "supported_operators": [">", "<", "=", ...] } Each entry in `agg_functions` dict corresponds to a maping between the name of a aggregation function in PostgreSQL, and the equivalent foreign function. If no mapping exists for an aggregate function any queries containing it won't be pushed down. + + The `supported_operators` entry lists all operators that can be used + in qual (WHERE) clauses so that the aggregation pushdown will still + be supported. """ return None diff --git a/src/deparse.c b/src/deparse.c index 091b095a..60155a28 100644 --- a/src/deparse.c +++ b/src/deparse.c @@ -487,22 +487,6 @@ multicorn_build_tlist_to_deparse(RelOptInfo *foreignrel) if (IS_UPPER_REL(foreignrel)) return fpinfo->grouped_tlist; - /* - * We require columns specified in foreignrel->reltarget->exprs and those - * required for evaluating the local conditions. - */ - tlist = add_to_flat_tlist(tlist, - pull_var_clause((Node *) foreignrel->reltarget->exprs, - PVC_RECURSE_PLACEHOLDERS)); - foreach(lc, fpinfo->local_conds) - { - RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); - - tlist = add_to_flat_tlist(tlist, - pull_var_clause((Node *) rinfo->clause, - PVC_RECURSE_PLACEHOLDERS)); - } - return tlist; } diff --git a/src/multicorn.c b/src/multicorn.c index 761bac4a..53f978ef 100644 --- a/src/multicorn.c +++ b/src/multicorn.c @@ -1684,12 +1684,16 @@ multicorn_foreign_grouping_ok(PlannerInfo *root, RelOptInfo *grouped_rel, ofpinfo = (MulticornPlanState *) fpinfo->outerrel->fdw_private; /* - * If underlying scan relation has any local conditions, those conditions - * are required to be applied before performing aggregation. Hence the - * aggregate cannot be pushed down. + * If underlying scan relation has any quals with unsupported operators + * we cannot pushdown the aggregation. */ - if (ofpinfo->local_conds) - return false; + foreach(lc, ofpinfo->qual_list) + { + MulticornBaseQual *qual = (MulticornBaseQual *) lfirst(lc); + + if (!list_member(ofpinfo->operators_supported, makeString(qual->opname))) + return false; + } /* * Examine grouping expressions, as well as other expressions we'd need to diff --git a/src/multicorn.h b/src/multicorn.h index 6d624ec6..c507effe 100644 --- a/src/multicorn.h +++ b/src/multicorn.h @@ -103,6 +103,7 @@ typedef struct MulticornPlanState /* Details about upperrel pushdown fetched from the Python FDW instance */ bool groupby_supported; List *agg_functions; + List *operators_supported; /* * Aggregation and grouping data to be passed to the execution phase. @@ -120,7 +121,6 @@ typedef struct MulticornPlanState /* qual clauses */ List *baserestrictinfo; - List *local_conds; /* Actual remote restriction clauses for scan (sans RestrictInfos) */ List *final_remote_exprs; diff --git a/src/python.c b/src/python.c index 2046f5df..129de719 100644 --- a/src/python.c +++ b/src/python.c @@ -1721,12 +1721,13 @@ canPushdownUpperrel(MulticornPlanState * state) *p_object, *p_agg_funcs, *p_agg_func, + *p_ops, *p_item; Py_ssize_t i, size, strlength; char *tempbuffer; - StringInfo agg_func; + StringInfo agg_func, operator; bool pushdown_upperrel = false; p_upperrel_pushdown = PyObject_CallMethod(fdw_instance, "can_pushdown_upperrel", "()"); @@ -1769,6 +1770,31 @@ canPushdownUpperrel(MulticornPlanState * state) } Py_XDECREF(p_object); + /* Construct supported qual operators list */ + p_ops = PyMapping_GetItemString(p_upperrel_pushdown, "operators_supported"); + if (p_ops != NULL && p_ops != Py_None) + { + size = PySequence_Size(p_ops); + + for (i = 0; i < size; i++) + { + operator = makeStringInfo(); + strlength = 0; + + p_item = PySequence_GetItem(p_ops, i); + p_object = PyUnicode_AsEncodedString(p_item, getPythonEncodingName(), NULL); + errorCheck(); + PyBytes_AsStringAndSize(p_object, &tempbuffer, &strlength); + appendBinaryStringInfo(operator, tempbuffer, strlength); + + state->operators_supported = lappend(state->operators_supported, makeString(operator->data)); + + Py_DECREF(p_object); + Py_DECREF(p_item); + } + } + Py_XDECREF(p_ops); + pushdown_upperrel = true; } From a0e308a8be9e1f45c69d2d28c4a7299ce058315b Mon Sep 17 00:00:00 2001 From: Marko Grujic Date: Thu, 20 Jan 2022 14:49:36 +0000 Subject: [PATCH 8/8] Extract common function for py-to-pg list conversion --- src/python.c | 84 +++++++++++++++++++++++----------------------------- 1 file changed, 37 insertions(+), 47 deletions(-) diff --git a/src/python.c b/src/python.c index 129de719..36963196 100644 --- a/src/python.c +++ b/src/python.c @@ -1408,6 +1408,40 @@ pyobjectToDatum(PyObject *object, StringInfo buffer, return value; } +void +pythonUnicodeSequenceToList(PyObject *pySequence, List **target) +{ + PyObject *p_item, + *p_string; + Py_ssize_t i, + size, + strlength; + char *tempbuffer; + StringInfo element; + + if (pySequence != NULL && pySequence != Py_None) + { + size = PySequence_Size(pySequence); + + for (i = 0; i < size; i++) + { + element = makeStringInfo(); + strlength = 0; + + p_item = PySequence_GetItem(pySequence, i); + p_string = PyUnicode_AsEncodedString(p_item, getPythonEncodingName(), NULL); + errorCheck(); + PyBytes_AsStringAndSize(p_string, &tempbuffer, &strlength); + appendBinaryStringInfo(element, tempbuffer, strlength); + + *target = lappend(*target, makeString(element->data)); + + Py_DECREF(p_item); + Py_DECREF(p_string); + } + } +} + PyObject * datumStringToPython(Datum datum, ConversionInfo * cinfo) { @@ -1720,14 +1754,7 @@ canPushdownUpperrel(MulticornPlanState * state) *p_upperrel_pushdown, *p_object, *p_agg_funcs, - *p_agg_func, - *p_ops, - *p_item; - Py_ssize_t i, - size, - strlength; - char *tempbuffer; - StringInfo agg_func, operator; + *p_ops; bool pushdown_upperrel = false; p_upperrel_pushdown = PyObject_CallMethod(fdw_instance, "can_pushdown_upperrel", "()"); @@ -1748,51 +1775,14 @@ canPushdownUpperrel(MulticornPlanState * state) if (p_object != NULL && p_object != Py_None) { p_agg_funcs = PyMapping_Keys(p_object); - size = PySequence_Size(p_agg_funcs); - - for (i = 0; i < size; i++) - { - agg_func = makeStringInfo(); - strlength = 0; - - p_item = PySequence_GetItem(p_agg_funcs, i); - p_agg_func = PyUnicode_AsEncodedString(p_item, getPythonEncodingName(), NULL); - errorCheck(); - PyBytes_AsStringAndSize(p_agg_func, &tempbuffer, &strlength); - appendBinaryStringInfo(agg_func, tempbuffer, strlength); - - state->agg_functions = lappend(state->agg_functions, makeString(agg_func->data)); - - Py_DECREF(p_agg_func); - Py_DECREF(p_item); - } + pythonUnicodeSequenceToList(p_agg_funcs, &state->agg_functions); Py_DECREF(p_agg_funcs); } Py_XDECREF(p_object); /* Construct supported qual operators list */ p_ops = PyMapping_GetItemString(p_upperrel_pushdown, "operators_supported"); - if (p_ops != NULL && p_ops != Py_None) - { - size = PySequence_Size(p_ops); - - for (i = 0; i < size; i++) - { - operator = makeStringInfo(); - strlength = 0; - - p_item = PySequence_GetItem(p_ops, i); - p_object = PyUnicode_AsEncodedString(p_item, getPythonEncodingName(), NULL); - errorCheck(); - PyBytes_AsStringAndSize(p_object, &tempbuffer, &strlength); - appendBinaryStringInfo(operator, tempbuffer, strlength); - - state->operators_supported = lappend(state->operators_supported, makeString(operator->data)); - - Py_DECREF(p_object); - Py_DECREF(p_item); - } - } + pythonUnicodeSequenceToList(p_ops, &state->operators_supported); Py_XDECREF(p_ops); pushdown_upperrel = true;