Skip to content

Commit

Permalink
performance[rust]: prune unneeded projections (#5032)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Sep 29, 2022
1 parent 1729256 commit b067b15
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,11 @@ impl ProjectionPushDown {
use ALogicalPlan::*;

match logical_plan {
Projection { expr, input, .. } => {
Projection {
expr,
input,
schema,
} => {
// A projection can consist of a chain of expressions followed by an alias.
// We want to do the chain locally because it can have complicated side effects.
// The only thing we push down is the root name of the projection.
Expand All @@ -305,6 +309,26 @@ impl ProjectionPushDown {
}
}
}

// Skip any projection whose output is not used by the upstream selection.
if projections_seen > 0 {
// TODO! investigate why `to_field` can fail here.
// TODO! make `to_field` return an `Option` instead.
let output_field = expr_arena.get(*e).to_field(
schema.as_ref(),
Context::Default,
expr_arena,
);

if let Ok(output_field) = output_field {
let output_name = output_field.name();
let is_used_upstream =
projected_names.contains(output_name.as_str());
if !is_used_upstream {
continue;
}
};
}
}

add_expr_to_accumulated(
Expand All @@ -319,7 +343,7 @@ impl ProjectionPushDown {
input,
acc_projections,
projected_names,
projections_seen,
projections_seen + 1,
lp_arena,
expr_arena,
)?;
Expand All @@ -328,7 +352,9 @@ impl ProjectionPushDown {
let mut local_projection = Vec::with_capacity(expr.len());

// the projections should all be done at the latest projection node to keep the same schema order
if projections_seen == 0 {
if projections_seen == 0
|| expr.iter().any(|node| has_aexpr_alias(*node, expr_arena))
{
let schema = lp.schema(lp_arena);
for node in expr {
// Due to the pushdown, a lot of projections cannot be done anymore at the final
Expand All @@ -341,13 +367,6 @@ impl ProjectionPushDown {
local_projection.push(node);
}
}
// only aliases should be projected locally in the rest of the projections.
} else {
for expr in expr {
if has_aexpr_alias(expr, expr_arena) {
local_projection.push(expr)
}
}
}

let builder = ALogicalPlanBuilder::new(input, expr_arena, lp_arena);
Expand Down
12 changes: 12 additions & 0 deletions py-polars/tests/unit/test_projections.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,15 @@ def test_melt_projection_pd_block_4997() -> None:
.agg(pl.col("variable").alias("result"))
.collect()
).to_dict(False) == {"row_nr": [0], "result": [["col1", "col2"]]}


def test_double_projection_pushdown() -> None:
    """Check the optimized plan of two chained ``select`` calls.

    The first ``select`` keeps all three columns and the second keeps only
    ``c0`` and ``c1``; the optimized plan description must then contain
    ``PROJECT 2/3 COLUMNS``.
    """
    lazy_frame = pl.DataFrame({"c0": [], "c1": [], "c2": []}).lazy()
    narrowed = lazy_frame.select(["c0", "c1", "c2"]).select(["c0", "c1"])
    optimized_plan = narrowed.describe_optimized_plan()
    assert "PROJECT 2/3 COLUMNS" in optimized_plan

0 comments on commit b067b15

Please sign in to comment.