Skip to content

Commit

Permalink
fix[rust]: fix quadratic behavior in deeply nested expressions (#4782)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Sep 8, 2022
1 parent 1279d35 commit ac2be84
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 16 deletions.
39 changes: 23 additions & 16 deletions polars/polars-lazy/src/logical_plan/aexpr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -224,42 +224,49 @@ impl AExpr {
BinaryExpr { left, right, op } => {
use DataType::*;

let expr_type = match op {
let field = match op {
Operator::Lt
| Operator::Gt
| Operator::Eq
| Operator::NotEq
| Operator::And
| Operator::LtEq
| Operator::GtEq
| Operator::Or => DataType::Boolean,
| Operator::Or => {
let out_field;
let out_name = {
out_field = arena.get(*left).to_field(schema, ctxt, arena)?;
out_field.name().as_str()
};
Field::new(out_name, DataType::Boolean)
}
_ => {
// don't traverse tree until strictly needed. Can have terrible performance.
// # 3210
let left_type = arena.get(*left).get_type(schema, ctxt, arena)?;

// take the left field as a whole.
// don't take dtype and name separate as that splits the tree every node
// leading to quadratic behavior. # 4736
let mut left_field = arena.get(*left).to_field(schema, ctxt, arena)?;
let right_type = arena.get(*right).get_type(schema, ctxt, arena)?;

match op {
Operator::Minus => match (left_type, right_type) {
let super_type = match op {
Operator::Minus => match (&left_field.dtype, right_type) {
// T - T != T if T is a datetime / date
(Datetime(tul, _), Datetime(tur, _)) => {
Duration(get_time_units(&tul, &tur))
Duration(get_time_units(tul, &tur))
}
(Date, Date) => Duration(TimeUnit::Milliseconds),
(left, right) => try_get_supertype(&left, &right)?,
(left, right) => try_get_supertype(left, &right)?,
},
_ => try_get_supertype(&left_type, &right_type)?,
}
_ => try_get_supertype(&left_field.dtype, &right_type)?,
};
left_field.coerce(super_type);
left_field
}
};

let out_field;
let out_name = {
out_field = arena.get(*left).to_field(schema, ctxt, arena)?;
out_field.name().as_str()
};

Ok(Field::new(out_name, expr_type))
Ok(field)
}
Sort { expr, .. } => arena.get(*expr).to_field(schema, ctxt, arena),
Take { expr, .. } => arena.get(*expr).to_field(schema, ctxt, arena),
Expand Down
11 changes: 11 additions & 0 deletions py-polars/tests/unit/test_lazy.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from __future__ import annotations

import os
from functools import reduce
from operator import add
from string import ascii_letters
from typing import Any, cast

import numpy as np
Expand Down Expand Up @@ -1462,3 +1465,11 @@ def test_lazy_cache_hit(capfd: Any) -> None:
).collect().to_dict(False) == {"a": [0, 0, 0], "c": ["x", "y", "z"]}
(out, _) = capfd.readouterr()
assert "CACHE HIT" in out


def test_quadratic_behavior_4736() -> None:
# we don't assert anything.
# If this function does not stall
# our tests it has passed.
df = pl.DataFrame(columns=list(ascii_letters))
df.lazy().select(reduce(add, (pl.col(fld) for fld in df.columns)))

0 comments on commit ac2be84

Please sign in to comment.