Skip to content

Commit

Permalink
Fix tree traversal complexity (#3213)
Browse files Browse the repository at this point in the history
Accumulated filters had O(n^x)
complexity. Don't know how large x was,
but it was big. I think 3-5.
  • Loading branch information
ritchie46 committed Apr 22, 2022
1 parent 2e492df commit 288fd29
Show file tree
Hide file tree
Showing 3 changed files with 105 additions and 11 deletions.
60 changes: 60 additions & 0 deletions polars/polars-lazy/src/dot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,66 @@ impl LazyFrame {
}
}

impl Expr {
/// Get a dot language representation of the Expression.
#[cfg_attr(docsrs, doc(cfg(feature = "dot_diagram")))]
pub fn to_dot(&self) -> Result<String> {
let mut s = String::with_capacity(512);
self.dot_viz(&mut s, (0, 0), "").expect("io error");
s.push_str("\n}");
Ok(s)
}

fn write_dot(
&self,
acc_str: &mut String,
prev_node: &str,
current_node: &str,
id: usize,
) -> std::fmt::Result {
if id == 0 {
writeln!(acc_str, "graph expr {{")
} else {
writeln!(
acc_str,
"\"{}\" -- \"{}\"",
prev_node.replace('"', r#"\""#),
current_node.replace('"', r#"\""#)
)
}
}

#[cfg_attr(docsrs, doc(cfg(feature = "dot_diagram")))]
fn dot_viz(
&self,
acc_str: &mut String,
id: (usize, usize),
prev_node: &str,
) -> std::fmt::Result {
let (mut branch, id) = id;

match self {
Expr::BinaryExpr { left, op, right } => {
let current_node = format!(
r#"BINARY
left _;
op {:?},
right: _ [{},{}]"#,
op, branch, id
);

self.write_dot(acc_str, prev_node, &current_node, id)?;
for input in [left, right] {
input.dot_viz(acc_str, (branch, id + 1), &current_node)?;
branch += 1;
}
Ok(())
}
_ => self.write_dot(acc_str, prev_node, &format!("{}{}", branch, id), id),
}
}
}

impl LogicalPlan {
fn write_dot(
&self,
Expand Down
28 changes: 17 additions & 11 deletions polars/polars-lazy/src/logical_plan/aexpr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,9 +190,6 @@ impl AExpr {
BinaryExpr { left, right, op } => {
use DataType::*;

let left_type = arena.get(*left).get_type(schema, ctxt, arena)?;
let right_type = arena.get(*right).get_type(schema, ctxt, arena)?;

let expr_type = match op {
Operator::Lt
| Operator::Gt
Expand All @@ -202,15 +199,24 @@ impl AExpr {
| Operator::LtEq
| Operator::GtEq
| Operator::Or => DataType::Boolean,
Operator::Minus => match (left_type, right_type) {
// T - T != T if T is a datetime / date
(Datetime(tul, _), Datetime(tur, _)) => {
Duration(get_time_units(&tul, &tur))
_ => {
// don't traverse tree until strictly needed. Can have terrible performance.
// # 3210
let left_type = arena.get(*left).get_type(schema, ctxt, arena)?;
let right_type = arena.get(*right).get_type(schema, ctxt, arena)?;

match op {
Operator::Minus => match (left_type, right_type) {
// T - T != T if T is a datetime / date
(Datetime(tul, _), Datetime(tur, _)) => {
Duration(get_time_units(&tul, &tur))
}
(Date, Date) => Duration(TimeUnit::Milliseconds),
(left, right) => get_supertype(&left, &right)?,
},
_ => get_supertype(&left_type, &right_type)?,
}
(Date, Date) => Duration(TimeUnit::Milliseconds),
(left, right) => get_supertype(&left, &right)?,
},
_ => get_supertype(&left_type, &right_type)?,
}
};

let out_field;
Expand Down
28 changes: 28 additions & 0 deletions polars/tests/it/lazy/predicate_queries.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,31 @@ fn test_combine_columns_in_filter() -> Result<()> {
assert!(out.frame_equal(&expected));
Ok(())
}

fn create_n_filters(col_name: &str, num_filters: usize) -> Vec<Expr> {
(0..num_filters)
.into_iter()
.map(|i| col(col_name).eq(lit(format!("{}", i))))
.collect()
}

fn and_filters(expr: Vec<Expr>) -> Expr {
expr.into_iter().reduce(polars::prelude::Expr::and).unwrap()
}

#[test]
fn test_many_filters() -> Result<()> {
// just check if it runs. in #3210
// we had terrible tree traversion perf.
let df = df! {
"id" => ["1", "2"]
}?;
let filters = create_n_filters("id", 30);
let _ = df
.lazy()
.filter(and_filters(filters))
.with_predicate_pushdown(false)
.collect()?;

Ok(())
}

0 comments on commit 288fd29

Please sign in to comment.