Skip to content

Commit

Permalink
ignore unknown supertype (#4333)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Aug 9, 2022
1 parent ead3f52 commit a61bc54
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 42 deletions.
1 change: 1 addition & 0 deletions polars/polars-core/src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -656,6 +656,7 @@ fn _get_supertype(l: &DataType, r: &DataType) -> Option<DataType> {
let st = _get_supertype(inner, other)?;
Some(DataType::List(Box::new(st)))
}
(dt, Unknown) => Some(dt.clone()),
_ => None,
}
}
Expand Down
90 changes: 48 additions & 42 deletions polars/polars-lazy/src/logical_plan/optimizer/type_coercion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,45 +143,39 @@ impl OptimizationRule for TypeCoercionRule {
let (falsy, type_false) =
get_aexpr_and_type(expr_arena, falsy_node, &input_schema)?;

if type_true == type_false ||
// data type unknown still has to be set
matches!(&type_true, DataType::Unknown) || matches!(&type_false, DataType::Unknown)
{
None
} else {
let st = get_supertype(&type_true, &type_false).expect("supertype");
let st = modify_supertype(st, truthy, falsy, &type_true, &type_false);

// only cast if the type is not already the super type.
// this can prevent an expensive flattening and subsequent aggregation
// in a groupby context. To be able to cast the groups need to be
// flattened
let new_node_truthy = if type_true != st {
expr_arena.add(AExpr::Cast {
expr: truthy_node,
data_type: st.clone(),
strict: false,
})
} else {
truthy_node
};
early_escape(&type_true, &type_false)?;
let st = get_supertype(&type_true, &type_false).expect("supertype");
let st = modify_supertype(st, truthy, falsy, &type_true, &type_false);

let new_node_falsy = if type_false != st {
expr_arena.add(AExpr::Cast {
expr: falsy_node,
data_type: st,
strict: false,
})
} else {
falsy_node
};
// only cast if the type is not already the super type.
// this can prevent an expensive flattening and subsequent aggregation
// in a groupby context. To be able to cast the groups need to be
// flattened
let new_node_truthy = if type_true != st {
expr_arena.add(AExpr::Cast {
expr: truthy_node,
data_type: st.clone(),
strict: false,
})
} else {
truthy_node
};

Some(AExpr::Ternary {
truthy: new_node_truthy,
falsy: new_node_falsy,
predicate,
let new_node_falsy = if type_false != st {
expr_arena.add(AExpr::Cast {
expr: falsy_node,
data_type: st,
strict: false,
})
}
} else {
falsy_node
};

Some(AExpr::Ternary {
truthy: new_node_truthy,
falsy: new_node_falsy,
predicate,
})
}
AExpr::BinaryExpr {
left: node_left,
Expand Down Expand Up @@ -300,9 +294,9 @@ impl OptimizationRule for TypeCoercionRule {
}
}

if type_left == type_right || compare_cat_to_string || datetime_arithmetic ||
// data type unknown still has to be set
matches!(&type_left, DataType::Unknown) || matches!(&type_right, DataType::Unknown)
if compare_cat_to_string
|| datetime_arithmetic
|| early_escape(&type_left, &type_right).is_none()
{
None
} else {
Expand Down Expand Up @@ -366,6 +360,8 @@ impl OptimizationRule for TypeCoercionRule {
let (_, type_left) = get_aexpr_and_type(expr_arena, input[0], &input_schema)?;
let (_, type_other) = get_aexpr_and_type(expr_arena, other_node, &input_schema)?;

early_escape(&type_left, &type_other)?;

match (&type_left, type_other) {
// cast both local and global string cache
// note that there might not yet be a rev
Expand Down Expand Up @@ -405,6 +401,7 @@ impl OptimizationRule for TypeCoercionRule {
let (left, type_left) = get_aexpr_and_type(expr_arena, input[0], &input_schema)?;
let (fill_value, type_fill_value) =
get_aexpr_and_type(expr_arena, other_node, &input_schema)?;

let super_type = get_supertype(&type_left, &type_fill_value).ok()?;
let super_type =
modify_supertype(super_type, left, fill_value, &type_left, &type_fill_value);
Expand All @@ -427,9 +424,7 @@ impl OptimizationRule for TypeCoercionRule {
let (fill_value, type_other) =
get_aexpr_and_type(expr_arena, other_node, &input_schema)?;

if type_self == type_other {
return None;
}
early_escape(&type_self, &type_other)?;

let super_type = get_supertype(&type_self, &type_other).ok()?;
let super_type =
Expand Down Expand Up @@ -470,6 +465,17 @@ impl OptimizationRule for TypeCoercionRule {
}
}

fn early_escape(type_self: &DataType, type_other: &DataType) -> Option<()> {
if type_self == type_other
|| matches!(type_self, DataType::Unknown)
|| matches!(type_other, DataType::Unknown)
{
None
} else {
Some(())
}
}

#[cfg(test)]
mod test {
use crate::logical_plan::optimizer::stack_opt::OptimizationRule;
Expand Down
15 changes: 15 additions & 0 deletions polars/tests/it/lazy/queries.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,3 +170,18 @@ fn test_sorted_path_joins() -> Result<()> {

Ok(())
}

#[test]
fn test_unknown_supertype_ignore() -> Result<()> {
let df = df![
"col1" => [0., 3., 2., 1.],
"col2" => [0., 0., 1., 1.],
]?;

let out = df
.lazy()
.with_columns([(col("col1").fill_null(0f64) + col("col2"))])
.collect()?;
assert_eq!(out.shape(), (4, 2));
Ok(())
}

0 comments on commit a61bc54

Please sign in to comment.