Skip to content

Commit

Permalink
improve nested wildcard expansion (#2870)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Mar 10, 2022
1 parent 3fe3dc0 commit fe28511
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 22 deletions.
44 changes: 24 additions & 20 deletions polars/polars-lazy/src/logical_plan/projection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -267,28 +267,32 @@ pub(crate) fn rewrite_projections(exprs: Vec<Expr>, schema: &Schema, keys: &[Exp
|e| matches!(e, Expr::Function { options, .. } if options.input_wildcard_expansion),
) {
expr.mutate().apply(|e| {
if let Expr::Function { input, .. } = e {
let mut new_inputs = Vec::with_capacity(input.len());
match e {
Expr::Function { input, options, .. }
if options.input_wildcard_expansion =>
{
let mut new_inputs = Vec::with_capacity(input.len());

input.iter_mut().for_each(|e| {
if has_wildcard(e) {
replace_wilcard(e, &mut new_inputs, &exclude, schema)
} else {
#[cfg(feature = "regex")]
{
replace_regex(e, &mut new_inputs, schema)
}
#[cfg(not(feature = "regex"))]
{
new_inputs.push(e.clone())
}
};
});
input.iter_mut().for_each(|e| {
if has_wildcard(e) {
replace_wilcard(e, &mut new_inputs, &exclude, schema)
} else {
#[cfg(feature = "regex")]
{
replace_regex(e, &mut new_inputs, schema)
}
#[cfg(not(feature = "regex"))]
{
new_inputs.push(e.clone())
}
};
});

*input = new_inputs;
false
} else {
true
*input = new_inputs;
// keep traversing: there can be more functions that require expansion
true
}
_ => true,
}
});
result.push(expr);
Expand Down
7 changes: 6 additions & 1 deletion polars/polars-lazy/src/physical_plan/expressions/column.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,12 @@ impl PhysicalExpr for ColumnExpr {
// now we do a linear search first as the lazy reported schema may still be incorrect
// with the `panic_on_schema` feature enabled we panic so that it can be fixed when occurring
None => {
debug_assert!(false);
#[cfg(feature = "panic_on_schema")]
{
panic!("invalid schema")
}
// when `panic_on_schema` is disabled we fall back to linear search
#[allow(unreachable_code)]
df.column(&self.0).map(|s| s.clone())
}
}
Expand Down
4 changes: 3 additions & 1 deletion py-polars/polars/internals/lazy_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1071,7 +1071,9 @@ def _date(
return _datetime(year, month, day).cast(Date).alias("date")


def concat_str(exprs: Sequence[Union["pli.Expr", str]], sep: str = "") -> "pli.Expr":
def concat_str(
exprs: Union[Sequence[Union["pli.Expr", str]], "pli.Expr"], sep: str = ""
) -> "pli.Expr":
"""
Horizontally Concat Utf8 Series in linear time. Non utf8 columns are cast to utf8.
Expand Down
11 changes: 11 additions & 0 deletions py-polars/tests/test_exprs.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,3 +111,14 @@ def test_unique_stable() -> None:
expected = pl.Series("a", [1, 2, 3])

verify_series_and_expr_api(a, expected, "unique", True)


def test_wildcard_expansion() -> None:
# Regression test for nested wildcard expansion (see: #2867).
# One function (`concat_str` over `pl.all()`) requires wildcard expansion,
# the other (`str.to_lowercase`, applied on top of it) does not.

df = pl.DataFrame({"a": ["x", "Y", "z"], "b": ["S", "o", "S"]})
# Each expected value is the lowercased concatenation of columns "a" and "b".
assert df.select(
pl.concat_str(pl.all()).str.to_lowercase()
).to_series().to_list() == ["xs", "yo", "zs"]

0 comments on commit fe28511

Please sign in to comment.