Skip to content

Commit

Permalink
allow regex expansion in binary/ternary expressions (#3769)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jun 22, 2022
1 parent a71898d commit 996587a
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 11 deletions.
44 changes: 33 additions & 11 deletions polars/polars-lazy/src/logical_plan/projection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,9 @@ fn expand_regex(expr: &Expr, result: &mut Vec<Expr>, schema: &Schema, pattern: &
let mut new_expr = expr.clone();

new_expr.mutate().apply(|e| match &e {
Expr::Column(_) => {
Expr::Column(pat) if pat.as_ref() == pattern => {
*e = Expr::Column(Arc::from(name.as_str()));
false
true
}
_ => true,
});
Expand All @@ -109,20 +109,42 @@ fn expand_regex(expr: &Expr, result: &mut Vec<Expr>, schema: &Schema, pattern: &
/// that are selected by that regex in `result`. The regex should start with `^` and end with `$`.
fn replace_regex(expr: &Expr, result: &mut Vec<Expr>, schema: &Schema) {
let roots = expr_to_root_column_names(expr);
// only in simple expression (no binary expression)
// we pattern match regex columns
if roots.len() == 1 {
let name = &*roots[0];
let mut regex = None;
for name in &roots {
if name.starts_with('^') && name.ends_with('$') {
expand_regex(expr, result, schema, name)
} else {
let expr = rewrite_special_aliases(expr.clone());
result.push(expr)
match regex {
None => {
regex = Some(name);
expand_regex(expr, result, schema, name)
}
Some(r) => {
assert_eq!(
r, name,
"an expression is not allowed to have different regexes"
)
}
}
}
} else {
}
if regex.is_none() {
let expr = rewrite_special_aliases(expr.clone());
result.push(expr)
}

// // only in simple expression (no binary expression)
// // we pattern match regex columns
// if roots.len() == 1 {
// let name = &*roots[0];
// if name.starts_with('^') && name.ends_with('$') {
// expand_regex(expr, result, schema, name)
// } else {
// let expr = rewrite_special_aliases(expr.clone());
// result.push(expr)
// }
// } else {
// let expr = rewrite_special_aliases(expr.clone());
// result.push(expr)
// }
}

/// replace `columns(["A", "B"])..` with `col("A")..`, `col("B")..`
Expand Down
51 changes: 51 additions & 0 deletions py-polars/tests/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -2236,3 +2236,54 @@ def test_fill_null_limits() -> None:
False,
],
}


def test_selection_regex_and_multicol() -> None:
    """Name-list, wildcard and regex selectors must give the same results,
    both on their own and as operands of a binary expression."""
    frame = pl.DataFrame(
        {
            "a": [1, 2, 3, 4],
            "b": [5, 6, 7, 8],
            "c": [9, 10, 11, 12],
            "foo": [13, 14, 15, 16],
        }
    )

    # Three ways of selecting columns "a", "b" and "c". Each entry is a factory
    # so that every use below builds a fresh expression object.
    selectors = (
        lambda: pl.col(["a", "b", "c"]),
        lambda: pl.all().exclude("foo"),
        lambda: pl.col("^\\w$"),
    )
    by_list, by_wildcard, by_regex = selectors

    # Selection only: the result is not inspected, the call just has to succeed.
    frame.select(
        [
            by_list().suffix("_list"),
            by_wildcard().suffix("_wild"),
            by_regex().suffix("_regex"),
        ]
    )

    # Multi * Single
    times_foo = {
        "a": [13, 28, 45, 64],
        "b": [65, 84, 105, 128],
        "c": [117, 140, 165, 192],
    }
    for make in selectors:
        assert frame.select(make() * pl.col("foo")).to_dict(False) == times_foo

    # Multi * Multi
    squared = {
        "a": [1, 4, 9, 16],
        "b": [25, 36, 49, 64],
        "c": [81, 100, 121, 144],
    }
    for make in selectors:
        assert frame.select(make() * make()).to_dict(False) == squared

0 comments on commit 996587a

Please sign in to comment.