Skip to content

Commit

Permalink
accept regex in filter (#3666)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jun 11, 2022
1 parent c48823a commit c627af4
Show file tree
Hide file tree
Showing 9 changed files with 47 additions and 79 deletions.
29 changes: 0 additions & 29 deletions polars/polars-core/src/chunked_array/bitwise.rs
Original file line number Diff line number Diff line change
Expand Up @@ -253,35 +253,6 @@ impl BitAnd for BooleanChunked {
}
}

/// Generates placeholder bitwise operator impls (`&`, `|`, `^`) on references
/// to the given type.
///
/// Every generated operator body is `unimplemented!()`: the impls exist only so
/// the type satisfies the operator trait bounds, and invoking any of them panics.
macro_rules! impl_floats {
    ($ca:ty) => {
        impl BitAnd for &$ca {
            type Output = $ca;

            fn bitand(self, _other: Self) -> Self::Output {
                unimplemented!()
            }
        }

        impl BitOr for &$ca {
            type Output = $ca;

            fn bitor(self, _other: Self) -> Self::Output {
                unimplemented!()
            }
        }

        impl BitXor for &$ca {
            type Output = $ca;

            fn bitxor(self, _other: Self) -> Self::Output {
                unimplemented!()
            }
        }
    };
}

// Instantiate the `impl_floats!` operator impls (all of which are
// `unimplemented!()`) for `Float64Chunked` and `Float32Chunked`.
impl_floats!(Float64Chunked);
impl_floats!(Float32Chunked);

#[cfg(test)]
mod test {
use super::*;
Expand Down
31 changes: 0 additions & 31 deletions polars/polars-core/src/series/implementations/floats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ use ahash::RandomState;
use arrow::array::ArrayRef;
use polars_arrow::prelude::QuantileInterpolOptions;
use std::borrow::Cow;
use std::ops::{BitAnd, BitOr, BitXor};

macro_rules! impl_dyn_series {
($ca: ident) => {
Expand Down Expand Up @@ -180,36 +179,6 @@ macro_rules! impl_dyn_series {
self.0.interpolate().into_series()
}

/// Bitwise AND between the wrapped array and `other`.
///
/// When `other` holds exactly one element it is first cast to this series'
/// dtype; otherwise it is used as-is. The operand is then passed through
/// `unpack_series_matching_type` before the operator is applied.
///
/// # Errors
///
/// Propagates any error from the cast or from `unpack_series_matching_type`.
fn bitand(&self, other: &Series) -> Result<Series> {
    let rhs = match other.len() {
        1 => Cow::Owned(other.cast(self.dtype())?),
        _ => Cow::Borrowed(other),
    };
    let unpacked = self.0.unpack_series_matching_type(&rhs)?;
    Ok(self.0.bitand(&unpacked).into_series())
}

/// Bitwise OR between the wrapped array and `other`.
///
/// When `other` holds exactly one element it is first cast to this series'
/// dtype; otherwise it is used as-is. The operand is then passed through
/// `unpack_series_matching_type` before the operator is applied.
///
/// # Errors
///
/// Propagates any error from the cast or from `unpack_series_matching_type`.
fn bitor(&self, other: &Series) -> Result<Series> {
    let rhs = match other.len() {
        1 => Cow::Owned(other.cast(self.dtype())?),
        _ => Cow::Borrowed(other),
    };
    let unpacked = self.0.unpack_series_matching_type(&rhs)?;
    Ok(self.0.bitor(&unpacked).into_series())
}

/// Bitwise XOR between the wrapped array and `other`.
///
/// When `other` holds exactly one element it is first cast to this series'
/// dtype; otherwise it is used as-is. The operand is then passed through
/// `unpack_series_matching_type` before the operator is applied.
///
/// # Errors
///
/// Propagates any error from the cast or from `unpack_series_matching_type`.
fn bitxor(&self, other: &Series) -> Result<Series> {
    let rhs = match other.len() {
        1 => Cow::Owned(other.cast(self.dtype())?),
        _ => Cow::Borrowed(other),
    };
    let unpacked = self.0.unpack_series_matching_type(&rhs)?;
    Ok(self.0.bitxor(&unpacked).into_series())
}

/// Renames this series by forwarding `name` to the wrapped value's `rename`.
fn rename(&mut self, name: &str) {
self.0.rename(name);
}
Expand Down
33 changes: 21 additions & 12 deletions polars/polars-core/src/series/series_trait.rs
Original file line number Diff line number Diff line change
Expand Up @@ -203,24 +203,33 @@ pub trait SeriesTrait:
/// Rename this series to `name`.
fn rename(&mut self, name: &str);

/// Bitwise AND of this series with another series.
///
/// This is the trait's default body, used by implementors that do not
/// override it. It reports the operation as unsupported instead of panicking,
/// so callers can handle the failure.
///
/// # Errors
///
/// Always returns [`PolarsError::InvalidOperation`] with a message naming
/// this series' dtype.
fn bitand(&self, _other: &Series) -> Result<Series> {
    Err(PolarsError::InvalidOperation(
        format!(
            "bitwise 'AND' operation not supported for dtype {:?}",
            self.dtype()
        )
        .into(),
    ))
}

/// Bitwise OR of this series with another series.
///
/// This is the trait's default body, used by implementors that do not
/// override it. It reports the operation as unsupported instead of panicking,
/// so callers can handle the failure.
///
/// # Errors
///
/// Always returns [`PolarsError::InvalidOperation`] with a message naming
/// this series' dtype.
fn bitor(&self, _other: &Series) -> Result<Series> {
    Err(PolarsError::InvalidOperation(
        format!(
            "bitwise 'OR' operation not supported for dtype {:?}",
            self.dtype()
        )
        .into(),
    ))
}

/// Bitwise XOR of this series with another series.
///
/// This is the trait's default body, used by implementors that do not
/// override it. It reports the operation as unsupported instead of panicking,
/// so callers can handle the failure.
///
/// # Errors
///
/// Always returns [`PolarsError::InvalidOperation`] with a message naming
/// this series' dtype.
fn bitxor(&self, _other: &Series) -> Result<Series> {
    Err(PolarsError::InvalidOperation(
        format!(
            "bitwise 'XOR' operation not supported for dtype {:?}",
            self.dtype()
        )
        .into(),
    ))
}

/// Get the lengths of the underlying chunks
Expand Down
9 changes: 4 additions & 5 deletions polars/polars-lazy/src/logical_plan/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -302,11 +302,10 @@ impl LogicalPlanBuilder {

/// Apply a filter
pub fn filter(self, predicate: Expr) -> Self {
let predicate = if has_expr(&predicate, |e| {
matches!(
e,
Expr::Wildcard | Expr::RenameAlias { .. } | Expr::Columns(_)
)
let predicate = if has_expr(&predicate, |e| match e {
Expr::Column(name) => name.starts_with('^') && name.ends_with('$'),
Expr::Wildcard | Expr::RenameAlias { .. } | Expr::Columns(_) => true,
_ => false,
}) {
let rewritten = rewrite_projections(vec![predicate], self.0.schema(), &[]);
combine_predicates_expr(rewritten.into_iter())
Expand Down
1 change: 1 addition & 0 deletions py-polars/polars/internals/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
_scan_ipc_fsspec,
_scan_parquet_fsspec,
)
from .datatypes import IntoExpr
from .expr import Expr, expr_to_lit_or_expr, selection_to_pyexpr_list, wrap_expr
from .frame import DataFrame, LazyFrame, wrap_df, wrap_ldf
from .functions import concat, date_range # DataFrame.describe() & DataFrame.upsample()
Expand Down
5 changes: 5 additions & 0 deletions py-polars/polars/internals/datatypes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from typing import Union

from polars import internals as pli

# Type alias: a Python scalar (int, float, str) or a polars Expr / Series.
# The polars types are written as string forward references to `pli`.
IntoExpr = Union[int, float, str, "pli.Expr", "pli.Series"]
2 changes: 1 addition & 1 deletion py-polars/polars/internals/lazy_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -855,7 +855,7 @@ def map_binary(


def fold(
acc: "pli.Expr",
acc: "pli.IntoExpr",
f: Callable[["pli.Series", "pli.Series"], "pli.Series"],
exprs: Union[Sequence[Union["pli.Expr", str]], "pli.Expr"],
) -> "pli.Expr":
Expand Down
2 changes: 1 addition & 1 deletion py-polars/tests/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -1587,7 +1587,7 @@ def test_filter_with_all_expansion() -> None:
"a": [None, None, None],
}
)
out = df.filter(~pl.fold(True, lambda acc, s: acc & s.is_null(), pl.all())) # type: ignore
out = df.filter(~pl.fold(True, lambda acc, s: acc & s.is_null(), pl.all()))
assert out.shape == (2, 3)


Expand Down
14 changes: 14 additions & 0 deletions py-polars/tests/test_exprs.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,3 +248,17 @@ def test_expression_appends() -> None:

assert out.n_chunks() == 1
assert out.to_series().to_list() == [None, None, None, 1, 1, 2]


def test_regex_in_filter() -> None:
    """Filter with a predicate built from a regex column selector (``^...$``)."""
    frame = pl.DataFrame(
        {
            "nrs": [1, 2, 3, None, 5],
            "names": ["foo", "ham", "spam", "egg", None],
            "flt": [1.0, None, 3.0, 1.0, None],
        }
    )

    # Compare the regex-selected columns against 3 and OR the results together.
    predicate = pl.fold(
        acc=False,
        f=lambda acc, s: acc | s,
        exprs=(pl.col("^nrs|flt*$") < 3),
    )
    first_row = frame.filter(predicate).row(0)

    assert first_row == (1, "foo", 1.0)

0 comments on commit c627af4

Please sign in to comment.