Skip to content

Commit

Permalink
feat(rust,python): str.strip with multiple chars (#5929)
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego committed Dec 30, 2022
1 parent 9a07781 commit 10f2d61
Show file tree
Hide file tree
Showing 8 changed files with 253 additions and 110 deletions.
6 changes: 3 additions & 3 deletions polars/polars-lazy/polars-plan/src/dsl/function_expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -406,9 +406,9 @@ impl From<StringFunction> for SpecialEq<Arc<dyn SeriesUdf>> {
Replace { all, literal } => map_as_slice!(strings::replace, literal, all),
Uppercase => map!(strings::uppercase),
Lowercase => map!(strings::lowercase),
Strip(matches) => map!(strings::strip, matches),
LStrip(matches) => map!(strings::lstrip, matches),
RStrip(matches) => map!(strings::rstrip, matches),
Strip(matches) => map!(strings::strip, matches.as_deref()),
LStrip(matches) => map!(strings::lstrip, matches.as_deref()),
RStrip(matches) => map!(strings::rstrip, matches.as_deref()),
}
}
}
Expand Down
51 changes: 36 additions & 15 deletions polars/polars-lazy/polars-plan/src/dsl/function_expr/strings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@ pub enum StringFunction {
},
Uppercase,
Lowercase,
Strip(Option<char>),
RStrip(Option<char>),
LStrip(Option<char>),
Strip(Option<String>),
RStrip(Option<String>),
LStrip(Option<String>),
}

impl Display for StringFunction {
Expand Down Expand Up @@ -142,35 +142,56 @@ pub(super) fn rjust(s: &Series, width: usize, fillchar: char) -> PolarsResult<Se
Ok(ca.rjust(width, fillchar).into_series())
}

pub(super) fn strip(s: &Series, matches: Option<char>) -> PolarsResult<Series> {
pub(super) fn strip(s: &Series, matches: Option<&str>) -> PolarsResult<Series> {
let ca = s.utf8()?;
if let Some(matches) = matches {
Ok(ca
.apply(|s| Cow::Borrowed(s.trim_matches(matches)))
.into_series())
if matches.chars().count() == 1 {
// Fast path for when a single character is passed
Ok(ca
.apply(|s| Cow::Borrowed(s.trim_matches(matches.chars().next().unwrap())))
.into_series())
} else {
Ok(ca
.apply(|s| Cow::Borrowed(s.trim_matches(|c| matches.contains(c))))
.into_series())
}
} else {
Ok(ca.apply(|s| Cow::Borrowed(s.trim())).into_series())
}
}

pub(super) fn lstrip(s: &Series, matches: Option<char>) -> PolarsResult<Series> {
pub(super) fn lstrip(s: &Series, matches: Option<&str>) -> PolarsResult<Series> {
let ca = s.utf8()?;

if let Some(matches) = matches {
Ok(ca
.apply(|s| Cow::Borrowed(s.trim_start_matches(matches)))
.into_series())
if matches.chars().count() == 1 {
// Fast path for when a single character is passed
Ok(ca
.apply(|s| Cow::Borrowed(s.trim_start_matches(matches.chars().next().unwrap())))
.into_series())
} else {
Ok(ca
.apply(|s| Cow::Borrowed(s.trim_start_matches(|c| matches.contains(c))))
.into_series())
}
} else {
Ok(ca.apply(|s| Cow::Borrowed(s.trim_start())).into_series())
}
}

pub(super) fn rstrip(s: &Series, matches: Option<char>) -> PolarsResult<Series> {
pub(super) fn rstrip(s: &Series, matches: Option<&str>) -> PolarsResult<Series> {
let ca = s.utf8()?;
if let Some(matches) = matches {
Ok(ca
.apply(|s| Cow::Borrowed(s.trim_end_matches(matches)))
.into_series())
if matches.chars().count() == 1 {
// Fast path for when a single character is passed
Ok(ca
.apply(|s| Cow::Borrowed(s.trim_end_matches(matches.chars().next().unwrap())))
.into_series())
} else {
Ok(ca
.apply(|s| Cow::Borrowed(s.trim_end_matches(|c| matches.contains(c))))
.into_series())
}
} else {
Ok(ca.apply(|s| Cow::Borrowed(s.trim_end())).into_series())
}
Expand Down
12 changes: 6 additions & 6 deletions polars/polars-lazy/polars-plan/src/dsl/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -341,20 +341,20 @@ impl StringNameSpace {
)
}

/// Remove whitespace on both sides.
pub fn strip(self, matches: Option<char>) -> Expr {
/// Remove leading and trailing characters, or whitespace if matches is None.
pub fn strip(self, matches: Option<String>) -> Expr {
self.0
.map_private(FunctionExpr::StringExpr(StringFunction::Strip(matches)))
}

/// Remove leading whitespace.
pub fn lstrip(self, matches: Option<char>) -> Expr {
/// Remove leading characters, or whitespace if matches is None.
pub fn lstrip(self, matches: Option<String>) -> Expr {
self.0
.map_private(FunctionExpr::StringExpr(StringFunction::LStrip(matches)))
}

/// Remove trailing whitespace.
pub fn rstrip(self, matches: Option<char>) -> Expr {
/// Remove trailing characters, or whitespace if matches is None.
pub fn rstrip(self, matches: Option<String>) -> Expr {
self.0
.map_private(FunctionExpr::StringExpr(StringFunction::RStrip(matches)))
}
Expand Down
12 changes: 3 additions & 9 deletions polars/polars-sql/src/sql_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,25 +181,19 @@ pub(crate) fn parse_sql_expr(expr: &SqlExpr) -> PolarsResult<Expr> {
Some((TrimWhereField::Both, sql_expr)) => {
let lit = parse_sql_expr(sql_expr)?;
if let Expr::Literal(LiteralValue::Utf8(val)) = lit {
if val.len() == 1 {
return Ok(expr.str().strip(Some(val.chars().next().unwrap())));
}
return Ok(expr.str().strip(Some(val)));
}
}
Some((TrimWhereField::Leading, sql_expr)) => {
let lit = parse_sql_expr(sql_expr)?;
if let Expr::Literal(LiteralValue::Utf8(val)) = lit {
if val.len() == 1 {
return Ok(expr.str().lstrip(Some(val.chars().next().unwrap())));
}
return Ok(expr.str().lstrip(Some(val)));
}
}
Some((TrimWhereField::Trailing, sql_expr)) => {
let lit = parse_sql_expr(sql_expr)?;
if let Expr::Literal(LiteralValue::Utf8(val)) = lit {
if val.len() == 1 {
return Ok(expr.str().rstrip(Some(val.chars().next().unwrap())));
}
return Ok(expr.str().rstrip(Some(val)));
}
}
}
Expand Down
128 changes: 86 additions & 42 deletions py-polars/polars/internals/expr/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,92 +263,136 @@ def to_lowercase(self) -> pli.Expr:
"""
return pli.wrap_expr(self._pyexpr.str_to_lowercase())

def strip(self, matches: None | str = None) -> pli.Expr:
"""
Remove leading and trailing whitespace.
def strip(self, matches: str | None = None) -> pli.Expr:
r"""
Remove leading and trailing characters.
Parameters
----------
matches
An optional single character that should be trimmed
The set of characters to be removed. All combinations of this set of
characters will be stripped. If set to None (default), all whitespace is
removed instead.
Examples
--------
>>> df = pl.DataFrame({"foo": [" lead", "trail ", " both "]})
>>> df = pl.DataFrame({"foo": [" hello ", "\tworld"]})
>>> df.select(pl.col("foo").str.strip())
shape: (3, 1)
shape: (2, 1)
┌───────┐
│ foo │
│ --- │
│ str │
╞═══════╡
lead
hello
├╌╌╌╌╌╌╌┤
│ trail │
├╌╌╌╌╌╌╌┤
│ both │
│ world │
└───────┘
Characters can be stripped by passing a string as argument. Note that whitespace
will not be stripped automatically when doing so.
>>> df.select(pl.col("foo").str.strip("od\t"))
shape: (2, 1)
┌─────────┐
│ foo │
│ --- │
│ str │
╞═════════╡
│ hello │
├╌╌╌╌╌╌╌╌╌┤
│ worl │
└─────────┘
"""
if matches is not None and len(matches) > 1:
raise ValueError("matches should contain a single character")
return pli.wrap_expr(self._pyexpr.str_strip(matches))

def lstrip(self, matches: None | str = None) -> pli.Expr:
"""
Remove leading whitespace.
def lstrip(self, matches: str | None = None) -> pli.Expr:
r"""
Remove leading characters.
Parameters
----------
matches
The set of characters to be removed. All combinations of this set of
characters will be stripped. If set to None (default), all whitespace is
removed instead.
Examples
--------
>>> df = pl.DataFrame({"foo": [" lead", "trail ", " both "]})
>>> df = pl.DataFrame({"foo": [" hello ", "\tworld"]})
>>> df.select(pl.col("foo").str.lstrip())
shape: (3, 1)
shape: (2, 1)
┌────────┐
│ foo │
│ --- │
│ str │
╞════════╡
│ lead │
├╌╌╌╌╌╌╌╌┤
│ trail │
│ hello │
├╌╌╌╌╌╌╌╌┤
both
world
└────────┘
Characters can be stripped by passing a string as argument. Note that whitespace
will not be stripped automatically when doing so.
>>> df.select(pl.col("foo").str.lstrip("wod\t"))
shape: (2, 1)
┌─────────┐
│ foo │
│ --- │
│ str │
╞═════════╡
│ hello │
├╌╌╌╌╌╌╌╌╌┤
│ rld │
└─────────┘
"""
if matches is not None and len(matches) > 1:
raise ValueError("matches should contain a single character")
return pli.wrap_expr(self._pyexpr.str_lstrip(matches))

def rstrip(self, matches: None | str = None) -> pli.Expr:
"""
Remove trailing whitespace.
def rstrip(self, matches: str | None = None) -> pli.Expr:
r"""
Remove trailing characters.
Parameters
----------
matches
An optional single character that should be trimmed
The set of characters to be removed. All combinations of this set of
characters will be stripped. If set to None (default), all whitespace is
removed instead.
Examples
--------
>>> df = pl.DataFrame({"foo": [" lead", "trail ", " both "]})
>>> df = pl.DataFrame({"foo": [" hello ", "world\t"]})
>>> df.select(pl.col("foo").str.rstrip())
shape: (3, 1)
┌───────┐
│ foo │
│ --- │
│ str │
╞═══════╡
│ lead │
├╌╌╌╌╌╌╌┤
│ trail │
├╌╌╌╌╌╌╌┤
│ both │
└───────┘
shape: (2, 1)
┌────────┐
│ foo │
│ --- │
│ str │
╞════════╡
│ hello │
├╌╌╌╌╌╌╌╌┤
│ world │
└────────┘
Characters can be stripped by passing a string as argument. Note that whitespace
will not be stripped automatically when doing so.
>>> df.select(pl.col("foo").str.rstrip("wod\t"))
shape: (2, 1)
┌─────────┐
│ foo │
│ --- │
│ str │
╞═════════╡
│ hello │
├╌╌╌╌╌╌╌╌╌┤
│ worl │
└─────────┘
"""
if matches is not None and len(matches) > 1:
raise ValueError("matches should contain a single character")
return pli.wrap_expr(self._pyexpr.str_rstrip(matches))

def zfill(self, alignment: int) -> pli.Expr:
Expand Down

0 comments on commit 10f2d61

Please sign in to comment.