Skip to content

Commit

Permalink
zfill expression (#3593)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jun 6, 2022
1 parent 585e2ad commit 0750664
Show file tree
Hide file tree
Showing 8 changed files with 142 additions and 0 deletions.
14 changes: 14 additions & 0 deletions polars/polars-lazy/src/dsl/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,20 @@ impl StringNameSpace {
.with_fmt("str.extract")
}

/// Left-pad every string with ASCII `'0'` characters until it is `alignment` long.
///
/// The per-string work is delegated to the `zfill` method of the underlying Utf8 array.
/// A leading `'-'` sign stays in front of the inserted zeros, and a string that is
/// already at least `alignment` long is returned unchanged.
pub fn zfill(self, alignment: usize) -> Expr {
    // The result is always a Utf8 column, whatever the input strings look like.
    let output_type = GetOutput::from_type(DataType::Utf8);
    let pad_with_zeros = move |series: Series| {
        let strings = series.utf8()?;
        let padded = strings.zfill(alignment);
        Ok(padded.into_series())
    };
    self.0.map(pad_with_zeros, output_type).with_fmt("str.zfill")
}

/// Extract each successive non-overlapping match in an individual string as an array
pub fn extract_all(self, pat: &str) -> Expr {
let pat = pat.to_string();
Expand Down
31 changes: 31 additions & 0 deletions polars/polars-ops/src/chunked_array/strings/namespace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,37 @@ pub trait Utf8NameSpaceImpl: AsUtf8 {
ca.apply_kernel_cast(&string_lengths)
}

/// Return a copy of each string left filled with ASCII '0' digits to make a string of
/// length `alignment`.
///
/// A leading sign prefix (`'+'` or `'-'`) is handled by inserting the padding after the
/// sign character rather than before it.
/// The original string is returned (borrowed, without allocating) if `alignment` is less
/// than or equal to `s.len()`. Note that `s.len()` is the length in bytes.
fn zfill<'a>(&'a self, alignment: usize) -> Utf8Chunked {
    let ca = self.as_utf8();

    let f = |s: &'a str| {
        // Number of '0' characters still missing to reach the requested length.
        let n_zeros = alignment.saturating_sub(s.len());
        if n_zeros == 0 {
            return Cow::Borrowed(s);
        }
        // Split off a leading sign so the zeros land between the sign and the rest.
        // Both signs are single-byte ASCII, so splitting at byte 1 is a valid boundary.
        let (sign, rest) = match s.as_bytes().first() {
            Some(b'+') | Some(b'-') => s.split_at(1),
            _ => ("", s),
        };
        // `n_zeros > 0` implies the final length is exactly `alignment` bytes.
        let mut padded = String::with_capacity(alignment);
        padded.push_str(sign);
        padded.extend(std::iter::repeat('0').take(n_zeros));
        padded.push_str(rest);
        Cow::Owned(padded)
    };
    ca.apply(f)
}

/// Check if strings contain a regex pattern
fn contains(&self, pat: &str) -> Result<BooleanChunked> {
let ca = self.as_utf8();
Expand Down
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/expression.rst
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,7 @@ The following methods are available under the `Expr.str` attribute.
ExprStringNameSpace.strip
ExprStringNameSpace.lstrip
ExprStringNameSpace.rstrip
ExprStringNameSpace.zfill
ExprStringNameSpace.contains
ExprStringNameSpace.json_path_match
ExprStringNameSpace.extract
Expand Down
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/series.rst
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,7 @@ The following methods are available under the `Series.str` attribute.
StringNameSpace.strip
StringNameSpace.rstrip
StringNameSpace.lstrip
StringNameSpace.zfill
StringNameSpace.slice
StringNameSpace.encode
StringNameSpace.decode
Expand Down
49 changes: 49 additions & 0 deletions py-polars/polars/internals/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -4307,6 +4307,55 @@ def rstrip(self) -> Expr:
"""
return wrap_expr(self._pyexpr.str_rstrip())

def zfill(self, alignment: int) -> Expr:
    """
    Pad each string on the left with ASCII '0' characters up to a given length.

    A leading '-' sign is kept in front of the inserted zeros rather than behind
    them. Strings that are already at least ``alignment`` long are returned
    unchanged.

    Parameters
    ----------
    alignment
        Fill the value up to this length

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "num": [-10, -1, 0, 1, 10, 100, 1000, 10000, 100000, 1000000, None],
    ...     }
    ... )
    >>> df.with_column(pl.col("num").cast(str).str.zfill(5))
    shape: (11, 1)
    ┌─────────┐
    │ num     │
    │ ---     │
    │ str     │
    ╞═════════╡
    │ -0010   │
    ├╌╌╌╌╌╌╌╌╌┤
    │ -0001   │
    ├╌╌╌╌╌╌╌╌╌┤
    │ 00000   │
    ├╌╌╌╌╌╌╌╌╌┤
    │ 00001   │
    ├╌╌╌╌╌╌╌╌╌┤
    │ ...     │
    ├╌╌╌╌╌╌╌╌╌┤
    │ 10000   │
    ├╌╌╌╌╌╌╌╌╌┤
    │ 100000  │
    ├╌╌╌╌╌╌╌╌╌┤
    │ 1000000 │
    ├╌╌╌╌╌╌╌╌╌┤
    │ null    │
    └─────────┘

    """
    # Delegate to the native expression and wrap the result back into an Expr.
    padded = self._pyexpr.str_zfill(alignment)
    return wrap_expr(padded)

def contains(self, pattern: str) -> Expr:
"""
Check if string contains regex.
Expand Down
15 changes: 15 additions & 0 deletions py-polars/polars/internals/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4302,6 +4302,21 @@ def rstrip(self) -> Series:
s = wrap_s(self._s)
return s.to_frame().select(pli.col(s.name).str.rstrip()).to_series()

def zfill(self, alignment: int) -> Series:
    """
    Pad each string on the left with ASCII '0' characters up to a given length.

    A leading '-' sign is kept in front of the inserted zeros rather than behind
    them. Strings that are already at least ``alignment`` long are returned
    unchanged.

    Parameters
    ----------
    alignment
        Fill the value up to this length

    """
    # Route through the expression API: evaluate `str.zfill` on a one-column
    # frame built from this Series, then pull the single column back out.
    series = wrap_s(self._s)
    frame = series.to_frame()
    padded = pli.col(series.name).str.zfill(alignment)
    return frame.select(padded).to_series()

def to_lowercase(self) -> Series:
"""
Modify the strings to their lowercase equivalent.
Expand Down
4 changes: 4 additions & 0 deletions py-polars/src/lazy/dsl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -573,6 +573,10 @@ impl PyExpr {
.into()
}

/// Build a new expression that applies `str().zfill(alignment)` to a clone of this one.
pub fn str_zfill(&self, alignment: usize) -> PyExpr {
    // Work on a clone so the receiver is left untouched.
    let expr = self.clone().inner;
    let padded = expr.str().zfill(alignment);
    padded.into()
}

pub fn str_contains(&self, pat: String) -> PyExpr {
let function = move |s: Series| {
let ca = s.utf8()?;
Expand Down
27 changes: 27 additions & 0 deletions py-polars/tests/test_exprs.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,3 +248,30 @@ def test_expression_appends() -> None:

assert out.n_chunks() == 1
assert out.to_series().to_list() == [None, None, None, 1, 1, 2]


def test_zfill() -> None:
    """Check `str.zfill` through both the expression and the Series namespace."""
    df = pl.DataFrame(
        {
            "num": [-10, -1, 0, 1, 10, 100, 1000, 10000, 100000, 1000000, None],
        }
    )

    out = [
        "-0010",
        "-0001",
        "00000",
        "00001",
        "00010",
        "00100",
        "01000",
        "10000",
        "100000",
        "1000000",
        None,
    ]
    assert (
        df.with_column(pl.col("num").cast(str).str.zfill(5)).to_series().to_list()
        == out
    )
    # Convert to a list first: comparing a Series with a list is element-wise and
    # yields a boolean Series, which would not verify the individual values.
    assert df["num"].cast(str).str.zfill(5).to_list() == out

0 comments on commit 0750664

Please sign in to comment.