Skip to content

Commit

Permalink
ljust and rjust expressions (#3603)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jun 7, 2022
1 parent 9bd7001 commit a929f24
Show file tree
Hide file tree
Showing 17 changed files with 215 additions and 31 deletions.
1 change: 1 addition & 0 deletions polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ bigidx = ["polars-core/bigidx", "polars-lazy/bigidx"]
list_to_struct = ["polars-ops/list_to_struct", "polars-lazy/list_to_struct"]
describe = ["polars-core/describe"]
timezones = ["polars-core/timezones"]
string_justify = ["polars-lazy/string_justify", "polars-ops/string_justify"]

test = [
"lazy",
Expand Down
3 changes: 1 addition & 2 deletions polars/polars-core/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "polars-core"
version = "0.22.2"
version = "0.22.3"
authors = ["ritchie46 <ritchie46@gmail.com>"]
edition = "2021"
license = "MIT"
Expand Down Expand Up @@ -125,7 +125,6 @@ docs-selection = [
"concat_str",
"row_hash",
"mode",
"extract_jsonpath",
"cum_agg",
"rolling_window",
"interpolate",
Expand Down
1 change: 1 addition & 0 deletions polars/polars-lazy/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ chunked_ids = []
list_to_struct = ["polars-ops/list_to_struct"]
python = ["pyo3"]
row_hash = ["polars-core/row_hash"]
string_justify = ["polars-ops/string_justify"]

# no guarantees whatsoever
private = ["polars-time/private"]
Expand Down
32 changes: 32 additions & 0 deletions polars/polars-lazy/src/dsl/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ impl StringNameSpace {
/// A leading sign prefix ('+'/'-') is handled by inserting the padding after the sign character
/// rather than before.
/// The original string is returned if width is less than or equal to `s.len()`.
#[cfg(feature = "string_justify")]
#[cfg_attr(docsrs, doc(cfg(feature = "string_justify")))]
pub fn zfill(self, alignment: usize) -> Expr {
let function = move |s: Series| {
let ca = s.utf8()?;
Expand All @@ -33,6 +35,36 @@ impl StringNameSpace {
.with_fmt("str.zfill")
}

/// Left-justify every string in the column to a total length of `width`.
///
/// Strings that are too short are padded on the right with `fillchar`.
/// A string that is already at least `width` long is returned unchanged.
#[cfg(feature = "string_justify")]
#[cfg_attr(docsrs, doc(cfg(feature = "string_justify")))]
pub fn ljust(self, width: usize, fillchar: char) -> Expr {
    // The padded column is always a Utf8 column.
    let output_type = GetOutput::from_type(DataType::Utf8);
    let pad_end = move |series: Series| {
        let padded = series.utf8()?.ljust(width, fillchar);
        Ok(padded.into_series())
    };
    self.0.map(pad_end, output_type).with_fmt("str.ljust")
}

/// Right-justify every string in the column to a total length of `width`.
///
/// Strings that are too short are padded on the left with `fillchar`.
/// A string that is already at least `width` long is returned unchanged.
#[cfg(feature = "string_justify")]
#[cfg_attr(docsrs, doc(cfg(feature = "string_justify")))]
pub fn rjust(self, width: usize, fillchar: char) -> Expr {
    // The padded column is always a Utf8 column.
    let output_type = GetOutput::from_type(DataType::Utf8);
    let pad_start = move |series: Series| {
        let padded = series.utf8()?.rjust(width, fillchar);
        Ok(padded.into_series())
    };
    self.0.map(pad_start, output_type).with_fmt("str.rjust")
}

/// Extract each successive non-overlapping match in an individual string as an array
pub fn extract_all(self, pat: &str) -> Expr {
let pat = pat.to_string();
Expand Down
1 change: 1 addition & 0 deletions polars/polars-ops/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,4 @@ list_to_struct = ["polars-core/dtype-struct", "list"]
list = []
diff = []
strings = ["polars-core/strings"]
string_justify = ["polars-core/strings"]
50 changes: 50 additions & 0 deletions polars/polars-ops/src/chunked_array/strings/namespace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ pub trait Utf8NameSpaceImpl: AsUtf8 {
/// A leading sign prefix ('+'/'-') is handled by inserting the padding after the sign character
/// rather than before.
/// The original string is returned if width is less than or equal to `s.len()`.
#[cfg(feature = "string_justify")]
#[cfg_attr(docsrs, doc(cfg(feature = "string_justify")))]
fn zfill<'a>(&'a self, alignment: usize) -> Utf8Chunked {
let ca = self.as_utf8();

Expand Down Expand Up @@ -50,6 +52,54 @@ pub trait Utf8NameSpaceImpl: AsUtf8 {
ca.apply(f)
}

/// Return the string left justified in a string of length `width`.
///
/// Padding is done by appending the specified `fillchar` to the end of the string.
/// `width` is measured in characters (Unicode scalar values), not bytes, so
/// multi-byte UTF-8 input is padded to the requested number of characters.
/// The original string is returned (borrowed, without allocating) if `width` is
/// less than or equal to the number of characters in the string.
#[cfg(feature = "string_justify")]
#[cfg_attr(docsrs, doc(cfg(feature = "string_justify")))]
fn ljust<'a>(&'a self, width: usize, fillchar: char) -> Utf8Chunked {
    let ca = self.as_utf8();

    let f = |s: &'a str| {
        // `str::len` counts bytes; count characters so non-ASCII strings are
        // not under-padded.
        let padding = width.saturating_sub(s.chars().count());
        if padding == 0 {
            Cow::Borrowed(s)
        } else {
            // Reserve the exact number of bytes: `fillchar` may be multi-byte.
            let fill_bytes = padding.saturating_mul(fillchar.len_utf8());
            let mut buf = String::with_capacity(s.len().saturating_add(fill_bytes));
            buf.push_str(s);
            buf.extend(std::iter::repeat(fillchar).take(padding));
            Cow::Owned(buf)
        }
    };
    ca.apply(f)
}

/// Return the string right justified in a string of length `width`.
///
/// Padding is done by prepending the specified `fillchar` to the start of the string.
/// `width` is measured in characters (Unicode scalar values), not bytes, so
/// multi-byte UTF-8 input is padded to the requested number of characters.
/// The original string is returned (borrowed, without allocating) if `width` is
/// less than or equal to the number of characters in the string.
#[cfg(feature = "string_justify")]
#[cfg_attr(docsrs, doc(cfg(feature = "string_justify")))]
fn rjust<'a>(&'a self, width: usize, fillchar: char) -> Utf8Chunked {
    let ca = self.as_utf8();

    let f = |s: &'a str| {
        // `str::len` counts bytes; count characters so non-ASCII strings are
        // not under-padded.
        let padding = width.saturating_sub(s.chars().count());
        if padding == 0 {
            Cow::Borrowed(s)
        } else {
            // Reserve the exact number of bytes: `fillchar` may be multi-byte.
            let fill_bytes = padding.saturating_mul(fillchar.len_utf8());
            let mut buf = String::with_capacity(s.len().saturating_add(fill_bytes));
            buf.extend(std::iter::repeat(fillchar).take(padding));
            buf.push_str(s);
            Cow::Owned(buf)
        }
    };
    ca.apply(f)
}

/// Check if strings contain a regex pattern
fn contains(&self, pat: &str) -> Result<BooleanChunked> {
let ca = self.as_utf8();
Expand Down
2 changes: 2 additions & 0 deletions polars/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,10 +162,12 @@
//! * `temporal` - Conversions between [Chrono](https://docs.rs/chrono/) and Polars for temporal data types
//! * `timezones` - Activate timezone support.
//! * `strings` - Extra string utilities for `Utf8Chunked`
//! - `string_justify` - `zfill`, `ljust`, `rjust`
//! * `object` - Support for generic ChunkedArrays called `ObjectChunked<T>` (generic over `T`).
//! These are downcastable from Series through the [Any](https://doc.rust-lang.org/std/any/index.html) trait.
//! * Performance related:
//! - `simd` - SIMD operations _(nightly only)_
//! - `performant` - more fast paths, slower compile times.
//! - `bigidx` - Activate this feature if you expect >> 2^32 rows. This has not been needed by anyone.
//! This allows polars to scale up way beyond that by using `u64` as an index.
//! Polars will be a bit slower with this feature activated as many data structures
Expand Down
2 changes: 1 addition & 1 deletion py-polars/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions py-polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ features = [
"cumulative_eval",
"list_to_struct",
"to_dummies",
"string_justify",
]

# [patch.crates-io]
Expand Down
2 changes: 2 additions & 0 deletions py-polars/docs/source/reference/expression.rst
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,8 @@ The following methods are available under the `Expr.str` attribute.
ExprStringNameSpace.lstrip
ExprStringNameSpace.rstrip
ExprStringNameSpace.zfill
ExprStringNameSpace.ljust
ExprStringNameSpace.rjust
ExprStringNameSpace.contains
ExprStringNameSpace.json_path_match
ExprStringNameSpace.extract
Expand Down
2 changes: 2 additions & 0 deletions py-polars/docs/source/reference/series.rst
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,8 @@ The following methods are available under the `Series.str` attribute.
StringNameSpace.rstrip
StringNameSpace.lstrip
StringNameSpace.zfill
StringNameSpace.ljust
StringNameSpace.rjust
StringNameSpace.slice
StringNameSpace.encode
StringNameSpace.decode
Expand Down
4 changes: 3 additions & 1 deletion py-polars/polars/_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,9 @@ def write_body(self) -> None:
if series.dtype == Object:
self.elements.append(f"{series[r]}")
else:
self.elements.append(f"{series._s.get_fmt(r)}")
self.elements.append(
f"<pre>{series._s.get_fmt(r)}</pre>"
)

def write(self, inner: str) -> None:
self.elements.append(inner)
Expand Down
30 changes: 30 additions & 0 deletions py-polars/polars/internals/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -4356,6 +4356,36 @@ def zfill(self, alignment: int) -> Expr:
"""
return wrap_expr(self._pyexpr.str_zfill(alignment))

def ljust(self, width: int, fillchar: str = " ") -> Expr:
    """
    Left-justify the strings to a total length of ``width``.

    Strings shorter than ``width`` are padded on the right with ``fillchar``.
    The original string is returned if ``width`` is less than or equal to
    ``len(s)``.

    Parameters
    ----------
    width
        Justify left to this length.
    fillchar
        Fill with this ASCII character.
    """
    padded = self._pyexpr.str_ljust(width, fillchar)
    return wrap_expr(padded)

def rjust(self, width: int, fillchar: str = " ") -> Expr:
    """
    Right-justify the strings to a total length of ``width``.

    Strings shorter than ``width`` are padded on the left with ``fillchar``.
    The original string is returned if ``width`` is less than or equal to
    ``len(s)``.

    Parameters
    ----------
    width
        Justify right to this length.
    fillchar
        Fill with this ASCII character.
    """
    padded = self._pyexpr.str_rjust(width, fillchar)
    return wrap_expr(padded)

def contains(self, pattern: str) -> Expr:
"""
Check if string contains regex.
Expand Down
36 changes: 36 additions & 0 deletions py-polars/polars/internals/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4317,6 +4317,42 @@ def zfill(self, alignment: int) -> Series:
s = wrap_s(self._s)
return s.to_frame().select(pli.col(s.name).str.zfill(alignment)).to_series()

def ljust(self, width: int, fillchar: str = " ") -> Series:
    """
    Left-justify the strings to a total length of ``width``.

    Strings shorter than ``width`` are padded on the right with ``fillchar``.
    The original string is returned if ``width`` is less than or equal to
    ``len(s)``.

    Parameters
    ----------
    width
        Justify left to this length.
    fillchar
        Fill with this ASCII character.
    """
    series = wrap_s(self._s)
    # Delegate to the expression implementation via a single-column frame.
    padded = pli.col(series.name).str.ljust(width, fillchar)
    return series.to_frame().select(padded).to_series()

def rjust(self, width: int, fillchar: str = " ") -> Series:
    """
    Right-justify the strings to a total length of ``width``.

    Strings shorter than ``width`` are padded on the left with ``fillchar``.
    The original string is returned if ``width`` is less than or equal to
    ``len(s)``.

    Parameters
    ----------
    width
        Justify right to this length.
    fillchar
        Fill with this ASCII character.
    """
    series = wrap_s(self._s)
    # Delegate to the expression implementation via a single-column frame.
    padded = pli.col(series.name).str.rjust(width, fillchar)
    return series.to_frame().select(padded).to_series()

def to_lowercase(self) -> Series:
"""
Modify the strings to their lowercase equivalent.
Expand Down
8 changes: 8 additions & 0 deletions py-polars/src/lazy/dsl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -577,6 +577,14 @@ impl PyExpr {
self.clone().inner.str().zfill(alignment).into()
}

/// Build an expression that left-justifies strings to `width`, padding with `fillchar`.
///
/// Works on a clone of this expression; `self` is left untouched.
pub fn str_ljust(&self, width: usize, fillchar: char) -> PyExpr {
    let expr = self.clone().inner;
    let justified = expr.str().ljust(width, fillchar);
    justified.into()
}

/// Build an expression that right-justifies strings to `width`, padding with `fillchar`.
///
/// Works on a clone of this expression; `self` is left untouched.
pub fn str_rjust(&self, width: usize, fillchar: char) -> PyExpr {
    let expr = self.clone().inner;
    let justified = expr.str().rjust(width, fillchar);
    justified.into()
}

pub fn str_contains(&self, pat: String) -> PyExpr {
let function = move |s: Series| {
let ca = s.utf8()?;
Expand Down
27 changes: 0 additions & 27 deletions py-polars/tests/test_exprs.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,30 +248,3 @@ def test_expression_appends() -> None:

assert out.n_chunks() == 1
assert out.to_series().to_list() == [None, None, None, 1, 1, 2]


def test_zfill() -> None:
    """Check `str.zfill` through both the expression and the Series API."""
    df = pl.DataFrame(
        {
            "num": [-10, -1, 0, 1, 10, 100, 1000, 10000, 100000, 1000000, None],
        }
    )

    # Values already at least 5 characters wide are expected back unchanged,
    # the sign stays in front of the zero padding, and nulls stay null.
    out = [
        "-0010",
        "-0001",
        "00000",
        "00001",
        "00010",
        "00100",
        "01000",
        "10000",
        "100000",
        "1000000",
        None,
    ]
    assert (
        df.with_column(pl.col("num").cast(str).str.zfill(5)).to_series().to_list()
        == out
    )
    # Compare plain lists: `Series == list` is an element-wise comparison and
    # does not yield a single boolean for `assert` to check.
    assert df["num"].cast(str).str.zfill(5).to_list() == out
44 changes: 44 additions & 0 deletions py-polars/tests/test_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,47 @@ def test_extract_all_count() -> None:

assert df["foo"].str.extract_all(r"a").dtype == pl.List
assert df["foo"].str.count_match(r"a").dtype == pl.UInt32


def test_zfill() -> None:
    """Check `str.zfill` through both the expression and the Series API."""
    df = pl.DataFrame(
        {
            "num": [-10, -1, 0, 1, 10, 100, 1000, 10000, 100000, 1000000, None],
        }
    )

    # Values already at least 5 characters wide are expected back unchanged,
    # the sign stays in front of the zero padding, and nulls stay null.
    out = [
        "-0010",
        "-0001",
        "00000",
        "00001",
        "00010",
        "00100",
        "01000",
        "10000",
        "100000",
        "1000000",
        None,
    ]
    assert (
        df.with_column(pl.col("num").cast(str).str.zfill(5)).to_series().to_list()
        == out
    )
    # Compare plain lists: `Series == list` is an element-wise comparison and
    # does not yield a single boolean for `assert` to check.
    assert df["num"].cast(str).str.zfill(5).to_list() == out


def test_ljust_and_rjust() -> None:
    """Check `str.ljust` / `str.rjust` with the default space fill character."""
    df = pl.DataFrame({"a": ["foo", "longer_foo", "longest_fooooooo", "hi"]})
    # Short values are padded to exactly 10 characters; values that are already
    # 10 or more characters long must come back unchanged.
    assert df.select(
        [
            pl.col("a").str.rjust(10).alias("rjust"),
            pl.col("a").str.rjust(10).str.lengths().alias("rjust_len"),
            pl.col("a").str.ljust(10).alias("ljust"),
            pl.col("a").str.ljust(10).str.lengths().alias("ljust_len"),
        ]
    ).to_dict(False) == {
        "rjust": ["       foo", "longer_foo", "longest_fooooooo", "        hi"],
        "rjust_len": [10, 10, 16, 10],
        "ljust": ["foo       ", "longer_foo", "longest_fooooooo", "hi        "],
        "ljust_len": [10, 10, 16, 10],
    }

0 comments on commit a929f24

Please sign in to comment.