Skip to content

Commit

Permalink
feat[rust]: format_str to lazy expressions API (#4693)
Browse files Browse the repository at this point in the history
  • Loading branch information
hpux735 committed Sep 3, 2022
1 parent 16136c0 commit fb0aaa3
Show file tree
Hide file tree
Showing 7 changed files with 65 additions and 1 deletion.
3 changes: 3 additions & 0 deletions polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ asof_join = ["polars-core/asof_join", "polars-lazy/asof_join"]
cross_join = ["polars-core/cross_join", "polars-lazy/cross_join"]
dot_product = ["polars-core/dot_product", "polars-lazy/dot_product"]
concat_str = ["polars-core/concat_str", "polars-lazy/concat_str"]
format_str = ["polars-lazy/format_str", "concat_str"]
row_hash = ["polars-core/row_hash", "polars-lazy/row_hash"]
reinterpret = ["polars-core/reinterpret"]
decompress = ["polars-io/decompress"]
Expand Down Expand Up @@ -133,6 +134,7 @@ test = [
"list",
"round_series",
"csv-file",
"format_str",
"dtype-categorical",
"cum_agg",
"fmt",
Expand Down Expand Up @@ -233,6 +235,7 @@ docs-selection = [
"asof_join",
"cross_join",
"concat_str",
"format_str",
"decompress",
"mode",
"take_opt_iter",
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-core/src/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ pub fn argsort_by(by: &[Series], reverse: &[bool]) -> Result<IdxCa> {
first.argsort_multiple(&by, &reverse)
}

// utility to be able to also add literals ot concat_str function
// utility to be able to also add literals to concat_str function
#[cfg(feature = "concat_str")]
enum IterBroadCast<'a> {
Column(Box<dyn PolarsIterator<Item = Option<&'a str>> + 'a>),
Expand Down
1 change: 1 addition & 0 deletions polars/polars-lazy/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ cross_join = ["polars-core/cross_join"]
asof_join = ["polars-core/asof_join", "polars-time"]
dot_product = ["polars-core/dot_product"]
concat_str = ["polars-core/concat_str"]
format_str = ["concat_str"]
arange = []
mode = ["polars-core/mode"]
cum_agg = ["polars-core/cum_agg"]
Expand Down
32 changes: 32 additions & 0 deletions polars/polars-lazy/src/dsl/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,38 @@ pub fn concat_str<E: AsRef<[Expr]>>(s: E, sep: &str) -> Expr {
}
}

#[cfg(feature = "format_str")]
#[cfg_attr(docsrs, doc(cfg(feature = "format_str")))]
/// Format the results of an array of expressions using a format string.
///
/// Each `{}` in `format` is a placeholder that is substituted, in order, by the
/// corresponding expression in `args`. The literal text between placeholders is
/// turned into literal expressions, and everything is joined with `concat_str`
/// using an empty separator.
///
/// # Errors
///
/// Returns `PolarsError::ShapeMisMatch` when the number of `{}` placeholders in
/// `format` differs from the number of expressions in `args`.
pub fn format_str<E: AsRef<[Expr]>>(format: &str, args: E) -> Result<Expr> {
    let args = args.as_ref();

    // Splitting on the placeholder yields the literal text surrounding it:
    // N placeholders always produce N + 1 pieces (some of them possibly empty).
    let pieces: Vec<&str> = format.split("{}").collect();

    if pieces.len() != args.len() + 1 {
        return Err(PolarsError::ShapeMisMatch(
            "number of placeholders should equal the number of arguments".into(),
        ));
    }

    let mut parts: Vec<Expr> = Vec::with_capacity(pieces.len() + args.len());
    let mut remaining = args.iter();

    for (idx, piece) in pieces.into_iter().enumerate() {
        // Every piece except the first one is preceded by a placeholder, so the
        // next argument goes in front of it.
        if idx != 0 {
            if let Some(expr) = remaining.next() {
                parts.push(expr.clone());
            }
        }

        // Empty literals add nothing to the concatenation; leave them out.
        if !piece.is_empty() {
            parts.push(lit(piece.to_string()));
        }
    }

    Ok(concat_str(parts, ""))
}

/// Concat lists entries.
#[cfg(feature = "list")]
#[cfg_attr(docsrs, doc(cfg(feature = "list")))]
Expand Down
1 change: 1 addition & 0 deletions polars/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@
//! - `checked_arithmetic` - checked arithmetic/ returning `None` on invalid operations.
//! - `dot_product` - Dot/inner product on Series and Expressions.
//! - `concat_str` - Concat string data in linear time.
//! - `format_str` - Format the results of an array of expressions using a format string
//! - `reinterpret` - Utility to reinterpret bits to signed/unsigned
//! - `take_opt_iter` - Take from a Series with `Iterator<Item=Option<usize>>`
//! - `mode` - [Return the most occurring value(s)](crate::chunked_array::ops::ChunkUnique::mode)
Expand Down
26 changes: 26 additions & 0 deletions polars/tests/it/lazy/functions.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
use super::*;

#[test]
#[cfg(feature = "format_str")]
fn test_format_str() {
    // An integer column and a string column, both interpolated into one string.
    let input = df![
        "a" => [1, 2],
        "b" => ["a", "b"]
    ]
    .unwrap();

    // Two placeholders, two arguments: the expression must build successfully.
    let formatted = format_str("({}, {}]", [col("a"), col("b")])
        .unwrap()
        .alias("formatted");

    let result = input.lazy().select([formatted]).collect().unwrap();

    let expected = df![
        "formatted" => ["(1, a]", "(2, b]"]
    ]
    .unwrap();

    assert!(result.frame_equal_missing(&expected));
}
1 change: 1 addition & 0 deletions polars/tests/it/lazy/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
mod explodes;
mod expressions;
mod functions;
mod groupby;
mod groupby_dynamic;
mod predicate_queries;
Expand Down

0 comments on commit fb0aaa3

Please sign in to comment.