Skip to content

Commit

Permalink
fix windows expressions and add range to df macro (#2650)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Feb 15, 2022
1 parent b4a9ea2 commit 3682c7d
Show file tree
Hide file tree
Showing 10 changed files with 136 additions and 18 deletions.
17 changes: 17 additions & 0 deletions polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,23 @@ dot_diagram = ["polars-lazy/dot_diagram"]
dataframe_arithmetic = ["polars-core/dataframe_arithmetic"]
product = ["polars-core/product"]

# Umbrella feature that switches on every feature listed below in one go.
# NOTE(review): judging by its name this is meant for running the crate's
# test suite — confirm before relying on it elsewhere.
test = [
"lazy",
"private",
"rolling_window",
"rank",
"list",
"round_series",
"csv-file",
"dtype-categorical",
"cum_agg",
"polars-core/plain_fmt",
"diff",
"abs",
"parquet",
"ipc",
]

# don't use this
private = ["polars-lazy/private"]

Expand Down
2 changes: 2 additions & 0 deletions polars/polars-core/src/chunked_array/ops/cum_agg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ mod test {
use crate::prelude::*;

#[test]
#[cfg(feature = "dtype-u8")]
fn test_cummax() {
let ca = UInt8Chunked::new("foo", &[None, Some(1), Some(3), None, Some(1)]);
let out = ca.cummax(true);
Expand All @@ -149,6 +150,7 @@ mod test {
}

#[test]
#[cfg(feature = "dtype-u8")]
fn test_cummin() {
let ca = UInt8Chunked::new("foo", &[None, Some(1), Some(3), None, Some(2)]);
let out = ca.cummin(true);
Expand Down
21 changes: 21 additions & 0 deletions polars/polars-core/src/named_from.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,27 @@ impl_named_from!([Option<i64>], Int64Type, from_slice_options);
impl_named_from!([Option<f32>], Float32Type, from_slice_options);
impl_named_from!([Option<f64>], Float64Type, from_slice_options);

/// Generates `NamedFrom` implementations that accept a range as the value source.
///
/// For the given `$range` type (e.g. `std::ops::Range<i64>`) and the matching
/// `$polars_type` marker, two impls are emitted:
///
/// * one for `ChunkedArray<$polars_type>`, which collects the range into a `Vec`
///   and builds the array from that vector;
/// * one for `Series`, which builds the chunked array first and then converts it
///   with `into_series`.
macro_rules! impl_named_from_range {
    ($range:ty, $polars_type:ident) => {
        impl NamedFrom<$range, $polars_type> for ChunkedArray<$polars_type> {
            fn new(name: &str, range: $range) -> Self {
                // The range is consumed eagerly; the resulting buffer is handed over by value.
                let buffer: Vec<_> = range.collect();
                Self::from_vec(name, buffer)
            }
        }

        impl NamedFrom<$range, $polars_type> for Series {
            fn new(name: &str, range: $range) -> Self {
                // Delegate to the chunked-array impl above, then wrap as a `Series`.
                let ca = ChunkedArray::<$polars_type>::new(name, range);
                ca.into_series()
            }
        }
    };
}
impl_named_from_range!(std::ops::Range<i64>, Int64Type);
impl_named_from_range!(std::ops::Range<i32>, Int32Type);
impl_named_from_range!(std::ops::Range<u64>, UInt64Type);
impl_named_from_range!(std::ops::Range<u32>, UInt32Type);

impl<T: AsRef<[Series]>> NamedFrom<T, ListType> for Series {
fn new(name: &str, s: T) -> Self {
let series_slice = s.as_ref();
Expand Down
12 changes: 10 additions & 2 deletions polars/polars-lazy/src/dsl/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2034,7 +2034,11 @@ impl Expr {
},
GetOutput::from_type(DataType::Boolean),
)
.with_fmt("any")
.with_function_options(|mut opt| {
opt.fmt_str = "any";
opt.auto_explode = true;
opt
})
}

/// Check if all boolean values are `true`
Expand All @@ -2050,7 +2054,11 @@ impl Expr {
},
GetOutput::from_type(DataType::Boolean),
)
.with_fmt("all")
.with_function_options(|mut opt| {
opt.fmt_str = "all";
opt.auto_explode = true;
opt
})
}

#[cfg(feature = "strings")]
Expand Down
13 changes: 1 addition & 12 deletions polars/polars-lazy/src/logical_plan/aexpr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,18 +129,7 @@ impl AExpr {
Count => Ok(Field::new("count", DataType::UInt32)),
Window { function, .. } => {
let e = arena.get(*function);

let field = e.to_field(schema, ctxt, arena);
match e {
Agg(_) => field,
_ => {
let field = field?;
Ok(Field::new(
field.name(),
DataType::List(Box::new(field.data_type().clone())),
))
}
}
e.to_field(schema, ctxt, arena)
}
IsUnique(expr) => {
let field = arena.get(*expr).to_field(schema, ctxt, arena)?;
Expand Down
9 changes: 6 additions & 3 deletions polars/polars-lazy/src/physical_plan/expressions/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@ impl PhysicalExpr for BinaryExpr {
// One of the two exprs is aggregated with flat aggregation, e.g. `e.min(), e.max(), e.first()`

// if the groups_len == df.len we can just apply all flat.
// within an aggregation a `col().first() - lit(0)` must still produce a boolean array of group length,
// that's why a literal also takes this branch
(AggState::AggregatedFlat(s), AggState::NotAggregated(_) | AggState::Literal(_))
if s.len() != df.height() =>
{
Expand Down Expand Up @@ -149,9 +151,10 @@ impl PhysicalExpr for BinaryExpr {
Ok(ac_l)
}
// if the groups_len == df.len we can just apply all flat.
(AggState::NotAggregated(_) | AggState::Literal(_), AggState::AggregatedFlat(s))
if s.len() != df.height() =>
{
(
AggState::Literal(_) | AggState::AggregatedList(_) | AggState::NotAggregated(_),
AggState::AggregatedFlat(s),
) if s.len() != df.height() => {
// this is now a list
let l = ac_l.aggregated();
let l = l.list().unwrap();
Expand Down
1 change: 0 additions & 1 deletion polars/polars-lazy/src/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ mod optimization_checks;
mod predicate_queries;
mod projection_queries;
mod queries;
mod window_expressions;

fn load_df() -> DataFrame {
df!("a" => &[1, 2, 3, 4, 5],
Expand Down
13 changes: 13 additions & 0 deletions polars/tests/it/lazy/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
mod window_expressions;

use polars::prelude::*;

/// Builds the small five-row fixture frame shared by the lazy tests.
///
/// Columns: `A` (1..=5), `fruits`, `B` (5 down to 1) and `cars`.
///
/// # Panics
///
/// Panics if the `df!` macro returns an error while constructing the frame.
pub(crate) fn fruits_cars() -> DataFrame {
    let fixture = df!(
        "A"=> [1, 2, 3, 4, 5],
        "fruits"=> ["banana", "banana", "apple", "apple", "banana"],
        "B"=> [5, 4, 3, 2, 1],
        "cars"=> ["beetle", "audi", "beetle", "beetle", "beetle"]
    );
    fixture.unwrap()
}
Original file line number Diff line number Diff line change
Expand Up @@ -297,3 +297,68 @@ fn test_window_mapping() -> Result<()> {

Ok(())
}

/// Window expressions must be usable on either side of (and wrapped around)
/// binary expressions. Every derived column is cast to `Int32` and the whole
/// frame is summed so the result can be compared against exact integers.
#[test]
fn test_window_exprs_in_binary_exprs() -> Result<()> {
    // Small builders so each derived column below reads as a formula.
    let value = || col("value");
    let per_cat = || [col("cat")];

    // value - mean(value) over cat
    let centered = (value() - value().mean().over(per_cat()))
        .cast(DataType::Int32)
        .alias("centered");
    // value - std(value) over cat
    let scaled = (value() - value().std().over(per_cat()))
        .cast(DataType::Int32)
        .alias("scaled");
    // both operands of the division are windowed individually
    let stdized = ((value() - value().mean().over(per_cat()))
        / value().std().over(per_cat()))
    .cast(DataType::Int32)
    .alias("stdized");
    // only the numerator is windowed; the denominator is a plain aggregation
    let stdized2 = ((value() - value().mean()).over(per_cat()) / value().std())
        .cast(DataType::Int32)
        .alias("stdized2");
    // the complete binary expression is evaluated inside the window
    let stdized3 = ((value() - value().mean()) / value().std())
        .over(per_cat())
        .cast(DataType::Int32)
        .alias("stdized3");

    let input = df![
        "value" => 0..8,
        "cat" => [0, 0, 0, 0, 1, 1, 1, 1]
    ]?;

    let out = input
        .lazy()
        .with_columns([centered, scaled, stdized, stdized2, stdized3])
        .sum()
        .collect()?;

    let expected = df![
        "value" => [28],
        "cat" => [4],
        "centered" => [0],
        "scaled" => [14],
        "stdized" => [0],
        "stdized2" => [0],
        "stdized3" => [0]
    ]?;

    assert!(out.frame_equal(&expected));

    Ok(())
}

/// `any()` / `all()` used inside a window must broadcast one boolean per group
/// back onto every row of that group.
#[test]
fn test_window_exprs_any_all() -> Result<()> {
    let flags = || col("var2");
    let by_var1 = || [col("var1")];

    let input = df![
        "var1"=> ["A", "B", "C", "C", "D", "D", "E", "E"],
        "var2"=> [false, true, false, false, false, true, true, true],
    ]?;

    let out = input
        .lazy()
        .select([
            flags().any().over(by_var1()).alias("any"),
            flags().all().over(by_var1()).alias("all"),
        ])
        .collect()?;

    // Groups: A=[false], B=[true], C=[false, false], D=[false, true], E=[true, true]
    let expected = df![
        "any" => [false, true, false, false, true, true, true, true],
        "all" => [false, true, false, false, false, false, true, true],
    ]?;

    assert!(out.frame_equal(&expected));
    Ok(())
}
1 change: 1 addition & 0 deletions polars/tests/it/main.rs
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
mod joins;
mod lazy;

0 comments on commit 3682c7d

Please sign in to comment.