Skip to content

Commit

Permalink
fix groupby aggregation on empty df (#3688)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jun 13, 2022
1 parent 5b9a432 commit 2427ad4
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 3 deletions.
15 changes: 12 additions & 3 deletions polars/polars-lazy/src/physical_plan/expressions/apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,17 @@ impl PhysicalExpr for ApplyExpr {

match self.collect_groups {
ApplyOptions::ApplyGroups => {
let name = ac.series().name().to_string();
let s = ac.series();

// collection of empty list leads to a null dtype
// see: #3687
if s.len() == 0 {
let s = self.function.call_udf(&mut [s.clone()])?;
let ca = ListChunked::full(s.name(), &s, 0);
return Ok(self.finish_apply_groups(ac, ca));
}

let name = s.name().to_string();

let mut ca: ListChunked = ac
.aggregated()
Expand All @@ -109,8 +119,7 @@ impl PhysicalExpr for ApplyExpr {
.collect();

ca.rename(&name);
let ac = self.finish_apply_groups(ac, ca);
Ok(ac)
Ok(self.finish_apply_groups(ac, ca))
}
ApplyOptions::ApplyFlat => {
// make sure the groups are updated because we are about to throw away
Expand Down
20 changes: 20 additions & 0 deletions py-polars/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,23 @@ def test_groupby_sorted_empty_dataframe_3680() -> None:
.tail(1)
.collect()
).shape == (0, 2)


def test_groupby_custom_agg_empty_list() -> None:
    """Check the dtypes produced by group-by aggregations on an empty frame.

    An empty frame with a Categorical ``key`` column and a Float64 ``val``
    column is grouped by ``key`` and aggregated with mean, std, skew and
    kurtosis. The result must report Categorical for the key and Float64
    for each of the four aggregated columns.
    """
    # Two zero-length columns with explicitly declared dtypes.
    empty_frame = pl.DataFrame(
        [
            pl.Series("key", [], dtype=pl.Categorical),
            pl.Series("val", [], dtype=pl.Float64),
        ]
    )

    # One aggregation per output column, each aliased to its output name.
    aggregations = [
        pl.col("val").mean().alias("mean"),
        pl.col("val").std().alias("std"),
        pl.col("val").skew().alias("skew"),
        pl.col("val").kurtosis().alias("kurt"),
    ]

    aggregated = empty_frame.groupby("key").agg(aggregations)

    # Key dtype first, followed by one Float64 per aggregation.
    expected_dtypes = [
        pl.Categorical,
        pl.Float64,
        pl.Float64,
        pl.Float64,
        pl.Float64,
    ]
    assert aggregated.dtypes == expected_dtypes

0 comments on commit 2427ad4

Please sign in to comment.