Skip to content

Commit

Permalink
add cumcount expr
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Nov 29, 2021
1 parent 6dd9ded commit b8481f9
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 1 deletion.
22 changes: 21 additions & 1 deletion polars/polars-lazy/src/dsl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use polars_arrow::array::default_arrays::FromData;
use polars_core::frame::select::Selection;
#[cfg(feature = "diff")]
use polars_core::series::ops::NullBehavior;
use polars_core::utils::get_supertype;
use polars_core::utils::{get_supertype, NoNull};

/// A wrapper trait for any closure `Fn(Vec<Series>) -> Result<Series>`
pub trait SeriesUdf: Send + Sync {
Expand Down Expand Up @@ -1757,6 +1757,26 @@ impl Expr {
};
self.apply(move |s| s.reshape(&dims), output_type)
}

/// Cumulatively count the values of the evaluated `Series`.
///
/// The result is a `UInt32` series with the same name and length as the
/// input, holding `0, 1, ..., len - 1`. With `reverse` set, the same values
/// are emitted in descending order (`len - 1` down to `0`).
pub fn cumcount(self, reverse: bool) -> Self {
    self.apply(
        move |s| {
            // `as` truncates silently if the length does not fit in a `u32`.
            let len = s.len() as u32;
            // Both directions share the unwrap/rename/convert steps below;
            // only the iteration order differs.
            let counts: NoNull<UInt32Chunked> = if reverse {
                (0u32..len).rev().collect()
            } else {
                (0u32..len).collect()
            };
            let mut counts = counts.into_inner();
            // Keep the name of the input so the output column lines up with it.
            counts.rename(s.name());
            Ok(counts.into_series())
        },
        GetOutput::from_type(DataType::UInt32),
    )
}
}

/// Create a Column Expression based on a column name.
Expand Down
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/expression.rst
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ Computations
Expr.cummin
Expr.cummax
Expr.cumprod
Expr.cumcount
Expr.dot
Expr.mode
Expr.n_unique
Expand Down
12 changes: 12 additions & 0 deletions py-polars/polars/internals/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,6 +614,18 @@ def cummax(self, reverse: bool = False) -> "Expr":
"""
return wrap_expr(self._pyexpr.cummax(reverse))

def cumcount(self, reverse: bool = False) -> "Expr":
    """
    Get an array with the cumulative count computed at every element.

    Counting starts at 0 and runs up to (but not including) the length.

    Parameters
    ----------
    reverse
        Reverse the operation.
    """
    counted = self._pyexpr.cumcount(reverse)
    return wrap_expr(counted)

def floor(self) -> "Expr":
"""
Floor underlying floating point array to the lowest integers smaller or equal to the float value.
Expand Down
4 changes: 4 additions & 0 deletions py-polars/src/lazy/dsl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -957,6 +957,10 @@ impl PyExpr {
/// Build a new expression that reshapes this one to `dims`.
///
/// The wrapped expression is cloned first, so `self` is left untouched.
pub fn reshape(&self, dims: Vec<i64>) -> Self {
    let expr = self.inner.clone();
    expr.reshape(&dims).into()
}

/// Build a new expression that applies `cumcount` to this one.
///
/// `reverse` is forwarded unchanged to the wrapped expression's `cumcount`.
/// The wrapped expression is cloned first, so `self` is left untouched.
pub fn cumcount(&self, reverse: bool) -> Self {
    let expr = self.inner.clone();
    expr.cumcount(reverse).into()
}
}

impl From<dsl::Expr> for PyExpr {
Expand Down
11 changes: 11 additions & 0 deletions py-polars/tests/test_exprs.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,14 @@ def test_prefix(fruits_cars: pl.DataFrame) -> None:
df = fruits_cars
out = df.select([pl.all().prefix("reverse_")])
assert out.columns == ["reverse_A", "reverse_fruits", "reverse_B", "reverse_cars"]


def test_cumcount() -> None:
    """Check that `cumcount` inside a groupby restarts at 0 for every group."""
    frame = pl.DataFrame([["a"], ["a"], ["a"], ["b"], ["b"], ["a"]], columns=["A"])

    # Forward (non-reversed) count of column "A", exposed under the name "foo".
    counts = pl.col("A").cumcount(reverse=False).alias("foo")
    result = frame.groupby("A", maintain_order=True).agg([counts])

    foo = result["foo"]
    assert foo[0].to_list() == [0, 1, 2, 3]
    assert foo[1].to_list() == [0, 1]

0 comments on commit b8481f9

Please sign in to comment.