Skip to content

Commit

Permalink
value_counts expression
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Mar 23, 2022
1 parent bcb220f commit 99bb515
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 0 deletions.
18 changes: 18 additions & 0 deletions polars/polars-lazy/src/dsl/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2070,6 +2070,24 @@ impl Expr {
)
}

#[cfg(feature = "dtype-struct")]
#[cfg_attr(docsrs, doc(cfg(feature = "dtype-struct")))]
/// Count the occurrences of each unique value.
///
/// The output is declared as a struct column that keeps the name of the input and
/// has two fields: the input field itself and a `"counts"` field of type `IDX_DTYPE`.
pub fn value_counts(self) -> Self {
    // Declared schema of the result: a struct named after the input field that
    // pairs the input field with its count.
    let output_type = GetOutput::map_field(|fld| {
        let struct_fields = vec![fld.clone(), Field::new("counts", IDX_DTYPE)];
        Field::new(fld.name(), DataType::Struct(struct_fields))
    });

    self.apply(
        |s| {
            // Compute the counts as a frame, then fold that frame into a single
            // struct series carrying the name of the input series.
            let counts = s.value_counts()?;
            Ok(counts.into_struct(s.name()).into_series())
        },
        output_type,
    )
}

#[cfg(feature = "strings")]
pub fn str(self) -> string::StringNameSpace {
string::StringNameSpace(self)
Expand Down
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/expression.rst
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ Computations
Expr.log
Expr.log10
Expr.exp
Expr.value_counts

Manipulation/ selection
-----------------------
Expand Down
37 changes: 37 additions & 0 deletions py-polars/polars/internals/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -2786,6 +2786,43 @@ def extend_constant(
"""
return wrap_expr(self._pyexpr.extend_constant(value, n))

def value_counts(self) -> "Expr":
    """
    Count how often each unique value occurs.

    Every unique value is paired with its count in a struct.

    Returns
    -------
    Dtype Struct

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "id": ["a", "b", "b", "c", "c", "c"],
    ...     }
    ... )
    >>> df.select(
    ...     [
    ...         pl.col("id").value_counts(),
    ...     ]
    ... )
    shape: (3, 1)
    ┌─────────────────────────────────────┐
    │ id                                  │
    │ ---                                 │
    │ struct[2]{'id': str, 'counts': u32} │
    ╞═════════════════════════════════════╡
    │ {"c",3}                             │
    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ {"b",2}                             │
    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ {"a",1}                             │
    └─────────────────────────────────────┘

    """
    # Delegate to the wrapped native expression, then re-wrap the result.
    counted = self._pyexpr.value_counts()
    return wrap_expr(counted)

# The namespaces are defined below. Keep them at the end of the definition of Expr, so as not to
# confuse mypy between the type annotation `str` and the namespace "str".

Expand Down
3 changes: 3 additions & 0 deletions py-polars/src/lazy/dsl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,9 @@ impl PyExpr {
pub fn count(&self) -> PyExpr {
    // Work on a copy so the receiver is left untouched, then hand the inner
    // expression's `count()` result back as a `PyExpr`.
    let wrapper = self.clone();
    let counted = wrapper.inner.count();
    counted.into()
}
pub fn value_counts(&self) -> PyExpr {
    // `value_counts` consumes the expression, so operate on a clone of the
    // inner expression and convert the result back into a `PyExpr`.
    let expr = self.inner.clone();
    let counted = expr.value_counts();
    counted.into()
}
pub fn cast(&self, data_type: Wrap<DataType>, strict: bool) -> PyExpr {
let dt = data_type.0;
let expr = if strict {
Expand Down
21 changes: 21 additions & 0 deletions py-polars/tests/test_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,3 +101,24 @@ def test_struct_function_expansion() -> None:
"a",
"b",
]


def test_value_counts_expr() -> None:
    """Check that `value_counts` on a column expression yields (value, count) pairs."""
    frame = pl.DataFrame({"id": ["a", "b", "b", "c", "c", "c"]})

    counted = frame.select([pl.col("id").value_counts()])
    rows = counted.to_series().to_list()

    # Sort before comparing so the assertion does not depend on row order.
    ordered = sorted(rows)  # type: ignore
    assert ordered == [("a", 1), ("b", 2), ("c", 3)]

0 comments on commit 99bb515

Please sign in to comment.