Skip to content

Commit

Permalink
product expression
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jan 8, 2022
1 parent 0465717 commit 7788875
Show file tree
Hide file tree
Showing 16 changed files with 170 additions and 12 deletions.
2 changes: 2 additions & 0 deletions polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ dynamic_groupby = ["polars-core/dynamic_groupby", "polars-lazy/dynamic_groupby"]
ewma = ["polars-core/ewma", "polars-lazy/ewma"]
dot_diagram = ["polars-lazy/dot_diagram"]
dataframe_arithmetic = ["polars-core/dataframe_arithmetic"]
product = ["polars-core/product"]

# don't use this
private = ["polars-lazy/private"]
Expand Down Expand Up @@ -179,6 +180,7 @@ docs-selection = [
"abs",
"dot_diagram",
"string_encoding",
"product",
]

bench = [
Expand Down
2 changes: 2 additions & 0 deletions polars/polars-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ horizontal_concat = []
abs = []
ewma = ["polars-utils"]
dataframe_arithmetic = []
product = []

dynamic_groupby = ["polars-time", "dtype-datetime", "dtype-date"]

Expand Down Expand Up @@ -130,6 +131,7 @@ docs-selection = [
"abs",
"dataframe_arithmetic",
"string_encoding",
"product",
]

[dependencies]
Expand Down
17 changes: 17 additions & 0 deletions polars/polars-core/src/chunked_array/ops/aggregate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ pub trait ChunkAggSeries {
fn median_as_series(&self) -> Series {
unimplemented!()
}
/// Get the product of the ChunkedArray as a new Series of length 1.
///
/// This default implementation is a placeholder that panics with `unimplemented!()`;
/// it only yields a value for types that supply their own implementation.
fn prod_as_series(&self) -> Series {
unimplemented!()
}
/// Get the quantile of the ChunkedArray as a new Series of length 1.
fn quantile_as_series(
&self,
Expand Down Expand Up @@ -343,6 +347,19 @@ where
let val = [self.median()];
Series::new(self.name(), val)
}

/// Multiply all values together and return the result as a `Series` of length 1.
///
/// The result is a single null when any value is null, and also when the
/// array holds no values at all (the accumulator is never initialised).
fn prod_as_series(&self) -> Series {
    let mut acc = None;
    for item in self.into_iter() {
        // A single null poisons the whole product: bail out immediately.
        let value = match item {
            Some(value) => value,
            None => return Self::full_null(self.name(), 1).into_series(),
        };
        // The first value seeds the accumulator; later values are multiplied in.
        acc = match acc {
            None => Some(value),
            Some(running) => Some(running * value),
        };
    }
    Self::new_from_opt_slice(self.name(), &[acc]).into_series()
}

fn quantile_as_series(
&self,
quantile: f64,
Expand Down
58 changes: 55 additions & 3 deletions polars/polars-core/src/series/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -539,11 +539,23 @@ impl Series {
{
use DataType::*;
match self.dtype() {
Boolean => self.cast(&DataType::Int64).unwrap().cumsum(reverse),
Boolean => self.cast(&DataType::UInt32).unwrap().cumsum(reverse),
Int8 | UInt8 | Int16 | UInt16 => {
let s = self.cast(&Int64).unwrap();
s.cumsum(reverse)
}
Int32 => {
let ca = self.i32().unwrap();
ca.cumsum(reverse).into_series()
}
UInt32 => {
let ca = self.u32().unwrap();
ca.cumsum(reverse).into_series()
}
UInt64 => {
let ca = self.u64().unwrap();
ca.cumsum(reverse).into_series()
}
Int64 => {
let ca = self.i64().unwrap();
ca.cumsum(reverse).into_series()
Expand All @@ -567,7 +579,7 @@ impl Series {

/// Get an array with the cumulative product computed at every element
///
/// If the [`DataType`] is one of `{Int8, UInt8, Int16, UInt16}` the `Series` is
/// If the [`DataType`] is one of `{Int8, UInt8, Int16, UInt16, Int32, UInt32}` the `Series` is
/// first cast to `Int64` to prevent overflow issues.
#[cfg_attr(docsrs, doc(cfg(feature = "cum_agg")))]
#[allow(unused_variables)]
Expand All @@ -577,14 +589,18 @@ impl Series {
use DataType::*;
match self.dtype() {
Boolean => self.cast(&DataType::Int64).unwrap().cumprod(reverse),
Int8 | UInt8 | Int16 | UInt16 => {
Int8 | UInt8 | Int16 | UInt16 | Int32 | UInt32 => {
let s = self.cast(&Int64).unwrap();
s.cumprod(reverse)
}
Int64 => {
let ca = self.i64().unwrap();
ca.cumprod(reverse).into_series()
}
UInt64 => {
let ca = self.u64().unwrap();
ca.cumprod(reverse).into_series()
}
Float32 => {
let ca = self.f32().unwrap();
ca.cumprod(reverse).into_series()
Expand All @@ -602,6 +618,42 @@ impl Series {
}
}

/// Get the product of all values of the `Series` as a new `Series` of length 1.
///
/// If the [`DataType`] is one of `{Boolean, Int8, UInt8, Int16, UInt16, Int32, UInt32}`
/// the `Series` is first cast to `Int64` to prevent overflow issues. `Int64`, `Float32`
/// and `Float64` are aggregated in their own type.
///
/// # Panics
///
/// Panics if the dtype is not one of the types listed above, or if the `product`
/// feature is not activated.
#[cfg_attr(docsrs, doc(cfg(feature = "product")))]
pub fn product(&self) -> Series {
    #[cfg(feature = "product")]
    {
        use DataType::*;
        match self.dtype() {
            // Booleans and narrow integers are widened first, so the aggregation
            // itself always runs on `Int64` (re-entering through the `Int64` arm).
            Boolean | Int8 | UInt8 | Int16 | UInt16 | Int32 | UInt32 => {
                let s = self.cast(&Int64).unwrap();
                s.product()
            }
            Int64 => {
                let ca = self.i64().unwrap();
                ca.prod_as_series()
            }
            Float32 => {
                let ca = self.f32().unwrap();
                ca.prod_as_series()
            }
            Float64 => {
                let ca = self.f64().unwrap();
                ca.prod_as_series()
            }
            dt => panic!("product not supported for dtype: {:?}", dt),
        }
    }
    #[cfg(not(feature = "product"))]
    {
        panic!("activate 'product' feature")
    }
}

/// Apply a rolling variance to a Series. See:
#[cfg_attr(docsrs, doc(cfg(feature = "rolling_window")))]
pub fn rolling_var(&self, _options: RollingOptions) -> Result<Series> {
Expand Down
34 changes: 32 additions & 2 deletions polars/polars-lazy/src/dsl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1113,7 +1113,14 @@ impl Expr {
pub fn cumprod(self, reverse: bool) -> Self {
self.apply(
move |s: Series| Ok(s.cumprod(reverse)),
GetOutput::same_type(),
GetOutput::map_dtype(|dt| {
use DataType::*;
match dt {
Float32 => Float32,
Float64 => Float64,
_ => Int64,
}
}),
)
}

Expand All @@ -1131,7 +1138,30 @@ impl Expr {
pub fn cummax(self, reverse: bool) -> Self {
self.apply(
move |s: Series| Ok(s.cummax(reverse)),
GetOutput::same_type(),
GetOutput::map_dtype(|dt| {
use DataType::*;
match dt {
Float32 => Float32,
Float64 => Float64,
_ => Int64,
}
}),
)
}

/// Get the product aggreagtion of an expresion
#[cfg_attr(docsrs, doc(cfg(feature = "product")))]
pub fn product(self) -> Self {
self.apply(
move |s: Series| Ok(s.product()),
GetOutput::map_dtype(|dt| {
use DataType::*;
match dt {
Float32 => Float32,
Float64 => Float64,
_ => Int64,
}
}),
)
}

Expand Down
16 changes: 9 additions & 7 deletions polars/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@
//! - `ewma` - Exponential moving average windows
//! - `abs` - Get absolute values of Series
//! - `arange` - Range operation on Series
//! - `product` - Compute the product of a Series.
//! * `DataFrame` pretty printing (Choose one or none, but not both):
//! - `plain_fmt` - no overflowing (less compilation times)
//! - `pretty_fmt` - cell overflow (increased compilation times)
Expand All @@ -165,13 +166,14 @@
//!
//! | data type | feature flag |
//! |-------------------------|-------------------|
//! | DateType | dtype-date |
//! | DatetimeType | dtype-datetime |
//! | TimeType | dtype-time |
//! | Int8Type | dtype-i8 |
//! | Int16Type | dtype-i16 |
//! | UInt8Type | dtype-u8 |
//! | UInt16Type | dtype-u16 |
//! | Date | dtype-date |
//! | Datetime | dtype-datetime |
//! | Time | dtype-time |
//! | Duration | dtype-duration |
//! | Int8 | dtype-i8 |
//! | Int16 | dtype-i16 |
//! | UInt8 | dtype-u8 |
//! | UInt16 | dtype-u16 |
//! | Categorical | dtype-categorical |
//!
//!
Expand Down
1 change: 1 addition & 0 deletions py-polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ features = [
"dataframe_arithmetic",
"json",
"string_encoding",
"product",
]

# [patch.crates-io]
Expand Down
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/dataframe.rst
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ Aggregation
DataFrame.var
DataFrame.median
DataFrame.quantile
DataFrame.product

Descriptive stats
-----------------
Expand Down
2 changes: 2 additions & 0 deletions py-polars/docs/source/reference/expression.rst
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,11 @@ Aggregation
Expr.min
Expr.sum
Expr.mean
Expr.median
Expr.first
Expr.last
Expr.product
Expr.list
Expr.agg_groups
Expr.count
Expand Down
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/series.rst
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ Aggregation
Series.var
Series.median
Series.quantile
Series.product
Series.mode
Series.arg_min
Series.arg_max
Expand Down
6 changes: 6 additions & 0 deletions py-polars/polars/internals/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -1000,6 +1000,12 @@ def median(self) -> "Expr":
"""
return wrap_expr(self._pyexpr.median())

def product(self) -> "Expr":
    """
    Compute the product of an expression.
    """
    py_expr = self._pyexpr.product()
    return wrap_expr(py_expr)

def n_unique(self) -> "Expr":
"""Count unique values."""
return wrap_expr(self._pyexpr.n_unique())
Expand Down
6 changes: 6 additions & 0 deletions py-polars/polars/internals/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3696,6 +3696,12 @@ def median(self) -> "DataFrame":
"""
return wrap_df(self._df.median())

def product(self) -> "DataFrame":
    """
    Aggregate the columns of this DataFrame to their product values.
    """
    # Select every column and apply the product aggregation to each of them.
    product_of_all_columns = pli.all().product()
    return self.select(product_of_all_columns)

def quantile(self, quantile: float, interpolation: str = "nearest") -> "DataFrame":
"""
Aggregate the columns of this DataFrame to their quantile value.
Expand Down
6 changes: 6 additions & 0 deletions py-polars/polars/internals/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -737,6 +737,12 @@ def mean(self) -> Union[int, float]:
"""
return self._s.mean()

def product(self) -> Union[int, float, None]:
    """
    Reduce this Series to the product value.

    Returns
    -------
    The product of the values as a Python scalar, or ``None`` when the
    aggregated result is null (for example when the Series contains a null).
    """
    # Route through a one-column frame so the expression implementation is reused,
    # then unpack the single aggregated value.
    aggregated = self.to_frame().select(pli.col(self.name).product())
    return aggregated.to_series()[0]

def min(self) -> Union[int, float]:
"""
Get the minimal value in this Series.
Expand Down
4 changes: 4 additions & 0 deletions py-polars/src/lazy/dsl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,10 @@ impl PyExpr {
self.clone().inner.cumprod(reverse).into()
}

pub fn product(&self) -> PyExpr {
    // Work on a clone: the lazy expression method consumes its receiver.
    let expr = self.clone().inner;
    expr.product().into()
}

pub fn str_parse_date(&self, fmt: Option<String>) -> PyExpr {
let function = move |s: Series| {
let ca = s.utf8()?;
Expand Down
14 changes: 14 additions & 0 deletions py-polars/tests/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -1770,3 +1770,17 @@ def test_df_broadcast() -> None:
df = pl.DataFrame({"a": [1, 2, 3]})
out = df.with_column(pl.Series([[1, 2]]))
assert out.shape == (3, 2)


def test_product() -> None:
    # One column per supported kind: integers, floats, and booleans
    # (with and without a False value).
    data = {
        "int": [1, 2, 3],
        "flt": [-1.0, 12.0, 9.0],
        "bool_0": [True, False, True],
        "bool_1": [True, True, True],
    }
    expected = pl.DataFrame(
        {"int": [6], "flt": [-108.0], "bool_0": [0], "bool_1": [1]}
    )
    result = pl.DataFrame(data).product()
    assert result.frame_equal(expected)
12 changes: 12 additions & 0 deletions py-polars/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1404,3 +1404,15 @@ def test_any_all() -> None:
verify_series_and_expr_api(a, expected, "any")
expected = pl.Series("a", [False])
verify_series_and_expr_api(a, expected, "all")


def test_product() -> None:
    # Without nulls the product is the plain arithmetic product.
    assert pl.Series("a", [1, 2, 3]).product() == 6

    # A null makes the result null, whether it is the last or the first value.
    for values in ([1, 2, None], [None, 2, 3]):
        assert pl.Series("a", values).product() is None

0 comments on commit 7788875

Please sign in to comment.