Skip to content

Commit

Permalink
feat(rust,python): add direct series dispatch for various functions (#…
Browse files Browse the repository at this point in the history
…13010)

Co-authored-by: Stijn de Gooijer <stijndegooijer@gmail.com>
  • Loading branch information
mcrumiller and stinodego committed Dec 14, 2023
1 parent 02b3600 commit c4244ff
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 44 deletions.
54 changes: 24 additions & 30 deletions py-polars/polars/series/series.py
Expand Up @@ -1388,11 +1388,7 @@ def any(self, *, ignore_nulls: bool = True) -> bool | None:
>>> pl.Series([None, False]).any(ignore_nulls=False) # Returns None
"""
return (
self.to_frame()
.select(F.col(self.name).any(ignore_nulls=ignore_nulls))
.item()
)
return self._s.any(ignore_nulls=ignore_nulls)

@overload
def all(self, *, ignore_nulls: Literal[True] = ...) -> bool:
Expand Down Expand Up @@ -1438,11 +1434,7 @@ def all(self, *, ignore_nulls: bool = True) -> bool | None:
>>> pl.Series([None, True]).all(ignore_nulls=False) # Returns None
"""
return (
self.to_frame()
.select(F.col(self.name).all(ignore_nulls=ignore_nulls))
.item()
)
return self._s.all(ignore_nulls=ignore_nulls)

def log(self, base: float = math.e) -> Series:
"""Compute the logarithm to a given base."""
Expand Down Expand Up @@ -1698,7 +1690,7 @@ def mean(self) -> int | float | None:

def product(self) -> int | float:
"""Reduce this Series to the product value."""
return self.to_frame().select(F.col(self.name).product()).to_series().item()
return self._s.product()

def pow(self, exponent: int | float | None | Series) -> Series:
"""
Expand Down Expand Up @@ -1765,7 +1757,7 @@ def nan_max(self) -> int | float | date | datetime | timedelta | str:
whereas polars defaults to ignoring them.
"""
return self.to_frame().select(F.col(self.name).nan_max()).item()
return self.to_frame().select_seq(F.col(self.name).nan_max()).item()

def nan_min(self) -> int | float | date | datetime | timedelta | str:
"""
Expand All @@ -1775,7 +1767,7 @@ def nan_min(self) -> int | float | date | datetime | timedelta | str:
whereas polars defaults to ignoring them.
"""
return self.to_frame().select(F.col(self.name).nan_min()).item()
return self.to_frame().select_seq(F.col(self.name).nan_min()).item()

def std(self, ddof: int = 1) -> float | None:
"""
Expand All @@ -1797,7 +1789,7 @@ def std(self, ddof: int = 1) -> float | None:
"""
if not self.dtype.is_numeric():
return None
return self.to_frame().select(F.col(self.name).std(ddof)).to_series().item()
return self._s.std(ddof)

def var(self, ddof: int = 1) -> float | None:
"""
Expand All @@ -1819,7 +1811,7 @@ def var(self, ddof: int = 1) -> float | None:
"""
if not self.dtype.is_numeric():
return None
return self.to_frame().select(F.col(self.name).var(ddof)).to_series().item()
return self._s.var(ddof)

def median(self) -> float | None:
"""
Expand Down Expand Up @@ -2060,7 +2052,7 @@ def cut(

result = (
self.to_frame()
.select(
.select_seq(
F.col(self.name).cut(
breaks,
labels=labels,
Expand Down Expand Up @@ -2477,10 +2469,8 @@ def value_counts(self, *, sort: bool = False, parallel: bool = False) -> DataFra
│ green ┆ 1 │
└───────┴───────┘
"""
return (
self.to_frame()
.select(F.col(self.name).value_counts(sort=sort, parallel=parallel))
.unnest(self.name)
return pl.DataFrame._from_pydf(
self._s.value_counts(sort=sort, parallel=parallel)
)

def unique_counts(self) -> Series:
Expand Down Expand Up @@ -2526,7 +2516,7 @@ def entropy(self, base: float = math.e, *, normalize: bool = False) -> float | N
"""
return (
self.to_frame()
.select(F.col(self.name).entropy(base, normalize=normalize))
.select_seq(F.col(self.name).entropy(base, normalize=normalize))
.to_series()
.item()
)
Expand Down Expand Up @@ -2796,6 +2786,7 @@ def slice(self, offset: int, length: int | None = None) -> Series:
]
"""
return self._from_pyseries(self._s.slice(offset=offset, length=length))

def append(self, other: Series) -> Self:
"""
Expand Down Expand Up @@ -3964,16 +3955,19 @@ def is_between(
]
"""
if isinstance(lower_bound, str):
lower_bound = F.lit(lower_bound)
if isinstance(upper_bound, str):
upper_bound = F.lit(upper_bound)
if closed == "none":
out = (self > lower_bound) & (self < upper_bound)
elif closed == "both":
out = (self >= lower_bound) & (self <= upper_bound)
elif closed == "right":
out = (self > lower_bound) & (self <= upper_bound)
elif closed == "left":
out = (self >= lower_bound) & (self < upper_bound)

return (
self.to_frame()
.select(F.col(self.name).is_between(lower_bound, upper_bound, closed))
.to_series()
)
if isinstance(out, pl.Expr):
out = F.select(out).to_series()

return out

def to_numpy(
self,
Expand Down
44 changes: 44 additions & 0 deletions py-polars/src/series/aggregation.rs
Expand Up @@ -6,6 +6,24 @@ use crate::PySeries;

#[pymethods]
impl PySeries {
/// Report whether any value of this boolean series is `true`.
///
/// Fails with the converted Polars error when the underlying series
/// cannot be viewed as a boolean array.
///
/// With `ignore_nulls` set, the result is always `Some(..)`; otherwise the
/// Kleene-logic variant is used, which may produce `None`.
fn any(&self, ignore_nulls: bool) -> PyResult<Option<bool>> {
    let mask = self.series.bool().map_err(PyPolarsErr::from)?;
    if !ignore_nulls {
        // Kleene logic: the answer itself can be unknown (`None`).
        return Ok(mask.any_kleene());
    }
    Ok(Some(mask.any()))
}

/// Report whether every value of this boolean series is `true`.
///
/// Fails with the converted Polars error when the underlying series
/// cannot be viewed as a boolean array.
///
/// With `ignore_nulls` set, the result is always `Some(..)`; otherwise the
/// Kleene-logic variant is used, which may produce `None`.
fn all(&self, ignore_nulls: bool) -> PyResult<Option<bool>> {
    let mask = self.series.bool().map_err(PyPolarsErr::from)?;
    if !ignore_nulls {
        // Kleene logic: the answer itself can be unknown (`None`).
        return Ok(mask.all_kleene());
    }
    Ok(Some(mask.all()))
}

/// Forward to the wrapped series' `arg_max` and return its result unchanged.
///
/// NOTE(review): presumably the position of the maximum value, with `None`
/// when no such position exists — confirm against `Series::arg_max`.
fn arg_max(&self) -> Option<usize> {
self.series.arg_max()
}
Expand Down Expand Up @@ -56,6 +74,10 @@ impl PySeries {
.into_py(py))
}

/// Reduce the series to its product and hand the value to Python.
///
/// The reduction yields a series; its first element is extracted and
/// converted into a Python object. A failure to read that element is
/// surfaced as the converted Polars error.
fn product(&self, py: Python) -> PyResult<PyObject> {
    let reduced = self.series.product();
    let scalar = reduced.get(0).map_err(PyPolarsErr::from)?;
    Ok(Wrap(scalar).into_py(py))
}

fn quantile(
&self,
quantile: f64,
Expand All @@ -70,6 +92,28 @@ impl PySeries {
Ok(Python::with_gil(|py| Wrap(out).into_py(py)))
}

/// Compute the standard deviation of the series and hand it to Python.
///
/// `ddof` is forwarded as-is to `std_as_series`. The reduction yields a
/// series; its first element is extracted and converted into a Python
/// object. Errors from either step are surfaced as the converted Polars
/// error.
fn std(&self, py: Python, ddof: u8) -> PyResult<PyObject> {
    let reduced = self
        .series
        .std_as_series(ddof)
        .map_err(PyPolarsErr::from)?;
    let scalar = reduced.get(0).map_err(PyPolarsErr::from)?;
    Ok(Wrap(scalar).into_py(py))
}

/// Compute the variance of the series and hand it to Python.
///
/// `ddof` is forwarded as-is to `var_as_series`. The reduction yields a
/// series; its first element is extracted and converted into a Python
/// object. Errors from either step are surfaced as the converted Polars
/// error.
fn var(&self, py: Python, ddof: u8) -> PyResult<PyObject> {
    let reduced = self
        .series
        .var_as_series(ddof)
        .map_err(PyPolarsErr::from)?;
    let scalar = reduced.get(0).map_err(PyPolarsErr::from)?;
    Ok(Wrap(scalar).into_py(py))
}

fn sum(&self, py: Python) -> PyResult<PyObject> {
Ok(Wrap(
self.series
Expand Down
13 changes: 13 additions & 0 deletions py-polars/src/series/mod.rs
Expand Up @@ -686,6 +686,19 @@ impl PySeries {
/// Build a new `PySeries` from `tail(Some(n))` of the wrapped series.
///
/// NOTE(review): presumably this keeps the last `n` elements — confirm
/// against `Series::tail`.
fn tail(&self, n: usize) -> Self {
self.series.tail(Some(n)).into()
}

/// Compute the value counts of the series and return them as a data frame.
///
/// `sort` and `parallel` are forwarded unchanged to the underlying
/// `value_counts` call. A failure there is surfaced as the converted
/// Polars error.
fn value_counts(&self, sort: bool, parallel: bool) -> PyResult<PyDataFrame> {
    self.series
        .value_counts(sort, parallel)
        .map(Into::into)
        .map_err(|err| PyPolarsErr::from(err).into())
}

/// Take a slice of the series starting at `offset`.
///
/// When `length` is `None`, the full length of the series is passed as the
/// slice length.
fn slice(&self, offset: i64, length: Option<usize>) -> Self {
    let series = &self.series;
    let span = match length {
        Some(n) => n,
        // No explicit length: fall back to the length of the whole series.
        None => series.len(),
    };
    series.slice(offset, span).into()
}
}

macro_rules! impl_set_with_mask {
Expand Down
14 changes: 0 additions & 14 deletions py-polars/tests/unit/series/test_series.py
Expand Up @@ -2320,20 +2320,6 @@ def test_extend_constant(const: Any, dtype: pl.PolarsDataType) -> None:
assert_series_equal(s.extend_constant(const, 3), expected)


def test_any_all() -> None:
    """Check `Series.any` and `Series.all` on mixed, all-true and all-false data."""
    # Each case: (values, expected result of any(), expected result of all()).
    cases = [
        ([True, False, True], True, False),
        ([True, True, True], True, True),
        ([False, False, False], False, False),
    ]
    for values, expected_any, expected_all in cases:
        a = pl.Series("a", values)
        # Identity checks: the results must be real Python bools.
        assert a.any() is expected_any
        assert a.all() is expected_all


def test_product() -> None:
a = pl.Series("a", [1, 2, 3])
out = a.product()
Expand Down

0 comments on commit c4244ff

Please sign in to comment.