Skip to content

Commit

Permalink
ChunkAgg::mean -> Option<f64> instead of Option<Native>
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Apr 8, 2021
1 parent 6eecdbb commit 4f0d244
Show file tree
Hide file tree
Showing 9 changed files with 59 additions and 60 deletions.
23 changes: 11 additions & 12 deletions polars/polars-core/src/chunked_array/ops/aggregate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,10 +119,9 @@ where
}
}

fn mean(&self) -> Option<T::Native> {
fn mean(&self) -> Option<f64> {
let len = (self.len() - self.null_count()) as f64;
self.sum()
.map(|v| NumCast::from(v.to_f64().unwrap() / len).unwrap())
self.sum().map(|v| v.to_f64().unwrap() / len)
}

fn median(&self) -> Option<T::Native> {
Expand Down Expand Up @@ -239,9 +238,9 @@ impl ChunkAgg<u32> for BooleanChunked {
Some(min_max_helper(self, false))
}

fn mean(&self) -> Option<u32> {
let len = self.len() - self.null_count();
self.sum().map(|v| (v as usize / len) as u32)
fn mean(&self) -> Option<f64> {
let len = (self.len() - self.null_count()) as f64;
self.sum().map(|v| v as f64 / len)
}

fn median(&self) -> Option<u32> {
Expand All @@ -260,6 +259,9 @@ impl ChunkAgg<u32> for BooleanChunked {
}
}

// These impls supply no method bodies, so every `ChunkAgg` method on
// `ListChunked` and `Utf8Chunked` falls back to the trait's default implementation.
impl ChunkAgg<Series> for ListChunked {}
impl ChunkAgg<String> for Utf8Chunked {}

// Needs the same trait bounds as the implementation of ChunkedArray<T> of dyn Series
impl<T> ChunkAggSeries for ChunkedArray<T>
where
Expand Down Expand Up @@ -289,11 +291,8 @@ where
if self.null_count() == self.len() {
Self::full_null(self.name(), 1).into_series()
} else {
let s = self.sum_as_series();
let mut out =
s.cast::<Float64Type>().unwrap() / (self.len() - self.null_count()) as f64;
out.rename(self.name());
out
let val = [self.mean()];
Series::new(self.name(), val)
}
}
fn median_as_series(&self) -> Series {
Expand Down Expand Up @@ -421,7 +420,7 @@ impl ChunkAggSeries for BooleanChunked {
}
fn mean_as_series(&self) -> Series {
let v = ChunkAgg::mean(self);
let mut ca: UInt32Chunked = [v].iter().copied().collect();
let mut ca: Float64Chunked = [v].iter().copied().collect();
ca.rename(self.name());
ca.into_series()
}
Expand Down
9 changes: 5 additions & 4 deletions polars/polars-core/src/chunked_array/ops/fill_none.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,10 +105,11 @@ where
.fill_none_with_value(self.max().ok_or_else(|| {
PolarsError::Other("Could not determine fill value".into())
})?)?,
FillNoneStrategy::Mean => self
.fill_none_with_value(self.mean().ok_or_else(|| {
PolarsError::Other("Could not determine fill value".into())
})?)?,
FillNoneStrategy::Mean => self.fill_none_with_value(
self.mean()
.map(|v| NumCast::from(v).unwrap())
.ok_or_else(|| PolarsError::Other("Could not determine fill value".into()))?,
)?,
FillNoneStrategy::One => return self.fill_none_with_value(One::one()),
FillNoneStrategy::Zero => return self.fill_none_with_value(Zero::zero()),
FillNoneStrategy::MinBound => return self.fill_none_with_value(Bounded::min_value()),
Expand Down
24 changes: 18 additions & 6 deletions polars/polars-core/src/chunked_array/ops/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -442,24 +442,36 @@ pub trait ChunkApply<'a, A, B> {
pub trait ChunkAgg<T> {
/// Aggregate the sum of the ChunkedArray.
/// Returns `None` if the array is empty or only contains null values.
fn sum(&self) -> Option<T>;
fn sum(&self) -> Option<T> {
None
}

fn min(&self) -> Option<T>;
fn min(&self) -> Option<T> {
None
}
/// Returns the maximum value in the array, according to the natural order.
/// Returns `None` if the array is empty or only contains null values.
fn max(&self) -> Option<T>;
fn max(&self) -> Option<T> {
None
}

/// Returns the mean value in the array.
/// Returns `None` if the array is empty or only contains null values.
fn mean(&self) -> Option<T>;
fn mean(&self) -> Option<f64> {
None
}

/// Returns the median value in the array.
/// Returns `None` if the array is empty or only contains null values.
fn median(&self) -> Option<T>;
fn median(&self) -> Option<T> {
None
}

/// Aggregate a given quantile of the ChunkedArray.
/// Returns `None` if the array is empty or only contains null values.
fn quantile(&self, quantile: f64) -> Result<Option<T>>;
fn quantile(&self, _quantile: f64) -> Result<Option<T>> {
Ok(None)
}
}

/// Variance and standard deviation aggregation.
Expand Down
4 changes: 4 additions & 0 deletions polars/polars-core/src/series/implementations/dates.rs
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,10 @@ macro_rules! impl_dyn_series {
self.0.slice(offset, length).into_series()
}

/// Returns the mean as an `f64`, obtained by invoking `mean` through the
/// `cast_and_apply!` macro.
// NOTE(review): presumably the macro casts the date/time array to its
// underlying numeric representation before calling `mean` — confirm against
// the macro definition.
fn mean(&self) -> Option<f64> {
cast_and_apply!(self, mean,)
}

fn append(&mut self, other: &Series) -> Result<()> {
if self.0.dtype() == other.dtype() {
// todo! add object
Expand Down
4 changes: 4 additions & 0 deletions polars/polars-core/src/series/implementations/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,10 @@ macro_rules! impl_dyn_series {
ChunkFilter::filter(&self.0, filter).map(|ca| ca.into_series())
}

/// Returns the mean of the wrapped array as an `f64` by forwarding to
/// `self.0.mean()`.
fn mean(&self) -> Option<f64> {
self.0.mean()
}

fn take(&self, indices: &UInt32Chunked) -> Series {
let indices = if indices.chunks.len() > 1 {
Cow::Owned(indices.rechunk())
Expand Down
18 changes: 6 additions & 12 deletions polars/polars-core/src/series/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -468,6 +468,12 @@ pub trait SeriesTrait: Send + Sync + private::PrivateSeries {
}
}

/// Returns the mean value in the array as an `f64`.
/// Returns an option because the array is nullable.
///
/// # Panics
///
/// This default body is `unimplemented!()`, so calling it panics unless the
/// implementing type overrides the method.
fn mean(&self) -> Option<f64> {
unimplemented!()
}

/// Create a new Series filled with values at that index.
///
/// # Example
Expand Down Expand Up @@ -1126,18 +1132,6 @@ impl Series {
.and_then(|s| s.f64().unwrap().get(0).and_then(T::from))
}

/// Returns the mean value in the array, converted to `T`.
/// Returns an option because the array is nullable.
///
/// The series is cast to `Float64Type`, reduced with `mean_as_series`, and the
/// first value of the result is converted with `T::from`. The result is `None`
/// if the cast fails, if that first value is absent, or if `T::from` cannot
/// represent it.
///
/// # Panics
///
/// Panics if `f64()` fails on the series returned by `mean_as_series`
/// (the result is unwrapped).
pub fn mean<T>(&self) -> Option<T>
where
T: NumCast,
{
self.cast::<Float64Type>()
.ok()
.map(|s| s.mean_as_series())
.and_then(|s| s.f64().unwrap().get(0).and_then(T::from))
}

/// Explode a list or utf8 Series. This expands every item to a new row.
pub fn explode(&self) -> Result<Series> {
match self.dtype() {
Expand Down
9 changes: 6 additions & 3 deletions py-polars/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,14 @@ The Rust crate `polars` has its own changelog.

### polars 0.7.5
* bug fix
- fix bug in vectorized hashing algorithm that affected groupbys with null values
- fix bug in vectorized hashing algorithm that affected groupbys with null values: #523

* feature
- use lazy groupby API/DSL in eager API #522
- make sort groupby-context aware #522
- use lazy groupby API/DSL in eager API: #522
- make sort groupby-context aware: #522

* performance
- improve sort algorithms for sort and argsort: #526

### polars 0.7.4
* performance
Expand Down
3 changes: 1 addition & 2 deletions py-polars/polars/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,8 +525,7 @@ def mean(self):
"""
Reduce this Series to the mean value.
"""
# use float type for mean aggregations no matter of base type
return self._s.mean_f64()
return self._s.mean()

def min(self):
"""
Expand Down
25 changes: 4 additions & 21 deletions py-polars/src/series.rs
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,10 @@ impl PySeries {
dt as u8
}

/// Returns the mean of the wrapped series as an `f64` by forwarding to
/// `self.series.mean()`.
pub fn mean(&self) -> Option<f64> {
self.series.mean()
}

pub fn n_chunks(&self) -> usize {
self.series.n_chunks()
}
Expand Down Expand Up @@ -1415,27 +1419,6 @@ impl_max!(max_i64, i64);
impl_max!(max_f32, f32);
impl_max!(max_f64, f64);

// Generates a `#[pymethods]` impl block on `PySeries` containing one method
// named `$name`. The method returns `self.series.mean()` wrapped in `Ok`, with
// the return type `PyResult<Option<$type>>`.
macro_rules! impl_mean {
($name:ident, $type:ty) => {
#[pymethods]
impl PySeries {
pub fn $name(&self) -> PyResult<Option<$type>> {
Ok(self.series.mean())
}
}
};
}

// One generated `mean_*` method per numeric return type.
impl_mean!(mean_u8, u8);
impl_mean!(mean_u16, u16);
impl_mean!(mean_u32, u32);
impl_mean!(mean_u64, u64);
impl_mean!(mean_i8, i8);
impl_mean!(mean_i16, i16);
impl_mean!(mean_i32, i32);
impl_mean!(mean_i64, i64);
impl_mean!(mean_f32, f32);
impl_mean!(mean_f64, f64);

macro_rules! impl_eq_num {
($name:ident, $type:ty) => {
Expand Down

0 comments on commit 4f0d244

Please sign in to comment.