ChunkAgg::mean -> Option<f64> instead of Option<Native>

pola-rs · Apr 8, 2021 · 4f0d244 · 4f0d244
1 parent 6eecdbb
commit 4f0d244
Show file tree

Hide file tree

Showing 9 changed files with 59 additions and 60 deletions.
diff --git a/polars/polars-core/src/chunked_array/ops/aggregate.rs b/polars/polars-core/src/chunked_array/ops/aggregate.rs
@@ -119,10 +119,9 @@ where
         }
     }
 
-    fn mean(&self) -> Option<T::Native> {
+    fn mean(&self) -> Option<f64> {
         let len = (self.len() - self.null_count()) as f64;
-        self.sum()
-            .map(|v| NumCast::from(v.to_f64().unwrap() / len).unwrap())
+        self.sum().map(|v| v.to_f64().unwrap() / len)
     }
 
     fn median(&self) -> Option<T::Native> {
@@ -239,9 +238,9 @@ impl ChunkAgg<u32> for BooleanChunked {
         Some(min_max_helper(self, false))
     }
 
-    fn mean(&self) -> Option<u32> {
-        let len = self.len() - self.null_count();
-        self.sum().map(|v| (v as usize / len) as u32)
+    fn mean(&self) -> Option<f64> {
+        let len = (self.len() - self.null_count()) as f64;
+        self.sum().map(|v| v as f64 / len)
     }
 
     fn median(&self) -> Option<u32> {
@@ -260,6 +259,9 @@ impl ChunkAgg<u32> for BooleanChunked {
     }
 }
 
+impl ChunkAgg<Series> for ListChunked {}
+impl ChunkAgg<String> for Utf8Chunked {}
+
 // Needs the same trait bounds as the implementation of ChunkedArray<T> of dyn Series
 impl<T> ChunkAggSeries for ChunkedArray<T>
 where
@@ -289,11 +291,8 @@ where
         if self.null_count() == self.len() {
             Self::full_null(self.name(), 1).into_series()
         } else {
-            let s = self.sum_as_series();
-            let mut out =
-                s.cast::<Float64Type>().unwrap() / (self.len() - self.null_count()) as f64;
-            out.rename(self.name());
-            out
+            let val = [self.mean()];
+            Series::new(self.name(), val)
         }
     }
     fn median_as_series(&self) -> Series {
@@ -421,7 +420,7 @@ impl ChunkAggSeries for BooleanChunked {
     }
     fn mean_as_series(&self) -> Series {
         let v = ChunkAgg::mean(self);
-        let mut ca: UInt32Chunked = [v].iter().copied().collect();
+        let mut ca: Float64Chunked = [v].iter().copied().collect();
         ca.rename(self.name());
         ca.into_series()
     }

diff --git a/polars/polars-core/src/chunked_array/ops/fill_none.rs b/polars/polars-core/src/chunked_array/ops/fill_none.rs
@@ -105,10 +105,11 @@ where
                 .fill_none_with_value(self.max().ok_or_else(|| {
                     PolarsError::Other("Could not determine fill value".into())
                 })?)?,
-            FillNoneStrategy::Mean => self
-                .fill_none_with_value(self.mean().ok_or_else(|| {
-                    PolarsError::Other("Could not determine fill value".into())
-                })?)?,
+            FillNoneStrategy::Mean => self.fill_none_with_value(
+                self.mean()
+                    .map(|v| NumCast::from(v).unwrap())
+                    .ok_or_else(|| PolarsError::Other("Could not determine fill value".into()))?,
+            )?,
             FillNoneStrategy::One => return self.fill_none_with_value(One::one()),
             FillNoneStrategy::Zero => return self.fill_none_with_value(Zero::zero()),
             FillNoneStrategy::MinBound => return self.fill_none_with_value(Bounded::min_value()),

diff --git a/polars/polars-core/src/chunked_array/ops/mod.rs b/polars/polars-core/src/chunked_array/ops/mod.rs
@@ -442,24 +442,36 @@ pub trait ChunkApply<'a, A, B> {
 pub trait ChunkAgg<T> {
     /// Aggregate the sum of the ChunkedArray.
     /// Returns `None` if the array is empty or only contains null values.
-    fn sum(&self) -> Option<T>;
+    fn sum(&self) -> Option<T> {
+        None
+    }
 
-    fn min(&self) -> Option<T>;
+    fn min(&self) -> Option<T> {
+        None
+    }
     /// Returns the maximum value in the array, according to the natural order.
     /// Returns `None` if the array is empty or only contains null values.
-    fn max(&self) -> Option<T>;
+    fn max(&self) -> Option<T> {
+        None
+    }
 
     /// Returns the mean value in the array.
     /// Returns `None` if the array is empty or only contains null values.
-    fn mean(&self) -> Option<T>;
+    fn mean(&self) -> Option<f64> {
+        None
+    }
 
     /// Returns the median value in the array.
     /// Returns `None` if the array is empty or only contains null values.
-    fn median(&self) -> Option<T>;
+    fn median(&self) -> Option<T> {
+        None
+    }
 
     /// Aggregate a given quantile of the ChunkedArray.
     /// Returns `None` if the array is empty or only contains null values.
-    fn quantile(&self, quantile: f64) -> Result<Option<T>>;
+    fn quantile(&self, _quantile: f64) -> Result<Option<T>> {
+        Ok(None)
+    }
 }
 
 /// Variance and standard deviation aggregation.

diff --git a/polars/polars-core/src/series/implementations/dates.rs b/polars/polars-core/src/series/implementations/dates.rs
@@ -355,6 +355,10 @@ macro_rules! impl_dyn_series {
                 self.0.slice(offset, length).into_series()
             }
 
+            fn mean(&self) -> Option<f64> {
+                cast_and_apply!(self, mean,)
+            }
+
             fn append(&mut self, other: &Series) -> Result<()> {
                 if self.0.dtype() == other.dtype() {
                     // todo! add object

diff --git a/polars/polars-core/src/series/implementations/mod.rs b/polars/polars-core/src/series/implementations/mod.rs
@@ -533,6 +533,10 @@ macro_rules! impl_dyn_series {
                 ChunkFilter::filter(&self.0, filter).map(|ca| ca.into_series())
             }
 
+            fn mean(&self) -> Option<f64> {
+                self.0.mean()
+            }
+
             fn take(&self, indices: &UInt32Chunked) -> Series {
                 let indices = if indices.chunks.len() > 1 {
                     Cow::Owned(indices.rechunk())

diff --git a/polars/polars-core/src/series/mod.rs b/polars/polars-core/src/series/mod.rs
@@ -468,6 +468,12 @@ pub trait SeriesTrait: Send + Sync + private::PrivateSeries {
         }
     }
 
+    /// Returns the mean value in the array.
+    /// Returns an option because the array is nullable.
+    fn mean(&self) -> Option<f64> {
+        unimplemented!()
+    }
+
     /// Create a new Series filled with values at that index.
     ///
     /// # Example
@@ -1126,18 +1132,6 @@ impl Series {
             .and_then(|s| s.f64().unwrap().get(0).and_then(T::from))
     }
 
-    /// Returns the mean value in the array
-    /// Returns an option because the array is nullable.
-    pub fn mean<T>(&self) -> Option<T>
-    where
-        T: NumCast,
-    {
-        self.cast::<Float64Type>()
-            .ok()
-            .map(|s| s.mean_as_series())
-            .and_then(|s| s.f64().unwrap().get(0).and_then(T::from))
-    }
-
     /// Explode a list or utf8 Series. This expands every item to a new row.
     pub fn explode(&self) -> Result<Series> {
         match self.dtype() {

diff --git a/py-polars/CHANGELOG.md b/py-polars/CHANGELOG.md
@@ -4,11 +4,14 @@ The Rust crate `polars` has its own changelog.
 
 ### polars 0.7.5
 * bug fix
-  - fix bug in vectorized hashing algorithm that affected groupbys with null values
+  - fix bug in vectorized hashing algorithm that affected groupbys with null values: #523
 
 * feature
-  - use lazy groupby API/DSL in eager API #522
-  - make sort groupby-context aware #522
+  - use lazy groupby API/DSL in eager API: #522
+  - make sort groupby-context aware: #522
+
+* performance
+  - improve sort algorithms for sort and argsort: #526
 
 ### polars 0.7.4
 * performance

diff --git a/py-polars/polars/series.py b/py-polars/polars/series.py
@@ -525,8 +525,7 @@ def mean(self):
         """
         Reduce this Series to the mean value.
         """
-        # use float type for mean aggregations no matter of base type
-        return self._s.mean_f64()
+        return self._s.mean()
 
     def min(self):
         """

diff --git a/py-polars/src/series.rs b/py-polars/src/series.rs
@@ -300,6 +300,10 @@ impl PySeries {
         dt as u8
     }
 
+    pub fn mean(&self) -> Option<f64> {
+        self.series.mean()
+    }
+
     pub fn n_chunks(&self) -> usize {
         self.series.n_chunks()
     }
@@ -1415,27 +1419,6 @@ impl_max!(max_i64, i64);
 impl_max!(max_f32, f32);
 impl_max!(max_f64, f64);
 
-macro_rules! impl_mean {
-    ($name:ident, $type:ty) => {
-        #[pymethods]
-        impl PySeries {
-            pub fn $name(&self) -> PyResult<Option<$type>> {
-                Ok(self.series.mean())
-            }
-        }
-    };
-}
-
-impl_mean!(mean_u8, u8);
-impl_mean!(mean_u16, u16);
-impl_mean!(mean_u32, u32);
-impl_mean!(mean_u64, u64);
-impl_mean!(mean_i8, i8);
-impl_mean!(mean_i16, i16);
-impl_mean!(mean_i32, i32);
-impl_mean!(mean_i64, i64);
-impl_mean!(mean_f32, f32);
-impl_mean!(mean_f64, f64);
 
 macro_rules! impl_eq_num {
     ($name:ident, $type:ty) => {