implement clip and ceil (#2369)

pola-rs · Jan 14, 2022 · 4233bc2 · 4233bc2
1 parent a9ebfe5
commit 4233bc2
Show file tree

Hide file tree

Showing 10 changed files with 155 additions and 53 deletions.
diff --git a/polars/polars-core/src/series/mod.rs b/polars/polars-core/src/series/mod.rs
@@ -436,47 +436,6 @@ impl Series {
         }
     }
 
-    /// Round underlying floating point array to given decimal.
-    #[cfg(feature = "round_series")]
-    #[cfg_attr(docsrs, doc(cfg(feature = "round_series")))]
-    pub fn round(&self, decimals: u32) -> Result<Self> {
-        use num::traits::Pow;
-        if let Ok(ca) = self.f32() {
-            let multiplier = 10.0.pow(decimals as f32) as f32;
-            let s = ca
-                .apply(|val| (val * multiplier).round() / multiplier)
-                .into_series();
-            return Ok(s);
-        }
-        if let Ok(ca) = self.f64() {
-            let multiplier = 10.0.pow(decimals as f32) as f64;
-            let s = ca
-                .apply(|val| (val * multiplier).round() / multiplier)
-                .into_series();
-            return Ok(s);
-        }
-        Err(PolarsError::SchemaMisMatch(
-            format!("{:?} is not a floating point datatype", self.dtype()).into(),
-        ))
-    }
-
-    #[cfg(feature = "round_series")]
-    #[cfg_attr(docsrs, doc(cfg(feature = "round_series")))]
-    /// Floor underlying floating point array to the lowest integers smaller or equal to the float value.
-    pub fn floor(&self) -> Result<Self> {
-        if let Ok(ca) = self.f32() {
-            let s = ca.apply(|val| val.floor()).into_series();
-            return Ok(s);
-        }
-        if let Ok(ca) = self.f64() {
-            let s = ca.apply(|val| val.floor()).into_series();
-            return Ok(s);
-        }
-        Err(PolarsError::SchemaMisMatch(
-            format!("{:?} is not a floating point datatype", self.dtype()).into(),
-        ))
-    }
-
     #[cfg(feature = "dot_product")]
     #[cfg_attr(docsrs, doc(cfg(feature = "dot_product")))]
     pub fn dot(&self, other: &Series) -> Option<f64> {

diff --git a/polars/polars-core/src/series/ops/mod.rs b/polars/polars-core/src/series/ops/mod.rs
@@ -8,6 +8,8 @@ pub mod moment;
 mod null;
 #[cfg(feature = "pct_change")]
 pub mod pct_change;
+#[cfg(feature = "round_series")]
+mod round;
 mod to_list;
 
 #[derive(Copy, Clone)]

diff --git a/polars/polars-core/src/series/ops/round.rs b/polars/polars-core/src/series/ops/round.rs
@@ -0,0 +1,94 @@
+use crate::prelude::*;
+
+impl Series {
+    /// Round underlying floating point array to given decimal.
+    #[cfg_attr(docsrs, doc(cfg(feature = "round_series")))]
+    pub fn round(&self, decimals: u32) -> Result<Self> {
+        use num::traits::Pow;
+        if let Ok(ca) = self.f32() {
+            let multiplier = 10.0.pow(decimals as f32) as f32;
+            let s = ca
+                .apply(|val| (val * multiplier).round() / multiplier)
+                .into_series();
+            return Ok(s);
+        }
+        if let Ok(ca) = self.f64() {
+            let multiplier = 10.0.pow(decimals as f32) as f64;
+            let s = ca
+                .apply(|val| (val * multiplier).round() / multiplier)
+                .into_series();
+            return Ok(s);
+        }
+        Err(PolarsError::SchemaMisMatch(
+            format!("{:?} is not a floating point datatype", self.dtype()).into(),
+        ))
+    }
+
+    #[cfg_attr(docsrs, doc(cfg(feature = "round_series")))]
+    /// Floor underlying floating point array to the highest integers smaller than or equal to the float value.
+    pub fn floor(&self) -> Result<Self> {
+        if let Ok(ca) = self.f32() {
+            let s = ca.apply(|val| val.floor()).into_series();
+            return Ok(s);
+        }
+        if let Ok(ca) = self.f64() {
+            let s = ca.apply(|val| val.floor()).into_series();
+            return Ok(s);
+        }
+        Err(PolarsError::SchemaMisMatch(
+            format!("{:?} is not a floating point datatype", self.dtype()).into(),
+        ))
+    }
+
+    #[cfg_attr(docsrs, doc(cfg(feature = "round_series")))]
+    /// Ceil underlying floating point array to the lowest integers greater than or equal to the float value.
+    pub fn ceil(&self) -> Result<Self> {
+        if let Ok(ca) = self.f32() {
+            let s = ca.apply(|val| val.ceil()).into_series();
+            return Ok(s);
+        }
+        if let Ok(ca) = self.f64() {
+            let s = ca.apply(|val| val.ceil()).into_series();
+            return Ok(s);
+        }
+        Err(PolarsError::SchemaMisMatch(
+            format!("{:?} is not a floating point datatype", self.dtype()).into(),
+        ))
+    }
+
+    #[cfg_attr(docsrs, doc(cfg(feature = "round_series")))]
+    /// Clip underlying values to the boundary `[min, max]`; supports Float32, Float64, Int32, Int64 and UInt32.
+    pub fn clip(&self, min: f64, max: f64) -> Result<Self> {
+        if let Ok(ca) = self.f32() {
+            let min = min as f32;
+            let max = max as f32;
+            let s = ca.apply(|val| val.clamp(min, max)).into_series();
+            return Ok(s);
+        }
+        if let Ok(ca) = self.f64() {
+            let s = ca.apply(|val| val.clamp(min, max)).into_series();
+            return Ok(s);
+        }
+        if let Ok(ca) = self.i64() {
+            let min = min as i64;
+            let max = max as i64;
+            let s = ca.apply(|val| val.clamp(min, max)).into_series();
+            return Ok(s);
+        }
+        if let Ok(ca) = self.i32() {
+            let min = min as i32;
+            let max = max as i32;
+            let s = ca.apply(|val| val.clamp(min, max)).into_series();
+            return Ok(s);
+        }
+        if let Ok(ca) = self.u32() {
+            let min = min as u32;
+            let max = max as u32;
+            let s = ca.apply(|val| val.clamp(min, max)).into_series();
+            return Ok(s);
+        }
+        Err(PolarsError::SchemaMisMatch(
+            format!("{:?} is not one of {{Float32, Float64, Int32, Int64, UInt32}} consider using a when -> then -> otherwise", self.dtype()).into(),
+        ))
+    }
+}
diff --git a/polars/polars-lazy/src/dsl.rs b/polars/polars-lazy/src/dsl.rs
@@ -1183,6 +1183,22 @@ impl Expr {
             .with_fmt("floor")
     }
 
+    /// Ceil underlying floating point array to the lowest integers greater than or equal to the float value.
+    #[cfg(feature = "round_series")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "round_series")))]
+    pub fn ceil(self) -> Self {
+        self.map(move |s: Series| s.ceil(), GetOutput::same_type())
+            .with_fmt("ceil")
+    }
+
+    /// Clip underlying values to a set boundary.
+    #[cfg(feature = "round_series")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "round_series")))]
+    pub fn clip(self, min: f64, max: f64) -> Self {
+        self.map(move |s: Series| s.clip(min, max), GetOutput::same_type())
+            .with_fmt("clip")
+    }
+
     /// Convert all values to their absolute/positive value.
     #[cfg(feature = "abs")]
     #[cfg_attr(docsrs, doc(cfg(feature = "abs")))]

diff --git a/py-polars/docs/source/reference/expression.rst b/py-polars/docs/source/reference/expression.rst
@@ -172,6 +172,7 @@ Manipulation/ selection
     Expr.repeat_by
     Expr.round
     Expr.floor
+    Expr.ceil
     Expr.cast
     Expr.sort
     Expr.arg_sort

diff --git a/py-polars/docs/source/reference/series.rst b/py-polars/docs/source/reference/series.rst
@@ -168,6 +168,7 @@ Manipulation/ selection
     Series.cast
     Series.round
     Series.floor
+    Series.ceil
     Series.set_at_idx
     Series.fill_null
     Series.zip_with

diff --git a/py-polars/polars/internals/expr.py b/py-polars/polars/internals/expr.py
@@ -737,6 +737,14 @@ def floor(self) -> "Expr":
         """
         return wrap_expr(self._pyexpr.floor())
 
+    def ceil(self) -> "Expr":
+        """
+        Ceil underlying floating point array to the lowest integers greater than or equal to the float value.
+
+        Only works on floating point Series
+        """
+        return wrap_expr(self._pyexpr.ceil())
+
     def round(self, decimals: int) -> "Expr":
         """
         Round underlying floating point data by `decimals` digits.
@@ -1979,7 +1987,11 @@ def kurtosis(self, fisher: bool = True, bias: bool = True) -> "Expr":
 
     def clip(self, min_val: Union[int, float], max_val: Union[int, float]) -> "Expr":
         """
-        Clip (limit) the values in an array.
+        Clip (limit) the values in an array to any value that fits in 64-bit floating point range.
+
+        Only works for the following dtypes: {Int32, Int64, Float32, Float64, UInt32}.
+
+        If you want to clip other dtypes, consider writing a when -> then -> otherwise expression
 
         Parameters
         ----------
@@ -1988,16 +2000,7 @@ def clip(self, min_val: Union[int, float], max_val: Union[int, float]) -> "Expr"
         max_val
             Maximum value.
         """
-        min_val_lit = pli.lit(min_val)
-        max_val_lit = pli.lit(max_val)
-
-        return (
-            pli.when(self < min_val_lit)
-            .then(min_val_lit)
-            .when(self > max_val_lit)
-            .then(max_val_lit)
-            .otherwise(self)
-        ).keep_name()
+        return wrap_expr(self._pyexpr.clip(min_val, max_val))
 
     def lower_bound(self) -> "Expr":
         """

diff --git a/py-polars/polars/internals/series.py b/py-polars/polars/internals/series.py
@@ -2195,6 +2195,14 @@ def floor(self) -> "Series":
         """
         return wrap_s(self._s.floor())
 
+    def ceil(self) -> "Series":
+        """
+        Ceil underlying floating point array to the lowest integers greater than or equal to the float value.
+
+        Only works on floating point Series
+        """
+        return self.to_frame().select(pli.col(self.name).ceil()).to_series()
+
     def round(self, decimals: int) -> "Series":
         """
         Round underlying floating point data by `decimals` digits.
@@ -3200,7 +3208,11 @@ def kurtosis(self, fisher: bool = True, bias: bool = True) -> Optional[float]:
 
     def clip(self, min_val: Union[int, float], max_val: Union[int, float]) -> "Series":
         """
-        Clip (limit) the values in an array.
+        Clip (limit) the values in an array to any value that fits in 64-bit floating point range.
+
+        Only works for the following dtypes: {Int32, Int64, Float32, Float64, UInt32}.
+
+        If you want to clip other dtypes, consider writing a when -> then -> otherwise expression
 
         Parameters
         ----------

diff --git a/py-polars/src/lazy/dsl.rs b/py-polars/src/lazy/dsl.rs
@@ -280,6 +280,14 @@ impl PyExpr {
         self.clone().inner.floor().into()
     }
 
+    pub fn ceil(&self) -> PyExpr {
+        self.clone().inner.ceil().into()
+    }
+
+    pub fn clip(&self, min: f64, max: f64) -> PyExpr {
+        self.clone().inner.clip(min, max).into()
+    }
+
     pub fn abs(&self) -> PyExpr {
         self.clone().inner.abs().into()
     }

diff --git a/py-polars/tests/test_series.py b/py-polars/tests/test_series.py
@@ -1426,3 +1426,9 @@ def test_strip() -> None:
     verify_series_and_expr_api(a, expected, "str.lstrip")
     expected = pl.Series("a", ["trailing", "leading", "both"])
     verify_series_and_expr_api(a, expected, "str.strip")
+
+
+def test_ceil() -> None:
+    a = pl.Series("a", [1.8, 1.2, 3.0])
+    expected = pl.Series("a", [2.0, 2.0, 3.0])
+    verify_series_and_expr_api(a, expected, "ceil")