fill_nan expression and fix #1478

pola-rs · Oct 2, 2021 · a4464e9 · a4464e9
1 parent 898d99e
commit a4464e9
Show file tree

Hide file tree

Showing 7 changed files with 54 additions and 6 deletions.
diff --git a/polars/polars-core/src/series/implementations/dates.rs b/polars/polars-core/src/series/implementations/dates.rs
@@ -148,7 +148,12 @@ macro_rules! impl_dyn_series {
 
             #[cfg(feature = "zip_with")]
             fn zip_with_same_type(&self, mask: &BooleanChunked, other: &Series) -> Result<Series> {
-                try_physical_dispatch!(self, zip_with_same_type, mask, other)
+                let other = if self.dtype() == &DataType::Date32 {
+                    other.cast_with_dtype(&DataType::Int32)?
+                } else {
+                    other.cast_with_dtype(&DataType::Int64)?
+                };
+                try_physical_dispatch!(self, zip_with_same_type, mask, &other)
             }
 
             fn vec_hash(&self, random_state: RandomState) -> AlignedVec<u64> {

diff --git a/polars/polars-lazy/src/dsl.rs b/polars/polars-lazy/src/dsl.rs
@@ -1110,7 +1110,7 @@ impl Expr {
         }
     }
 
-    /// Shift the values in the array by some period. See [the eager implementation](polars_core::series::SeriesTrait::fill_null).
+    /// Replace the null values by a value.
     pub fn fill_null(self, fill_value: Expr) -> Self {
         map_binary_lazy_field(
             self,
@@ -1132,6 +1132,11 @@ impl Expr {
             },
         )
     }
+
+    /// Replace the floating point `NaN` values by a value.
+    pub fn fill_nan(self, fill_value: Expr) -> Self {
+        when(self.clone().is_nan()).then(fill_value).otherwise(self)
+    }
     /// Count the values of the Series
     /// or
     /// Get counts of the group by operation.

diff --git a/polars/polars-lazy/src/physical_plan/expressions/cast.rs b/polars/polars-lazy/src/physical_plan/expressions/cast.rs
@@ -17,12 +17,24 @@ impl CastExpr {
         // We use the booleanarray as null series, because we have no null array.
         // in a ternary or binary operation, we then do type coercion to matching supertype.
         // here we create a null array for the types we cannot cast to from a booleanarray
-        if matches!(self.data_type, DataType::List(_)) {
-            // the booleanarray is hacked as null type
-            if input.bool().is_ok() && input.null_count() == input.len() {
-                return Ok(ListChunked::full_null(input.name(), input.len()).into_series());
+
+        if input.bool().is_ok() && input.null_count() == input.len() {
+            match self.data_type {
+                DataType::List(_) => {
+                    return Ok(ListChunked::full_null(input.name(), input.len()).into_series())
+                }
+                #[cfg(feature = "dtype-date32")]
+                DataType::Date32 => {
+                    return Ok(Date32Chunked::full_null(input.name(), input.len()).into_series())
+                }
+                #[cfg(feature = "dtype-date64")]
+                DataType::Date64 => {
+                    return Ok(Date64Chunked::full_null(input.name(), input.len()).into_series())
+                }
+                _ => {}
             }
         }
+
         if self.strict {
             input.strict_cast(&self.data_type)
         } else {

diff --git a/polars/polars-lazy/src/test.rs b/polars/polars-lazy/src/test.rs
@@ -1885,3 +1885,17 @@ fn test_power_in_agg_list2() -> Result<()> {
 
     Ok(())
 }
+
+#[test]
+#[cfg(feature = "dtype-date32")]
+fn test_fill_nan() -> Result<()> {
+    let s0 = Series::new("date", &[1, 2, 3]).cast_with_dtype(&DataType::Date32)?;
+    let s1 = Series::new("float", &[Some(1.0), Some(f32::NAN), Some(3.0)]);
+
+    let df = DataFrame::new(vec![s0, s1])?;
+    let out = df.lazy().fill_nan(Null {}.lit()).collect()?;
+    let out = out.column("float")?;
+    assert_eq!(Vec::from(out.f32()?), &[Some(1.0), None, Some(3.0)]);
+
+    Ok(())
+}
diff --git a/py-polars/docs/source/reference/expression.rst b/py-polars/docs/source/reference/expression.rst
@@ -153,6 +153,7 @@ Manipulation/ selection
     Expr.shift
     Expr.shift_and_fill
     Expr.fill_null
+    Expr.fill_nan
     Expr.forward_fill
     Expr.backward_fill
     Expr.reverse

diff --git a/py-polars/polars/lazy/expr.py b/py-polars/polars/lazy/expr.py
@@ -776,6 +776,13 @@ def fill_null(self, fill_value: Union[str, int, float, "Expr"]) -> "Expr":
         fill_value = expr_to_lit_or_expr(fill_value, str_to_lit=True)
         return wrap_expr(self._pyexpr.fill_null(fill_value._pyexpr))
 
+    def fill_nan(self, fill_value: Union[str, int, float, "Expr"]) -> "Expr":
+        """
+        Fill floating point NaN values with a fill value
+        """
+        fill_value = expr_to_lit_or_expr(fill_value, str_to_lit=True)
+        return wrap_expr(self._pyexpr.fill_nan(fill_value._pyexpr))
+
     def forward_fill(self) -> "Expr":
         """
         Fill missing values with the latest seen values

diff --git a/py-polars/src/lazy/dsl.rs b/py-polars/src/lazy/dsl.rs
@@ -232,6 +232,10 @@ impl PyExpr {
         self.clone().inner.fill_null(expr.inner).into()
     }
 
+    pub fn fill_nan(&self, expr: PyExpr) -> PyExpr {
+        self.clone().inner.fill_nan(expr.inner).into()
+    }
+
     pub fn filter(&self, predicate: PyExpr) -> PyExpr {
         self.clone().inner.filter(predicate.inner).into()
     }