feat[python]: improve fill_null ergonomics (#5045)

pola-rs · Sep 30, 2022 · ea14910 · ea14910
1 parent 1f9b7e6
commit ea14910
Show file tree

Hide file tree

Showing 4 changed files with 89 additions and 16 deletions.
diff --git a/py-polars/polars/internals/dataframe/frame.py b/py-polars/polars/internals/dataframe/frame.py
@@ -4437,6 +4437,7 @@ def fill_null(
         value: Any | None = None,
         strategy: FillNullStrategy | None = None,
         limit: int | None = None,
+        matches_supertype: bool = True,
     ) -> DF:
         """
         Fill null values using the specified value or strategy.
@@ -4450,6 +4451,8 @@ def fill_null(
         limit
             Number of consecutive null values to fill when using the 'forward' or
             'backward' strategy.
+        matches_supertype
+            Fill all columns whose dtype matches a supertype of the fill ``value``.
 
         Returns
         -------
@@ -4484,7 +4487,12 @@ def fill_null(
         └─────┴──────┘
 
         """
-        return self.select(pli.all().fill_null(value, strategy, limit))
+        return self._from_pydf(
+            self.lazy()
+            .fill_null(value, strategy, limit, matches_supertype)
+            .collect(no_optimization=True)
+            ._df
+        )
 
     def fill_nan(self, fill_value: pli.Expr | int | float | None) -> DataFrame:
         """

diff --git a/py-polars/polars/internals/lazyframe/frame.py b/py-polars/polars/internals/lazyframe/frame.py
@@ -12,7 +12,21 @@
 
 from polars import internals as pli
 from polars.cfg import Config
-from polars.datatypes import DataType, PolarsDataType, Schema, py_type_to_dtype
+from polars.datatypes import (
+    Boolean,
+    Categorical,
+    DataType,
+    Float32,
+    Float64,
+    Int8,
+    Int16,
+    Int32,
+    Int64,
+    PolarsDataType,
+    Schema,
+    Utf8,
+    py_type_to_dtype,
+)
 from polars.internals import selection_to_pyexpr_list
 from polars.internals.lazyframe.groupby import LazyGroupBy
 from polars.internals.slice import LazyPolarsSlice
@@ -2054,6 +2068,7 @@ def fill_null(
         value: Any | None = None,
         strategy: FillNullStrategy | None = None,
         limit: int | None = None,
+        matches_supertype: bool = True,
     ) -> LDF:
         """
         Fill null values using the specified value or strategy.
@@ -2067,8 +2082,38 @@ def fill_null(
         limit
             Number of consecutive null values to fill when using the 'forward' or
             'backward' strategy.
+        matches_supertype
+            Fill all columns whose dtype matches a supertype of the fill ``value``.
+
+        """
+        if value is not None:
+            if isinstance(value, pli.Expr):
+                dtype = next(iter(self.select(value).schema.values()))
+                dtypes = [dtype]
+            elif isinstance(value, bool):
+                dtypes = [Boolean]
+            elif isinstance(value, int):
+                dtypes = [Int64]
+                if matches_supertype:
+                    dtypes.append(Int8)
+                    dtypes.append(Int16)
+                    dtypes.append(Int32)
+                    dtypes.append(Float32)
+                    dtypes.append(Float64)
+            elif isinstance(value, float):
+                dtypes = [Float64]
+                if matches_supertype:
+                    dtypes.append(Int8)
+                    dtypes.append(Int16)
+                    dtypes.append(Int32)
+                    dtypes.append(Int64)
+                    dtypes.append(Float32)
+                    dtypes.append(Float64)
+            elif isinstance(value, str):
+                dtypes = [Utf8, Categorical]
+
+            return self.with_column(pli.col(dtypes).fill_null(value, strategy, limit))
 
-        """
         return self.select(pli.all().fill_null(value, strategy, limit))
 
     def fill_nan(self: LDF, fill_value: int | str | float | pli.Expr | None) -> LDF:

diff --git a/py-polars/tests/unit/test_errors.py b/py-polars/tests/unit/test_errors.py
@@ -195,16 +195,3 @@ def test_filter_not_of_type_bool() -> None:
         pl.ComputeError, match="Filter predicate must be of type Boolean, got"
     ):
         df.filter(pl.col("json_val").str.json_path_match("$.a"))
-
-
-def test_fill_null_unknown_supertype() -> None:
-    df = pl.DataFrame(
-        [
-            pl.Series("a", [1, None, 3]),
-            pl.Series("b", ["hello", "at the", "bar"], dtype=pl.Categorical),
-            pl.Series("c", [1, 1, None]),
-        ]
-    )
-
-    with pytest.raises(pl.SchemaError):
-        df.fill_null(0)
diff --git a/py-polars/tests/unit/test_series.py b/py-polars/tests/unit/test_series.py
@@ -649,6 +649,39 @@ def test_fill_null() -> None:
     assert a.fill_null(strategy="backward").to_list() == [0.0, 1.0, 2.0, 2.0, 3.0, 3.0]
     assert a.fill_null(strategy="mean").to_list() == [0.0, 1.0, 1.5, 2.0, 1.5, 3.0]
 
+    df = pl.DataFrame(
+        [
+            pl.Series("i32", [1, 2, None], dtype=pl.Int32),
+            pl.Series("i64", [1, 2, None], dtype=pl.Int64),
+            pl.Series("f32", [1, 2, None], dtype=pl.Float32),
+            pl.Series("cat", ["a", "b", None], dtype=pl.Categorical),
+            pl.Series("str", ["a", "b", None], dtype=pl.Utf8),
+            pl.Series("bool", [True, True, None], dtype=pl.Boolean),
+        ]
+    )
+
+    assert df.fill_null(0, matches_supertype=False).fill_null("bar").fill_null(
+        False
+    ).to_dict(False) == {
+        "i32": [1, 2, None],
+        "i64": [1, 2, 0],
+        "f32": [1.0, 2.0, None],
+        "cat": ["a", "b", "bar"],
+        "str": ["a", "b", "bar"],
+        "bool": [True, True, False],
+    }
+
+    assert df.fill_null(0, matches_supertype=True).fill_null("bar").fill_null(
+        False
+    ).to_dict(False) == {
+        "i32": [1, 2, 0],
+        "i64": [1, 2, 0],
+        "f32": [1.0, 2.0, 0.0],
+        "cat": ["a", "b", "bar"],
+        "str": ["a", "b", "bar"],
+        "bool": [True, True, False],
+    }
+
 
 def test_fill_nan() -> None:
     nan = float("nan")