Skip to content

Commit

Permalink
feat[python]: improve fill_null ergonomics (#5045)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Sep 30, 2022
1 parent 1f9b7e6 commit ea14910
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 16 deletions.
10 changes: 9 additions & 1 deletion py-polars/polars/internals/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4437,6 +4437,7 @@ def fill_null(
value: Any | None = None,
strategy: FillNullStrategy | None = None,
limit: int | None = None,
matches_supertype: bool = True,
) -> DF:
"""
Fill null values using the specified value or strategy.
Expand All @@ -4450,6 +4451,8 @@ def fill_null(
limit
Number of consecutive null values to fill when using the 'forward' or
'backward' strategy.
matches_supertype
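Also fill columns whose dtype is a matching supertype of the fill ``value``
(for example, an integer ``value`` also fills other integer and float columns).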
Returns
-------
Expand Down Expand Up @@ -4484,7 +4487,12 @@ def fill_null(
└─────┴──────┘
"""
return self.select(pli.all().fill_null(value, strategy, limit))
return self._from_pydf(
self.lazy()
.fill_null(value, strategy, limit, matches_supertype)
.collect(no_optimization=True)
._df
)

def fill_nan(self, fill_value: pli.Expr | int | float | None) -> DataFrame:
"""
Expand Down
49 changes: 47 additions & 2 deletions py-polars/polars/internals/lazyframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,21 @@

from polars import internals as pli
from polars.cfg import Config
from polars.datatypes import DataType, PolarsDataType, Schema, py_type_to_dtype
from polars.datatypes import (
Boolean,
Categorical,
DataType,
Float32,
Float64,
Int8,
Int16,
Int32,
Int64,
PolarsDataType,
Schema,
Utf8,
py_type_to_dtype,
)
from polars.internals import selection_to_pyexpr_list
from polars.internals.lazyframe.groupby import LazyGroupBy
from polars.internals.slice import LazyPolarsSlice
Expand Down Expand Up @@ -2054,6 +2068,7 @@ def fill_null(
value: Any | None = None,
strategy: FillNullStrategy | None = None,
limit: int | None = None,
matches_supertype: bool = True,
) -> LDF:
"""
Fill null values using the specified value or strategy.
Expand All @@ -2067,8 +2082,38 @@ def fill_null(
limit
Number of consecutive null values to fill when using the 'forward' or
'backward' strategy.
matches_supertype
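Also fill columns whose dtype is a matching supertype of the fill ``value``
(for example, an integer ``value`` also fills other integer and float columns).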
"""
if value is not None:
if isinstance(value, pli.Expr):
dtype = next(iter(self.select(value).schema.values()))
dtypes = [dtype]
elif isinstance(value, bool):
dtypes = [Boolean]
elif isinstance(value, int):
dtypes = [Int64]
if matches_supertype:
dtypes.append(Int8)
dtypes.append(Int16)
dtypes.append(Int32)
dtypes.append(Float32)
dtypes.append(Float64)
elif isinstance(value, float):
dtypes = [Float64]
if matches_supertype:
dtypes.append(Int8)
dtypes.append(Int16)
dtypes.append(Int32)
dtypes.append(Int64)
dtypes.append(Float32)
dtypes.append(Float64)
elif isinstance(value, str):
dtypes = [Utf8, Categorical]

return self.with_column(pli.col(dtypes).fill_null(value, strategy, limit))

"""
return self.select(pli.all().fill_null(value, strategy, limit))

def fill_nan(self: LDF, fill_value: int | str | float | pli.Expr | None) -> LDF:
Expand Down
13 changes: 0 additions & 13 deletions py-polars/tests/unit/test_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,16 +195,3 @@ def test_filter_not_of_type_bool() -> None:
pl.ComputeError, match="Filter predicate must be of type Boolean, got"
):
df.filter(pl.col("json_val").str.json_path_match("$.a"))


def test_fill_null_unknown_supertype() -> None:
    """Expect ``fill_null(0)`` to raise ``SchemaError`` on a mixed frame.

    The frame holds two integer columns containing nulls and one
    Categorical column; filling it with the integer ``0`` must raise.
    """
    columns = [
        pl.Series("a", [1, None, 3]),
        pl.Series("b", ["hello", "at the", "bar"], dtype=pl.Categorical),
        pl.Series("c", [1, 1, None]),
    ]
    frame = pl.DataFrame(columns)

    with pytest.raises(pl.SchemaError):
        frame.fill_null(0)
33 changes: 33 additions & 0 deletions py-polars/tests/unit/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -649,6 +649,39 @@ def test_fill_null() -> None:
assert a.fill_null(strategy="backward").to_list() == [0.0, 1.0, 2.0, 2.0, 3.0, 3.0]
assert a.fill_null(strategy="mean").to_list() == [0.0, 1.0, 1.5, 2.0, 1.5, 3.0]

df = pl.DataFrame(
[
pl.Series("i32", [1, 2, None], dtype=pl.Int32),
pl.Series("i64", [1, 2, None], dtype=pl.Int64),
pl.Series("f32", [1, 2, None], dtype=pl.Float32),
pl.Series("cat", ["a", "b", None], dtype=pl.Categorical),
pl.Series("str", ["a", "b", None], dtype=pl.Utf8),
pl.Series("bool", [True, True, None], dtype=pl.Boolean),
]
)

assert df.fill_null(0, matches_supertype=False).fill_null("bar").fill_null(
False
).to_dict(False) == {
"i32": [1, 2, None],
"i64": [1, 2, 0],
"f32": [1.0, 2.0, None],
"cat": ["a", "b", "bar"],
"str": ["a", "b", "bar"],
"bool": [True, True, False],
}

assert df.fill_null(0, matches_supertype=True).fill_null("bar").fill_null(
False
).to_dict(False) == {
"i32": [1, 2, 0],
"i64": [1, 2, 0],
"f32": [1.0, 2.0, 0.0],
"cat": ["a", "b", "bar"],
"str": ["a", "b", "bar"],
"bool": [True, True, False],
}


def test_fill_nan() -> None:
nan = float("nan")
Expand Down

0 comments on commit ea14910

Please sign in to comment.