Skip to content

Commit

Permalink
fix(python): use of fill_null with temporal literals (#5440)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-beedie committed Nov 7, 2022
1 parent d80885a commit a452ca2
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 32 deletions.
53 changes: 29 additions & 24 deletions py-polars/polars/internals/lazyframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import subprocess
import typing
from datetime import date, datetime, time, timedelta
from io import BytesIO, IOBase, StringIO
from pathlib import Path
from typing import TYPE_CHECKING, Any, Callable, NoReturn, Sequence, TypeVar, overload
Expand All @@ -10,9 +11,13 @@
from polars import internals as pli
from polars.cfg import Config
from polars.datatypes import (
DTYPE_TEMPORAL_UNITS,
Boolean,
Categorical,
DataType,
Date,
Datetime,
Duration,
Float32,
Float64,
Int8,
Expand All @@ -21,6 +26,7 @@
Int64,
PolarsDataType,
Schema,
Time,
UInt8,
UInt16,
UInt32,
Expand Down Expand Up @@ -2269,7 +2275,7 @@ def fill_null(
Number of consecutive null values to fill when using the 'forward' or
'backward' strategy.
matches_supertype
Fill all matching supertype of the fill ``value``.
Fill all matching supertypes of the fill ``value`` literal.
Examples
--------
Expand Down Expand Up @@ -2343,39 +2349,38 @@ def fill_null(
└─────┴──────┘
"""
dtypes: Sequence[PolarsDataType]

if value is not None:

def infer_dtype(value: Any) -> PolarsDataType:
return next(iter(self.select(value).schema.values()))

if isinstance(value, pli.Expr):
dtype = next(iter(self.select(value).schema.values()))
dtypes = [dtype]
dtypes = [infer_dtype(value)]
elif isinstance(value, bool):
dtypes = [Boolean]
elif matches_supertype and isinstance(value, (int, float)):
ints = [Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64]
floats = [Float32, Float64]
dtypes = ints + floats
elif isinstance(value, int):
dtypes = [Int64]
if matches_supertype:
dtypes.append(Int8)
dtypes.append(Int16)
dtypes.append(Int32)
dtypes.append(UInt8)
dtypes.append(UInt16)
dtypes.append(UInt32)
dtypes.append(UInt64)
dtypes.append(Float32)
dtypes.append(Float64)
elif isinstance(value, float):
dtypes = [Float64]
if matches_supertype:
dtypes.append(Int8)
dtypes.append(Int16)
dtypes.append(Int32)
dtypes.append(Int64)
dtypes.append(UInt8)
dtypes.append(UInt16)
dtypes.append(UInt32)
dtypes.append(UInt64)
dtypes.append(Float32)
dtypes.append(Float64)
elif isinstance(value, datetime):
dtypes = [Datetime] + [Datetime(tu) for tu in DTYPE_TEMPORAL_UNITS]
elif isinstance(value, timedelta):
dtypes = [Duration] + [Duration(tu) for tu in DTYPE_TEMPORAL_UNITS]
elif isinstance(value, date):
dtypes = [Date]
elif isinstance(value, time):
dtypes = [Time]
elif isinstance(value, str):
dtypes = [Utf8, Categorical]
else:
# fallback; anything not explicitly handled above
dtypes = [infer_dtype(pli.lit(value))]

return self.with_column(pli.col(dtypes).fill_null(value, strategy, limit))

Expand Down
77 changes: 69 additions & 8 deletions py-polars/tests/unit/test_datelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,21 +25,23 @@


def test_fill_null() -> None:
dt = datetime.strptime("2021-01-01", "%Y-%m-%d")
s = pl.Series("A", [dt, None])
dtm = datetime.strptime("2021-01-01", "%Y-%m-%d")
s = pl.Series("A", [dtm, None])

for fill_val in (dt, pl.lit(dt)):
for fill_val in (dtm, pl.lit(dtm)):
out = s.fill_null(fill_val)

assert out.null_count() == 0
assert out.dt[0] == dt
assert out.dt[1] == dt
assert out.dt[0] == dtm
assert out.dt[1] == dtm

dt1 = date(2001, 1, 1)
dt2 = date(2001, 1, 2)
dt3 = date(2001, 1, 3)

s = pl.Series("a", [dt1, dt2, dt3, None])
dt_2 = date(2001, 1, 4)

for fill_val in (dt_2, pl.lit(dt_2)):
out = s.fill_null(fill_val)

Expand All @@ -49,6 +51,65 @@ def test_fill_null() -> None:
assert out.dt[-1] == dt_2


def test_fill_null_temporal() -> None:
    """Filling nulls with temporal literals should work across columns of every timeunit."""
    now = datetime.now()
    now_ms = now.replace(microsecond=(now.microsecond // 1000) * 1000)
    delta = timedelta(days=7, seconds=45045)
    clock = now.time()
    today = now.date()

    # one column per temporal dtype/timeunit combination
    schema = [
        ("a", pl.Datetime),
        ("b", pl.Datetime("ms")),
        ("c", pl.Datetime("us")),
        ("d", pl.Datetime("ns")),
        ("e", pl.Date),
        ("f", pl.Time),
        ("g", pl.Duration),
        ("h", pl.Duration("ms")),
        ("i", pl.Duration("us")),
        ("j", pl.Duration("ns")),
    ]
    frame = pl.DataFrame(
        [
            [now, now_ms, now, now, today, clock, delta, delta, delta, delta],
            [None] * 10,
        ],
        columns=schema,  # type: ignore[arg-type]
        orient="row",
    )

    # literals used to fill the all-null second row (ms variants are the
    # expected truncations of the us/ns fills)
    fill_dtm_us = fill_dtm_ns = datetime(2023, 12, 31, 23, 59, 59, 999999)
    fill_dtm_ms = datetime(2023, 12, 31, 23, 59, 59, 999000)
    fill_td_us = timedelta(days=7, seconds=45045, microseconds=123456)
    fill_td_ms = timedelta(days=7, seconds=45045, microseconds=123000)
    fill_dt = date(2023, 12, 31)
    fill_tm = time(23, 59, 59)

    # apply each temporal literal via fill_null on the lazy frame
    lazy = frame.lazy()
    for lit in (fill_dtm_ns, fill_td_us, fill_dt, fill_tm):
        lazy = lazy.fill_null(lit)

    # row 1 is untouched; row 2 (previously all nulls) now holds the literals,
    # truncated as appropriate for each column's timeunit
    assert lazy.collect().rows() == [
        (now, now_ms, now, now, today, clock, delta, delta, delta, delta),
        (
            fill_dtm_us,
            fill_dtm_ms,
            fill_dtm_us,
            fill_dtm_ns,
            fill_dt,
            fill_tm,
            fill_td_us,
            fill_td_ms,
            fill_td_us,
            fill_td_us,
        ),
    ]


def test_filter_date() -> None:
dtcol = pl.col("date")
df = pl.DataFrame(
Expand Down Expand Up @@ -1084,8 +1145,9 @@ def test_timelike_init() -> None:


def test_timedelta_timeunit_init() -> None:
d, s, us = 7, 45045, 123456
td_us = timedelta(days=d, seconds=s, microseconds=us)
td_us = timedelta(days=7, seconds=45045, microseconds=123456)
td_ms = timedelta(days=7, seconds=45045, microseconds=123000)

df = pl.DataFrame(
[[td_us, td_us, td_us]],
columns=[
Expand All @@ -1095,7 +1157,6 @@ def test_timedelta_timeunit_init() -> None:
],
orient="row",
)
td_ms = timedelta(days=d, seconds=s, microseconds=(us // 1000) * 1000)
assert df.rows() == [(td_ms, td_us, td_us)]


Expand Down

0 comments on commit a452ca2

Please sign in to comment.