Skip to content

Commit

Permalink
fix(python): fix use of date_range with 'lazy' parameter (#5652)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-beedie committed Nov 28, 2022
1 parent 9eaf247 commit 8497f95
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 9 deletions.
13 changes: 6 additions & 7 deletions py-polars/polars/internals/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,9 +298,9 @@ def date_range(
Using polars duration string to specify the interval:
>>> from datetime import date
>>> pl.date_range(date(2022, 1, 1), date(2022, 3, 1), "1mo", name="drange")
>>> pl.date_range(date(2022, 1, 1), date(2022, 3, 1), "1mo", name="dtrange")
shape: (3,)
Series: 'drange' [date]
Series: 'dtrange' [date]
[
2022-01-01
2022-02-01
Expand Down Expand Up @@ -329,14 +329,16 @@ def date_range(
]
"""
if name is None:
name = ""
if isinstance(interval, timedelta):
interval = _timedelta_to_pl_duration(interval)
elif " " in interval:
interval = interval.replace(" ", "")

if isinstance(low, pli.Expr) or isinstance(high, pli.Expr) or lazy:
low = pli.expr_to_lit_or_expr(low, str_to_lit=True)
high = pli.expr_to_lit_or_expr(high, str_to_lit=True)
low = pli.expr_to_lit_or_expr(low, str_to_lit=True)._pyexpr
high = pli.expr_to_lit_or_expr(high, str_to_lit=True)._pyexpr
return pli.wrap_expr(
_py_date_range_lazy(low, high, interval, closed, name, time_zone)
)
Expand Down Expand Up @@ -370,9 +372,6 @@ def date_range(

start = _datetime_to_pl_timestamp(low, tu)
stop = _datetime_to_pl_timestamp(high, tu)
if name is None:
name = ""

dt_range = pli.wrap_s(
_py_date_range(start, stop, interval, closed, name, tu, time_zone)
)
Expand Down
66 changes: 66 additions & 0 deletions py-polars/tests/unit/test_datelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,72 @@ def test_date_range() -> None:
assert result.cast(pl.Utf8)[-1] == "2022-01-01 00:00:59.247379260"


def test_date_range_lazy() -> None:
    """Check that ``pl.date_range(..., lazy=True)`` produces the expected dates.

    Two ways of supplying the bounds are exercised: python ``date`` literals
    and column expressions. In both cases the resulting expression is
    collected into a single list column named ``dts``.
    """
    # lazy date range with literals
    start, stop = date(2000, 1, 1), date(2023, 8, 31)
    df = pl.DataFrame({"misc": ["x"]}).with_columns(
        pl.date_range(start, stop, interval="987d", lazy=True).list().alias("dts")
    )
    expected_from_literals = [
        date(2000, 1, 1),
        date(2002, 9, 14),
        date(2005, 5, 28),
        date(2008, 2, 9),
        date(2010, 10, 23),
        date(2013, 7, 6),
        date(2016, 3, 19),
        date(2018, 12, 1),
        date(2021, 8, 14),
    ]
    assert df.rows() == [("x", expected_from_literals)]

    # Cross-check against pandas over the *same* bounds as the polars call,
    # so both libraries are validated on an identical range.
    pandas_dates = pd.date_range(start, stop, freq="987d").date.tolist()
    assert df.rows()[0][1] == pandas_dates

    # lazy date range with expressions
    bounds = pl.DataFrame(
        {"start": [date(2015, 6, 30)], "stop": [date(2022, 12, 31)]}
    )
    ldf = bounds.with_columns(
        pl.date_range(
            pl.col("start"),
            pl.col("stop"),
            interval="678d",
            lazy=True,
        )
        .list()
        .alias("dts")
    ).lazy()

    expected_from_exprs = [
        date(2015, 6, 30),
        date(2017, 5, 8),
        date(2019, 3, 17),
        date(2021, 1, 23),
        date(2022, 12, 2),
    ]
    assert ldf.collect().rows() == [
        (date(2015, 6, 30), date(2022, 12, 31), expected_from_exprs)
    ]


@pytest.mark.parametrize(
"one,two",
[
Expand Down
4 changes: 2 additions & 2 deletions py-polars/tests/unit/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -1387,10 +1387,10 @@ def test_reproducible_hash_with_seeds() -> None:
seeds = (11, 22, 33, 44)

# TODO: introduce a platform-stable string hash...
# in the meantime, account for arm64 (mac) hash values to reduce noise
# in the meantime, try to account for arm64 (mac) hash values to reduce noise
expected = pl.Series(
"s",
[8823051245921001677, 988796329533502010, 7528667241828618484]
[6629530352159708028,15496313222292466864,6048298245521876612]
if platform.mac_ver()[-1] == "arm64"
else [6629530352159708028, 988796329533502010, 6048298245521876612],
dtype=pl.UInt64,
Expand Down

0 comments on commit 8497f95

Please sign in to comment.