Skip to content

Commit

Permalink
Allow date_range to produce date ranges as well as datetime (#3798
Browse files Browse the repository at this point in the history
)
  • Loading branch information
alexander-beedie committed Jun 24, 2022
1 parent 245f86e commit f6ea383
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 36 deletions.
76 changes: 50 additions & 26 deletions py-polars/polars/internals/functions.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from datetime import datetime, timedelta
from typing import Optional, Sequence, Union, overload
from datetime import date, datetime, timedelta
from typing import Optional, Sequence, Tuple, Union, overload

from polars import internals as pli
from polars.datatypes import Date
from polars.utils import (
_datetime_to_pl_timestamp,
_timedelta_to_pl_duration,
Expand Down Expand Up @@ -146,41 +147,59 @@ def concat(
return out


def _ensure_datetime(value: Union[date, datetime]) -> Tuple[datetime, bool]:
is_date_type = False
if isinstance(value, date) and not isinstance(value, datetime):
value = datetime(value.year, value.month, value.day)
is_date_type = True
return value, is_date_type


def _interval_granularity(interval: str) -> str:
return interval[-2:].lstrip("0123456789")


def date_range(
low: datetime,
high: datetime,
low: Union[date, datetime],
high: Union[date, datetime],
interval: Union[str, timedelta],
closed: Optional[str] = "both",
name: Optional[str] = None,
time_unit: Optional[str] = None,
) -> "pli.Series":
"""
Create a date range of type `Datetime`.
Create a range of type `Datetime` (or `Date`).
Parameters
----------
low
Lower bound of the date range
Lower bound of the date range.
high
Upper bound of the date range
Upper bound of the date range.
interval
Interval periods
A python timedelta object or a polars duration `str`
e.g.: "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
closed {None, 'left', 'right', 'both', 'none'}
Make the interval closed to the 'left', 'right', 'none' or 'both' sides.
name
Name of the output Series
Name of the output Series.
time_unit
Set the time unit; one of {'ns', 'ms'}
Set the time unit; one of {'ns', 'us', 'ms'}.
Notes
-----
If both `low` and `high` are passed as date types (not datetime), and the
interval granularity is no finer than 1d, the returned range is also of
type date. All other permutations return a datetime Series.
Returns
-------
A Series of type `Datetime`
A Series of type `Datetime` or `Date`.
Examples
--------
>>> from datetime import datetime
>>> from datetime import datetime, date
>>> pl.date_range(datetime(1985, 1, 1), datetime(2015, 7, 1), "1d12h")
shape: (7426,)
Series: '' [datetime[ns]]
Expand All @@ -190,32 +209,29 @@ def date_range(
1985-01-04 00:00:00
1985-01-05 12:00:00
1985-01-07 00:00:00
1985-01-08 12:00:00
1985-01-10 00:00:00
1985-01-11 12:00:00
1985-01-13 00:00:00
1985-01-14 12:00:00
1985-01-16 00:00:00
1985-01-17 12:00:00
...
2015-06-14 00:00:00
2015-06-15 12:00:00
2015-06-17 00:00:00
2015-06-18 12:00:00
2015-06-20 00:00:00
2015-06-21 12:00:00
2015-06-23 00:00:00
2015-06-24 12:00:00
2015-06-26 00:00:00
2015-06-27 12:00:00
2015-06-29 00:00:00
2015-06-30 12:00:00
]
>>> pl.date_range(date(2022, 1, 1), date(2022, 3, 1), "1mo", name="drange")
shape: (3,)
Series: 'drange' [date]
[
2022-01-01
2022-02-01
2022-03-01
]
"""
if isinstance(interval, timedelta):
interval = _timedelta_to_pl_duration(interval)

low, low_is_date = _ensure_datetime(low)
high, high_is_date = _ensure_datetime(high)

if in_nanoseconds_window(low) and in_nanoseconds_window(high) and time_unit is None:
tu = "ns"
elif time_unit is not None:
Expand All @@ -228,4 +244,12 @@ def date_range(
if name is None:
name = ""

return pli.wrap_s(_py_date_range(start, stop, interval, closed, name, tu))
dt_range = pli.wrap_s(_py_date_range(start, stop, interval, closed, name, tu))
if (
low_is_date
and high_is_date
and not _interval_granularity(interval).endswith(("h", "m", "s"))
):
dt_range = dt_range.cast(Date)

return dt_range
2 changes: 1 addition & 1 deletion py-polars/polars/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def _datetime_to_pl_timestamp(dt: datetime, tu: Optional[str]) -> int:
# python has us precision
return int(dt.replace(tzinfo=timezone.utc).timestamp() * 1e6)
else:
raise ValueError("expected on of {'ns', 'ms'}")
raise ValueError("expected on of {'ns', 'us', 'ms'}")


def _timedelta_to_pl_timedelta(td: timedelta, tu: Optional[str] = None) -> int:
Expand Down
34 changes: 29 additions & 5 deletions py-polars/tests/test_datelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,23 +234,47 @@ def test_truncate() -> None:

def test_date_range() -> None:
result = pl.date_range(
datetime(1985, 1, 1), datetime(2015, 7, 1), timedelta(days=1, hours=12)
date(1985, 1, 1), date(2015, 7, 1), timedelta(days=1, hours=12)
)
assert len(result) == 7426
assert result.dt[0] == datetime(1985, 1, 1)
assert result.dt[1] == datetime(1985, 1, 2, 12, 0)
assert result.dt[2] == datetime(1985, 1, 4, 0, 0)
assert result.dt[-1] == datetime(2015, 6, 30, 12, 0)

for tu in ["ns", "ms"]:
rng = pl.date_range(
datetime(2020, 1, 1), datetime(2020, 1, 2), "2h", time_unit=tu
)
for tu in ["ns", "us", "ms"]:
rng = pl.date_range(datetime(2020, 1, 1), date(2020, 1, 2), "2h", time_unit=tu)
assert rng.time_unit == tu
assert rng.shape == (13,)
assert rng.dt[0] == datetime(2020, 1, 1)
assert rng.dt[-1] == datetime(2020, 1, 2)

# if low/high are both date, the range is also date _iff_ the granularity is >= 1d
result = pl.date_range(date(2022, 1, 1), date(2022, 3, 1), "1mo", name="drange")
assert result.to_list() == [date(2022, 1, 1), date(2022, 2, 1), date(2022, 3, 1)]
assert result.name == "drange"

result = pl.date_range(date(2022, 1, 1), date(2022, 1, 2), "1h30m")
assert result == [
datetime(2022, 1, 1, 0, 0),
datetime(2022, 1, 1, 1, 30),
datetime(2022, 1, 1, 3, 0),
datetime(2022, 1, 1, 4, 30),
datetime(2022, 1, 1, 6, 0),
datetime(2022, 1, 1, 7, 30),
datetime(2022, 1, 1, 9, 0),
datetime(2022, 1, 1, 10, 30),
datetime(2022, 1, 1, 12, 0),
datetime(2022, 1, 1, 13, 30),
datetime(2022, 1, 1, 15, 0),
datetime(2022, 1, 1, 16, 30),
datetime(2022, 1, 1, 18, 0),
datetime(2022, 1, 1, 19, 30),
datetime(2022, 1, 1, 21, 0),
datetime(2022, 1, 1, 22, 30),
datetime(2022, 1, 2, 0, 0),
]


def test_date_comp() -> None:
one = datetime(2001, 1, 1)
Expand Down
11 changes: 7 additions & 4 deletions py-polars/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,13 @@ def test_in_ns_window() -> None:


def test_datetime_to_pl_timestamp() -> None:
out = _datetime_to_pl_timestamp(datetime(2121, 1, 1), "ns")
assert out == 4765132800000000000
out = _datetime_to_pl_timestamp(datetime(2121, 1, 1), "ms")
assert out == 4765132800000
for dt, tu, expected in (
(datetime(2121, 1, 1), "ns", 4765132800000000000),
(datetime(2121, 1, 1), "us", 4765132800000000),
(datetime(2121, 1, 1), "ms", 4765132800000),
):
out = _datetime_to_pl_timestamp(dt, tu)
assert out == expected


def test_date_to_pl_date() -> None:
Expand Down

0 comments on commit f6ea383

Please sign in to comment.