Skip to content

Commit

Permalink
fix(python): ensure that polars_type_to_constructor works with tz-aware `Datetime` dtypes (#5239)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-beedie committed Oct 17, 2022
1 parent 8d975f4 commit acf7f65
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 16 deletions.
2 changes: 1 addition & 1 deletion py-polars/polars/datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ class Datetime(DataType):
tu: TimeUnit | None = None
tz: str | None = None

def __init__(self, time_unit: TimeUnit = "us", time_zone: str | None = None):
def __init__(self, time_unit: TimeUnit | None = "us", time_zone: str | None = None):
"""
Calendar date and time type.
Expand Down
6 changes: 2 additions & 4 deletions py-polars/polars/datatypes_constructor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from typing import Any, Callable, Sequence

from polars.datatypes import (
DTYPE_TEMPORAL_UNITS,
Boolean,
Categorical,
Date,
Expand All @@ -23,6 +22,7 @@
UInt32,
UInt64,
Utf8,
_base_type,
)

try:
Expand Down Expand Up @@ -63,16 +63,14 @@
Object: PySeries.new_object,
Categorical: PySeries.new_str,
}
for tu in DTYPE_TEMPORAL_UNITS:
_POLARS_TYPE_TO_CONSTRUCTOR[Datetime(tu)] = PySeries.new_opt_i64
_POLARS_TYPE_TO_CONSTRUCTOR[Duration(tu)] = PySeries.new_opt_i64


def polars_type_to_constructor(
dtype: PolarsDataType,
) -> Callable[[str, Sequence[Any], bool], PySeries]:
"""Get the right PySeries constructor for the given Polars dtype."""
try:
dtype = _base_type(dtype)
return _POLARS_TYPE_TO_CONSTRUCTOR[dtype]
except KeyError: # pragma: no cover
raise ValueError(f"Cannot construct PySeries for type {dtype}.") from None
Expand Down
43 changes: 32 additions & 11 deletions py-polars/tests/unit/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -2395,25 +2395,46 @@ def test_union_with_aliases_4770() -> None:
assert lf.collect()["x"].to_list() == [1, 3, 4]


def test_init_with_timezone() -> None:
def test_init_datetimes_with_timezone() -> None:
tz_us = "America/New_York"
tz_europe = "Europe/Amsterdam"

dtm = datetime(2022, 10, 12, 12, 30, tzinfo=ZoneInfo("UTC"))
for tu in DTYPE_TEMPORAL_UNITS | frozenset([None]):
df = pl.DataFrame(
data={
"d1": [datetime(2022, 10, 12, 12, 30)],
"d2": [datetime(2022, 10, 12, 12, 30)],
},
data={"d1": [dtm], "d2": [dtm]},
columns=[
("d1", pl.Datetime(tu, "America/New_York")), # type: ignore[arg-type]
("d2", pl.Datetime(tu, "Asia/Tokyo")), # type: ignore[arg-type]
("d1", pl.Datetime(tu, tz_us)),
("d2", pl.Datetime(tu, tz_europe)),
],
)
# note: setting timezone doesn't change the underlying/physical value...
assert (df["d1"].to_physical() == df["d2"].to_physical()).all()
assert df.rows() == [
(
datetime(2022, 10, 12, 8, 30, tzinfo=ZoneInfo(tz_us)),
datetime(2022, 10, 12, 14, 30, tzinfo=ZoneInfo(tz_europe)),
)
]


def test_init_physical_with_timezone() -> None:
tz_uae = "Asia/Dubai"
tz_asia = "Asia/Tokyo"

# ...but (as expected) it _does_ change the interpretation of that value
dtm_us = 1665577800000000
for tu in DTYPE_TEMPORAL_UNITS | frozenset([None]):
dtm = {"ms": dtm_us // 1_000, "ns": dtm_us * 1_000}.get(str(tu), dtm_us)
df = pl.DataFrame(
data={"d1": [dtm], "d2": [dtm]},
columns=[
("d1", pl.Datetime(tu, tz_uae)),
("d2", pl.Datetime(tu, tz_asia)),
],
)
assert (df["d1"].to_physical() == df["d2"].to_physical()).all()
assert df.rows() == [
(
datetime(2022, 10, 12, 8, 30, tzinfo=ZoneInfo("America/New_York")),
datetime(2022, 10, 12, 21, 30, tzinfo=ZoneInfo("Asia/Tokyo")),
datetime(2022, 10, 12, 16, 30, tzinfo=ZoneInfo(tz_uae)),
datetime(2022, 10, 12, 21, 30, tzinfo=ZoneInfo(tz_asia)),
)
]

0 comments on commit acf7f65

Please sign in to comment.