Skip to content

Commit

Permalink
Tidy Arrow timestamp handling (#1887)
Browse files Browse the repository at this point in the history
* Use isinstance to check for timestamp type
* Only raise the timezone warning when a timezone-aware array is passed in
* Add a test to guarantee the previous point; this also cleans up all TZ warnings in the test output
  • Loading branch information
zundertj committed Nov 25, 2021
1 parent 2995508 commit 7a37efb
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 5 deletions.
9 changes: 5 additions & 4 deletions py-polars/polars/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,10 +421,11 @@ def pandas_to_pydf(
def coerce_arrow(array: "pa.Array") -> "pa.Array":
# also coerces timezone to naive representation
# units are accounted for by pyarrow
if "timestamp" in str(array.type):
warnings.warn(
"Conversion of (potentially) timezone aware to naive datetimes. TZ information may be lost",
)
if isinstance(array, pa.TimestampArray):
if array.type.tz is not None:
warnings.warn(
"Conversion of timezone aware to naive datetimes. TZ information may be lost",
)
ts_ms = pa.compute.cast(array, pa.timestamp("ms"), safe=False)
ms = pa.compute.cast(ts_ms, pa.int64())
del ts_ms
Expand Down
20 changes: 19 additions & 1 deletion py-polars/tests/test_datelike.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
from datetime import date, datetime, timedelta

import pyarrow as pa
import pytest

import polars as pl


Expand Down Expand Up @@ -159,7 +162,7 @@ def test_datetime_consistency() -> None:


def downsample_with_buckets() -> None:
(
assert (
pl.date_range(
low=datetime(2000, 10, 1, 23, 30),
high=datetime(2000, 10, 2, 0, 30),
Expand All @@ -172,3 +175,18 @@ def downsample_with_buckets() -> None:
)
.agg(pl.col("date_range").count().alias("bucket_count"))
).to_series(1).to_list() == [3, 2, 3, 1]


def test_timezone() -> None:
    """Check that only timezone-aware Arrow timestamps warn on conversion.

    A naive timestamp array must convert without the timezone warning, a
    timezone-aware one must emit it, and both must yield equal Series because
    the timezone is dropped during conversion.
    """
    import warnings

    # without timezone; the timezone warning must NOT be emitted here
    ts = pa.timestamp("s")
    data = pa.array([1000, 2000], type=ts)
    with warnings.catch_warnings(record=True) as caught:
        # "always" prevents the once-per-location registry from hiding a
        # warning that was already triggered earlier in the test session
        warnings.simplefilter("always")
        s: pl.Series = pl.from_arrow(data)  # type: ignore
    # only look at the timezone warning so unrelated warnings from
    # dependencies do not make this test flaky
    tz_warnings = [w for w in caught if "timezone aware" in str(w.message)]
    assert not tz_warnings

    # with timezone; we do expect a warning here
    tz_ts = pa.timestamp("s", tz="America/New_York")
    tz_data = pa.array([1000, 2000], type=tz_ts)
    # `match` pins the expectation to the timezone warning instead of
    # accepting any warning whatsoever
    with pytest.warns(Warning, match="timezone aware"):
        tz_s: pl.Series = pl.from_arrow(tz_data)  # type: ignore

    # timezones have no effect, i.e. `s` equals `tz_s`
    assert s.series_equal(tz_s)

0 comments on commit 7a37efb

Please sign in to comment.