Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

depr(rust, python): warn that, in a future version of Polars, constructing a Series with time-zone-aware datetimes will result in a dtype with UTC timezone #8908

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions py-polars/polars/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,14 @@ class TooManyRowsReturnedError(RowsError):
"""Exception raised when more rows than expected are returned."""


class TimeZoneAwareConstructorWarning(Warning):
    """
    Warning raised when constructing a Series from time-zone-aware datetimes.

    The warning is emitted when the datetimes passed to the constructor carry
    a time zone other than UTC. Such input currently produces a Series in the
    original time zone; in a future version of polars it will produce a Series
    with UTC time zone instead.

    To silence the warning and opt in to the future behaviour, filter warnings
    of this class and call ``.dt.convert_time_zone("UTC")`` on the result.
    """


class ChronoFormatWarning(Warning):
"""
Warning raised when a chrono format string contains dubious patterns.
Expand Down
15 changes: 13 additions & 2 deletions py-polars/polars/utils/_construction.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

import contextlib
import warnings
from datetime import date, datetime, time, timedelta
from decimal import Decimal as PyDecimal
from functools import lru_cache, partial, singledispatch
Expand Down Expand Up @@ -55,11 +56,11 @@
from polars.dependencies import numpy as np
from polars.dependencies import pandas as pd
from polars.dependencies import pyarrow as pa
from polars.exceptions import ComputeError, ShapeError
from polars.exceptions import ComputeError, ShapeError, TimeZoneAwareConstructorWarning
from polars.utils._wrap import wrap_df, wrap_s
from polars.utils.convert import _tzinfo_to_str
from polars.utils.meta import threadpool_size
from polars.utils.various import _is_generator, arrlen, range_to_series
from polars.utils.various import _is_generator, arrlen, find_stacklevel, range_to_series

with contextlib.suppress(ImportError): # Module not available when building docs
from polars.polars import PyDataFrame, PySeries
Expand Down Expand Up @@ -441,6 +442,16 @@ def sequence_to_pyseries(
"Given time_zone is different from that of timezone aware datetimes."
f" Given: '{dtype_tz}', got: '{tz}'."
)
if tz != "UTC":
warnings.warn(
"In a future version of polars, constructing a Series with time-zone-aware "
"datetimes will result in a Series with UTC time zone. "
"To silence this warning and opt-in to the new behaviour, you can filter "
"warnings of class TimeZoneAwareConstructorWarning and then use "
"`.dt.convert_time_zone('UTC')`.",
TimeZoneAwareConstructorWarning,
stacklevel=find_stacklevel(),
)
return s.dt.replace_time_zone("UTC").dt.convert_time_zone(tz)._s
return s._s

Expand Down
68 changes: 43 additions & 25 deletions py-polars/tests/unit/datatypes/test_temporal.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import contextlib
import io
from datetime import date, datetime, time, timedelta, timezone
from typing import TYPE_CHECKING, Any, cast, no_type_check
Expand All @@ -11,7 +12,12 @@

import polars as pl
from polars.datatypes import DATETIME_DTYPES, DTYPE_TEMPORAL_UNITS, TEMPORAL_DTYPES
from polars.exceptions import ArrowError, ComputeError, PolarsPanicError
from polars.exceptions import (
ArrowError,
ComputeError,
PolarsPanicError,
TimeZoneAwareConstructorWarning,
)
from polars.testing import (
assert_frame_equal,
assert_series_equal,
Expand Down Expand Up @@ -341,9 +347,10 @@ def test_datetime_consistency() -> None:
datetime(3099, 12, 31, 23, 59, 59, 123456, tzinfo=ZoneInfo("Asia/Kathmandu")),
datetime(9999, 12, 31, 23, 59, 59, 999999, tzinfo=ZoneInfo("Asia/Kathmandu")),
]
ddf = pl.DataFrame({"dtm": test_data}).with_columns(
pl.col("dtm").dt.nanosecond().alias("ns")
)
with pytest.warns(TimeZoneAwareConstructorWarning, match=r"UTC time zone"):
ddf = pl.DataFrame({"dtm": test_data}).with_columns(
pl.col("dtm").dt.nanosecond().alias("ns")
)
assert ddf.rows() == [
(test_data[0], 555555000),
(test_data[1], 986754000),
Expand All @@ -357,7 +364,8 @@ def test_datetime_consistency() -> None:
datetime(2021, 11, 7, 1, 0, fold=1, tzinfo=ZoneInfo("US/Central")),
datetime(2021, 11, 7, 2, 0, tzinfo=ZoneInfo("US/Central")),
]
ddf = pl.DataFrame({"dtm": test_data})
with pytest.warns(TimeZoneAwareConstructorWarning, match=r"UTC time zone"):
ddf = pl.DataFrame({"dtm": test_data})
assert ddf.rows() == [
(test_data[0],),
(test_data[1],),
Expand Down Expand Up @@ -2260,7 +2268,8 @@ def test_tz_datetime_duration_arithm_5221() -> None:

def test_auto_infer_time_zone() -> None:
dt = datetime(2022, 10, 17, 10, tzinfo=ZoneInfo("Asia/Shanghai"))
s = pl.Series([dt])
with pytest.warns(TimeZoneAwareConstructorWarning, match=r"UTC time zone"):
s = pl.Series([dt])
assert s.dtype == pl.Datetime("us", "Asia/Shanghai")
assert s[0] == dt

Expand Down Expand Up @@ -3065,38 +3074,47 @@ def test_series_is_temporal() -> None:


@pytest.mark.parametrize(
"time_zone",
("time_zone", "warn"),
[
None,
timezone.utc,
"America/Caracas",
"Asia/Kathmandu",
"Asia/Taipei",
"Europe/Amsterdam",
"Europe/Lisbon",
"Indian/Maldives",
"Pacific/Norfolk",
"Pacific/Samoa",
"Turkey",
"US/Eastern",
"UTC",
"Zulu",
(None, False),
(timezone.utc, False),
("America/Caracas", True),
("Asia/Kathmandu", True),
("Asia/Taipei", True),
("Europe/Amsterdam", True),
("Europe/Lisbon", True),
("Indian/Maldives", True),
("Pacific/Norfolk", True),
("Pacific/Samoa", True),
("Turkey", True),
("US/Eastern", True),
("UTC", False),
("Zulu", True),
],
)
def test_misc_precision_any_value_conversion(time_zone: Any) -> None:
def test_misc_precision_any_value_conversion(time_zone: Any, warn: bool) -> None:
tz = ZoneInfo(time_zone) if isinstance(time_zone, str) else time_zone
context_manager: contextlib.AbstractContextManager[pytest.WarningsRecorder | None]
msg = r"UTC time zone"
if warn:
context_manager = pytest.warns(TimeZoneAwareConstructorWarning, match=msg)
else:
context_manager = contextlib.nullcontext()

# default precision (μs)
dt = datetime(2514, 5, 30, 1, 53, 4, 986754, tzinfo=tz)
assert pl.Series([dt]).to_list() == [dt]
with context_manager:
assert pl.Series([dt]).to_list() == [dt]

# ms precision
dt = datetime(2243, 1, 1, 0, 0, 0, 1000, tzinfo=tz)
assert pl.Series([dt]).cast(pl.Datetime("ms", time_zone)).to_list() == [dt]
with context_manager:
assert pl.Series([dt]).cast(pl.Datetime("ms", time_zone)).to_list() == [dt]

# ns precision
dt = datetime(2256, 1, 1, 0, 0, 0, 1, tzinfo=tz)
assert pl.Series([dt]).cast(pl.Datetime("ns", time_zone)).to_list() == [dt]
with context_manager:
assert pl.Series([dt]).cast(pl.Datetime("ns", time_zone)).to_list() == [dt]


@pytest.mark.parametrize(
Expand Down
22 changes: 11 additions & 11 deletions py-polars/tests/unit/operations/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,20 +461,20 @@ def test_groupby_dynamic_flat_agg_4814() -> None:
(timedelta(seconds=10), "100s"),
],
)
@pytest.mark.parametrize("tzinfo", [None, ZoneInfo("Asia/Kathmandu")])
@pytest.mark.parametrize("time_zone", [None, "Asia/Kathmandu"])
def test_groupby_dynamic_overlapping_groups_flat_apply_multiple_5038(
every: str | timedelta, period: str | timedelta, tzinfo: ZoneInfo | None
every: str | timedelta, period: str | timedelta, time_zone: str | None
) -> None:
assert (
pl.DataFrame(
{
"a": [
datetime(2021, 1, 1, tzinfo=tzinfo) + timedelta(seconds=2**i)
for i in range(10)
datetime(2021, 1, 1) + timedelta(seconds=2**i) for i in range(10)
],
"b": [float(i) for i in range(10)],
}
)
.with_columns(pl.col("a").dt.replace_time_zone(time_zone))
.lazy()
.set_sorted("a")
.groupby_dynamic("a", every=every, period=period)
Expand Down Expand Up @@ -660,20 +660,20 @@ def test_overflow_mean_partitioned_groupby_5194(dtype: pl.PolarsDataType) -> Non
) == {"group": [1, 2], "data": [10000000.0, 10000000.0]}


@pytest.mark.parametrize("tzinfo", [None, ZoneInfo("Asia/Kathmandu")])
@pytest.mark.parametrize("time_zone", [None, "Asia/Kathmandu"])
def test_groupby_dynamic_elementwise_following_mean_agg_6904(
tzinfo: ZoneInfo | None,
time_zone: str | None,
) -> None:
df = (
pl.DataFrame(
{
"a": [
datetime(2021, 1, 1, tzinfo=tzinfo) + timedelta(seconds=2**i)
for i in range(5)
datetime(2021, 1, 1) + timedelta(seconds=2**i) for i in range(5)
],
"b": [float(i) for i in range(5)],
}
)
.with_columns(pl.col("a").dt.replace_time_zone(time_zone))
Copy link
Collaborator Author

@MarcoGorelli MarcoGorelli May 18, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Setting the time zone outside the constructor to avoid the warning (the constructor isn't the point of this test anyway); likewise for the one below.

.lazy()
.set_sorted("a")
.groupby_dynamic("a", every="10s", period="100s")
Expand All @@ -685,12 +685,12 @@ def test_groupby_dynamic_elementwise_following_mean_agg_6904(
pl.DataFrame(
{
"a": [
datetime(2021, 1, 1, 0, 0, tzinfo=tzinfo),
datetime(2021, 1, 1, 0, 0, 10, tzinfo=tzinfo),
datetime(2021, 1, 1, 0, 0),
datetime(2021, 1, 1, 0, 0, 10),
],
"c": [0.9092974268256817, -0.7568024953079282],
}
),
).with_columns(pl.col("a").dt.replace_time_zone(time_zone)),
)


Expand Down
19 changes: 11 additions & 8 deletions py-polars/tests/unit/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

import polars as pl
from polars.dependencies import _ZONEINFO_AVAILABLE, dataclasses, pydantic
from polars.exceptions import TimeZoneAwareConstructorWarning
from polars.testing import assert_frame_equal, assert_series_equal
from polars.utils._construction import type_hints

Expand Down Expand Up @@ -674,15 +675,17 @@ def test_init_1d_sequence() -> None:
[datetime(2020, 1, 1, tzinfo=timezone.utc)], schema={"ts": pl.Datetime("ms")}
)
assert df.schema == {"ts": pl.Datetime("ms", "UTC")}
df = pl.DataFrame(
[datetime(2020, 1, 1, tzinfo=timezone(timedelta(hours=1)))],
schema={"ts": pl.Datetime("ms")},
)
with pytest.warns(TimeZoneAwareConstructorWarning, match=r"UTC time zone"):
df = pl.DataFrame(
[datetime(2020, 1, 1, tzinfo=timezone(timedelta(hours=1)))],
schema={"ts": pl.Datetime("ms")},
)
assert df.schema == {"ts": pl.Datetime("ms", "+01:00")}
df = pl.DataFrame(
[datetime(2020, 1, 1, tzinfo=ZoneInfo("Asia/Kathmandu"))],
schema={"ts": pl.Datetime("ms")},
)
with pytest.warns(TimeZoneAwareConstructorWarning, match=r"UTC time zone"):
df = pl.DataFrame(
[datetime(2020, 1, 1, tzinfo=ZoneInfo("Asia/Kathmandu"))],
schema={"ts": pl.Datetime("ms")},
)
assert df.schema == {"ts": pl.Datetime("ms", "Asia/Kathmandu")}


Expand Down