Skip to content

Commit

Permalink
refactor(python): Parse fixed timezone offsets without pytz (#5769)
Browse files Browse the repository at this point in the history
  • Loading branch information
zundertj committed Dec 10, 2022
1 parent baf8d92 commit 88317c7
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 36 deletions.
41 changes: 16 additions & 25 deletions py-polars/polars/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import os
import sys
import warnings
from datetime import date, datetime, time, timedelta, timezone
from datetime import date, datetime, time, timedelta, timezone, tzinfo
from pathlib import Path
from typing import TYPE_CHECKING, Any, Callable, Iterable, Sequence, TypeVar, overload

Expand Down Expand Up @@ -259,38 +259,29 @@ def _to_python_datetime(
# cache here as we have a single tz per column
# and this function will be called on every conversion
@functools.lru_cache(16)
def _parse_fixed_tz_offset(offset: str) -> tzinfo:
    """
    Convert a fixed UTC offset string such as ``"+01:00"`` into a ``tzinfo``.

    The result is cached because a column carries a single time zone while
    this function is called for every converted value.

    Parameters
    ----------
    offset
        UTC offset in ISO 8601 form, e.g. ``"+01:00"`` or ``"-01:30"``.

    Returns
    -------
    tzinfo
        The fixed-offset time zone parsed from ``offset``.

    Raises
    ------
    ValueError
        If ``offset`` cannot be parsed, or does not contain a UTC offset.

    """
    try:
        # use fromisoformat to parse the offset: append it to an arbitrary
        # naive ISO timestamp and let the standard library do the work
        parsed = datetime.fromisoformat("2000-01-01T00:00:00" + offset)
    except ValueError:
        raise ValueError(f"Offset: {offset} not understood.") from None

    fixed_tz = parsed.tzinfo
    if fixed_tz is None:
        # Strings without an offset part (e.g. "") still parse, but yield a
        # naive datetime. Returning None here would make a later
        # ``astimezone(None)`` silently use the system local time zone.
        raise ValueError(f"Offset: {offset} not understood.")

    return fixed_tz


def _localize(dt: datetime, tz: str) -> datetime:
    """
    Convert ``dt`` to the time zone described by ``tz``.

    ``tz`` is first looked up as a ``zoneinfo`` key. If no such zone exists,
    it is parsed as a fixed UTC offset (e.g. ``"+01:00"``) instead; a
    ``ValueError`` from that parsing propagates to the caller.
    """
    # Declared up front so both branches assign the same type, and named so
    # that it does not shadow the ``tzinfo`` class imported from ``datetime``.
    target_tz: tzinfo

    # zone info installation should already be checked
    try:
        target_tz = zoneinfo.ZoneInfo(tz)
    except zoneinfo.ZoneInfoNotFoundError:
        # try fixed offset, which is not supported by ZoneInfo
        target_tz = _parse_fixed_tz_offset(tz)

    return dt.astimezone(target_tz)


def _in_notebook() -> bool:
Expand Down
1 change: 0 additions & 1 deletion py-polars/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ module = [
"connectorx",
"IPython.*",
"zoneinfo",
"pytz",
]
ignore_missing_imports = true

Expand Down
1 change: 0 additions & 1 deletion py-polars/requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
numpy
pandas
pyarrow
pytz
backports.zoneinfo; python_version < '3.9'
tzdata; platform_system == 'Windows'
xlsx2csv
Expand Down
22 changes: 13 additions & 9 deletions py-polars/tests/unit/test_timezone.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,30 @@
from datetime import datetime
from datetime import datetime, timedelta, timezone

import pytz
import pytest

import polars as pl


@pytest.mark.parametrize(
    ("tz_string", "expected_offset"),
    [
        ("+01:00", timedelta(minutes=60)),
        ("-01:30", timedelta(hours=-1, minutes=-30)),
    ],
)
def test_timezone_aware_strptime(tz_string: str, expected_offset: timedelta) -> None:
    """Strings with a fixed UTC offset parse to datetimes carrying that offset."""
    # Build the expected zone once; the parameter is deliberately not called
    # ``timedelta`` so it does not shadow the imported class.
    expected_tz = timezone(expected_offset)

    times = pl.DataFrame(
        {
            "delivery_datetime": [
                "2021-12-05 06:00:00" + tz_string,
                "2021-12-05 07:00:00" + tz_string,
                "2021-12-05 08:00:00" + tz_string,
            ]
        }
    )
    assert times.with_column(
        pl.col("delivery_datetime").str.strptime(pl.Datetime, fmt="%Y-%m-%d %H:%M:%S%z")
    ).to_dict(False) == {
        "delivery_datetime": [
            datetime(2021, 12, 5, 6, 0, tzinfo=expected_tz),
            datetime(2021, 12, 5, 7, 0, tzinfo=expected_tz),
            datetime(2021, 12, 5, 8, 0, tzinfo=expected_tz),
        ]
    }

0 comments on commit 88317c7

Please sign in to comment.