Skip to content

Commit

Permalink
REF: Prefer testing and documenting zoneinfo instead of pytz (#59016)
Browse files Browse the repository at this point in the history
* REF: Prefer testing and documenting zoneinfo instead of pytz

* Fix tests

* Remove bad test case, fix bad attribute
  • Loading branch information
mroeschke committed Jun 24, 2024
1 parent bd7ece0 commit 1cf98aa
Show file tree
Hide file tree
Showing 62 changed files with 504 additions and 430 deletions.
15 changes: 9 additions & 6 deletions asv_bench/benchmarks/tslibs/timestamp.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from datetime import datetime
from datetime import (
datetime,
timezone,
)
import zoneinfo

import numpy as np
import pytz

from pandas import Timestamp

Expand All @@ -12,7 +15,7 @@ class TimestampConstruction:
def setup(self):
self.npdatetime64 = np.datetime64("2020-01-01 00:00:00")
self.dttime_unaware = datetime(2020, 1, 1, 0, 0, 0)
self.dttime_aware = datetime(2020, 1, 1, 0, 0, 0, 0, pytz.UTC)
self.dttime_aware = datetime(2020, 1, 1, 0, 0, 0, 0, timezone.utc)
self.ts = Timestamp("2020-01-01 00:00:00")

def time_parse_iso8601_no_tz(self):
Expand Down Expand Up @@ -113,7 +116,7 @@ def setup(self, tz):
self.ts = Timestamp("2017-08-25 08:16:14", tz=tz)

def time_replace_tz(self, tz):
self.ts.replace(tzinfo=pytz.timezone("US/Eastern"))
self.ts.replace(tzinfo=zoneinfo.ZoneInfo("US/Eastern"))

def time_replace_None(self, tz):
self.ts.replace(tzinfo=None)
Expand Down Expand Up @@ -144,8 +147,8 @@ def time_ceil(self, tz):

class TimestampAcrossDst:
def setup(self):
dt = datetime(2016, 3, 27, 1)
self.tzinfo = pytz.timezone("CET").localize(dt, is_dst=False).tzinfo
dt = datetime(2016, 3, 27, 1, fold=0)
self.tzinfo = dt.astimezone(zoneinfo.ZoneInfo("Europe/Berlin")).tzinfo
self.ts2 = Timestamp(dt)

def time_replace_across_dst(self):
Expand Down
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/tslibs/tslib.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@
timedelta,
timezone,
)
import zoneinfo

from dateutil.tz import (
gettz,
tzlocal,
)
import numpy as np
import pytz

try:
from pandas._libs.tslibs import ints_to_pydatetime
Expand All @@ -38,7 +38,7 @@
None,
timezone.utc,
timezone(timedelta(minutes=60)),
pytz.timezone("US/Pacific"),
zoneinfo.ZoneInfo("US/Pacific"),
gettz("Asia/Tokyo"),
tzlocal_obj,
]
Expand Down
5 changes: 3 additions & 2 deletions asv_bench/benchmarks/tslibs/tz_convert.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from datetime import timezone

import numpy as np
from pytz import UTC

from pandas._libs.tslibs.tzconversion import tz_localize_to_utc

Expand Down Expand Up @@ -41,7 +42,7 @@ def time_tz_convert_from_utc(self, size, tz):
# dti = DatetimeIndex(self.i8data, tz=tz)
# dti.tz_localize(None)
if old_sig:
tz_convert_from_utc(self.i8data, UTC, tz)
tz_convert_from_utc(self.i8data, timezone.utc, tz)
else:
tz_convert_from_utc(self.i8data, tz)

Expand Down
6 changes: 4 additions & 2 deletions doc/source/user_guide/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4990,7 +4990,7 @@ Caveats
convenience you can use ``store.flush(fsync=True)`` to do this for you.
* Once a ``table`` is created columns (DataFrame)
are fixed; only exactly the same columns can be appended
* Be aware that timezones (e.g., ``pytz.timezone('US/Eastern')``)
* Be aware that timezones (e.g., ``zoneinfo.ZoneInfo('US/Eastern')``)
are not necessarily equal across timezone versions. So if data is
localized to a specific timezone in the HDFStore using one version
of a timezone library and that data is updated with another version, the data
Expand Down Expand Up @@ -5169,6 +5169,8 @@ See the `Full Documentation <https://github.com/wesm/feather>`__.

.. ipython:: python
import pytz
df = pd.DataFrame(
{
"a": list("abc"),
Expand All @@ -5178,7 +5180,7 @@ See the `Full Documentation <https://github.com/wesm/feather>`__.
"e": [True, False, True],
"f": pd.Categorical(list("abc")),
"g": pd.date_range("20130101", periods=3),
"h": pd.date_range("20130101", periods=3, tz="US/Eastern"),
"h": pd.date_range("20130101", periods=3, tz=pytz.timezone("US/Eastern")),
"i": pd.date_range("20130101", periods=3, freq="ns"),
}
)
Expand Down
16 changes: 9 additions & 7 deletions doc/source/user_guide/timeseries.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2337,7 +2337,7 @@ Time zone handling
------------------

pandas provides rich support for working with timestamps in different time
zones using the ``pytz`` and ``dateutil`` libraries or :class:`datetime.timezone`
zones using the ``zoneinfo``, ``pytz`` and ``dateutil`` libraries or :class:`datetime.timezone`
objects from the standard library.


Expand All @@ -2354,14 +2354,14 @@ By default, pandas objects are time zone unaware:
To localize these dates to a time zone (assign a particular time zone to a naive date),
you can use the ``tz_localize`` method or the ``tz`` keyword argument in
:func:`date_range`, :class:`Timestamp`, or :class:`DatetimeIndex`.
You can either pass ``pytz`` or ``dateutil`` time zone objects or Olson time zone database strings.
You can either pass ``zoneinfo``, ``pytz`` or ``dateutil`` time zone objects or Olson time zone database strings.
Olson time zone strings will return ``pytz`` time zone objects by default.
To return ``dateutil`` time zone objects, append ``dateutil/`` before the string.

* In ``pytz`` you can find a list of common (and less common) time zones using
``from pytz import common_timezones, all_timezones``.
* For ``zoneinfo``, a list of available timezones is available from :py:func:`zoneinfo.available_timezones`.
* In ``pytz`` you can find a list of common (and less common) time zones using ``pytz.all_timezones``.
* ``dateutil`` uses the OS time zones so there isn't a fixed list available. For
common zones, the names are the same as ``pytz``.
common zones, the names are the same as ``pytz`` and ``zoneinfo``.

.. ipython:: python
Expand Down Expand Up @@ -2466,7 +2466,7 @@ you can use the ``tz_convert`` method.

.. warning::

If you are using dates beyond 2038-01-18, due to current deficiencies
If you are using dates beyond 2038-01-18 with ``pytz``, due to current deficiencies
in the underlying libraries caused by the year 2038 problem, daylight saving time (DST) adjustments
to timezone aware dates will not be applied. If and when the underlying libraries are fixed,
the DST transitions will be applied.
Expand All @@ -2475,9 +2475,11 @@ you can use the ``tz_convert`` method.

.. ipython:: python
import pytz
d_2037 = "2037-03-31T010101"
d_2038 = "2038-03-31T010101"
DST = "Europe/London"
DST = pytz.timezone("Europe/London")
assert pd.Timestamp(d_2037, tz=DST) != pd.Timestamp(d_2037, tz="GMT")
assert pd.Timestamp(d_2038, tz=DST) == pd.Timestamp(d_2038, tz="GMT")
Expand Down
14 changes: 7 additions & 7 deletions pandas/_libs/tslibs/nattype.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -841,7 +841,7 @@ class NaTType(_NaT):
Parameters
----------
tz : str, pytz.timezone, dateutil.tz.tzfile or None
tz : str, zoneinfo.ZoneInfo, pytz.timezone, dateutil.tz.tzfile or None
Time zone for time which Timestamp will be converted to.
None will remove timezone holding UTC time.
Expand Down Expand Up @@ -894,7 +894,7 @@ class NaTType(_NaT):
----------
ordinal : int
Date corresponding to a proleptic Gregorian ordinal.
tz : str, pytz.timezone, dateutil.tz.tzfile or None
tz : str, zoneinfo.ZoneInfo, pytz.timezone, dateutil.tz.tzfile or None
Time zone for the Timestamp.
Notes
Expand Down Expand Up @@ -1307,7 +1307,7 @@ timedelta}, default 'raise'
Parameters
----------
tz : str, pytz.timezone, dateutil.tz.tzfile or None
tz : str, zoneinfo.ZoneInfo, pytz.timezone, dateutil.tz.tzfile or None
Time zone for time which Timestamp will be converted to.
None will remove timezone holding UTC time.
Expand Down Expand Up @@ -1361,7 +1361,7 @@ timedelta}, default 'raise'
Parameters
----------
tz : str, pytz.timezone, dateutil.tz.tzfile or None
tz : str, zoneinfo.ZoneInfo, pytz.timezone, dateutil.tz.tzfile or None
Time zone for time which Timestamp will be converted to.
None will remove timezone holding local time.
Expand Down Expand Up @@ -1461,13 +1461,13 @@ default 'raise'
Replace timezone (not a conversion):
>>> import pytz
>>> ts.replace(tzinfo=pytz.timezone('US/Pacific'))
>>> import zoneinfo
>>> ts.replace(tzinfo=zoneinfo.ZoneInfo('US/Pacific'))
Timestamp('2020-03-14 15:32:52.192548651-0700', tz='US/Pacific')
Analogous for ``pd.NaT``:
>>> pd.NaT.replace(tzinfo=pytz.timezone('US/Pacific'))
>>> pd.NaT.replace(tzinfo=zoneinfo.ZoneInfo('US/Pacific'))
NaT
""",
)
Expand Down
14 changes: 7 additions & 7 deletions pandas/_libs/tslibs/timestamps.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1374,7 +1374,7 @@ class Timestamp(_Timestamp):
Timezone info.
nanosecond : int, optional, default 0
Value of nanosecond.
tz : str, pytz.timezone, dateutil.tz.tzfile or None
tz : str, zoneinfo.ZoneInfo, pytz.timezone, dateutil.tz.tzfile or None
Time zone for time which Timestamp will have.
unit : str
Unit used for conversion if ts_input is of type int or float. The
Expand Down Expand Up @@ -1446,7 +1446,7 @@ class Timestamp(_Timestamp):
----------
ordinal : int
Date corresponding to a proleptic Gregorian ordinal.
tz : str, pytz.timezone, dateutil.tz.tzfile or None
tz : str, zoneinfo.ZoneInfo, pytz.timezone, dateutil.tz.tzfile or None
Time zone for the Timestamp.
Notes
Expand Down Expand Up @@ -2393,7 +2393,7 @@ timedelta}, default 'raise'
Parameters
----------
tz : str, pytz.timezone, dateutil.tz.tzfile or None
tz : str, zoneinfo.ZoneInfo, pytz.timezone, dateutil.tz.tzfile or None
Time zone for time which Timestamp will be converted to.
None will remove timezone holding local time.
Expand Down Expand Up @@ -2500,7 +2500,7 @@ default 'raise'
Parameters
----------
tz : str, pytz.timezone, dateutil.tz.tzfile or None
tz : str, zoneinfo.ZoneInfo, pytz.timezone, dateutil.tz.tzfile or None
Time zone for time which Timestamp will be converted to.
None will remove timezone holding UTC time.
Expand Down Expand Up @@ -2604,13 +2604,13 @@ default 'raise'
Replace timezone (not a conversion):
>>> import pytz
>>> ts.replace(tzinfo=pytz.timezone('US/Pacific'))
>>> import zoneinfo
>>> ts.replace(tzinfo=zoneinfo.ZoneInfo('US/Pacific'))
Timestamp('2020-03-14 15:32:52.192548651-0700', tz='US/Pacific')
Analogous for ``pd.NaT``:
>>> pd.NaT.replace(tzinfo=pytz.timezone('US/Pacific'))
>>> pd.NaT.replace(tzinfo=zoneinfo.ZoneInfo('US/Pacific'))
NaT
"""

Expand Down
39 changes: 19 additions & 20 deletions pandas/_libs/tslibs/timezones.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -119,27 +119,26 @@ cpdef inline object get_timezone(tzinfo tz):
raise TypeError("tz argument cannot be None")
if is_utc(tz):
return tz
elif is_zoneinfo(tz):
return tz.key
elif treat_tz_as_pytz(tz):
zone = tz.zone
if zone is None:
return tz
return zone
elif treat_tz_as_dateutil(tz):
if ".tar.gz" in tz._filename:
raise ValueError(
"Bad tz filename. Dateutil on python 3 on windows has a "
"bug which causes tzfile._filename to be the same for all "
"timezone files. Please construct dateutil timezones "
'implicitly by passing a string like "dateutil/Europe'
'/London" when you construct your pandas objects instead '
"of passing a timezone object. See "
"https://github.com/pandas-dev/pandas/pull/7362")
return "dateutil/" + tz._filename
else:
if treat_tz_as_dateutil(tz):
if ".tar.gz" in tz._filename:
raise ValueError(
"Bad tz filename. Dateutil on python 3 on windows has a "
"bug which causes tzfile._filename to be the same for all "
"timezone files. Please construct dateutil timezones "
'implicitly by passing a string like "dateutil/Europe'
'/London" when you construct your pandas objects instead '
"of passing a timezone object. See "
"https://github.com/pandas-dev/pandas/pull/7362")
return "dateutil/" + tz._filename
else:
# tz is a pytz timezone or unknown.
try:
zone = tz.zone
if zone is None:
return tz
return zone
except AttributeError:
return tz
return tz


cpdef inline tzinfo maybe_get_tz(object tz):
Expand Down
3 changes: 1 addition & 2 deletions pandas/_testing/_hypothesis.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

from hypothesis import strategies as st
from hypothesis.extra.dateutil import timezones as dateutil_timezones
from hypothesis.extra.pytz import timezones as pytz_timezones

from pandas.compat import is_platform_windows

Expand Down Expand Up @@ -57,7 +56,7 @@
DATETIME_JAN_1_1900_OPTIONAL_TZ = st.datetimes(
min_value=pd.Timestamp(1900, 1, 1).to_pydatetime(), # pyright: ignore[reportArgumentType]
max_value=pd.Timestamp(1900, 1, 1).to_pydatetime(), # pyright: ignore[reportArgumentType]
timezones=st.one_of(st.none(), dateutil_timezones(), pytz_timezones()),
timezones=st.one_of(st.none(), dateutil_timezones(), st.timezones()),
)

DATETIME_IN_PD_TIMESTAMP_RANGE_NO_TZ = st.datetimes(
Expand Down
12 changes: 6 additions & 6 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -594,7 +594,7 @@ def tz(self) -> tzinfo | None:
Returns
-------
datetime.tzinfo, pytz.tzinfo.BaseTZInfo, dateutil.tz.tz.tzfile, or None
zoneinfo.ZoneInfo, datetime.tzinfo, pytz.tzinfo.BaseTZInfo, dateutil.tz.tz.tzfile, or None
Returns None when the array is tz-naive.
See Also
Expand Down Expand Up @@ -624,7 +624,7 @@ def tz(self) -> tzinfo | None:
... )
>>> idx.tz
datetime.timezone.utc
"""
""" # noqa: E501
# GH 18595
return getattr(self.dtype, "tz", None)

Expand Down Expand Up @@ -863,7 +863,7 @@ def tz_convert(self, tz) -> Self:
Parameters
----------
tz : str, pytz.timezone, dateutil.tz.tzfile, datetime.tzinfo or None
tz : str, zoneinfo.ZoneInfo, pytz.timezone, dateutil.tz.tzfile, datetime.tzinfo or None
Time zone for time. Corresponding timestamps would be converted
to this time zone of the Datetime Array/Index. A `tz` of None will
convert to UTC and remove the timezone information.
Expand Down Expand Up @@ -923,7 +923,7 @@ def tz_convert(self, tz) -> Self:
'2014-08-01 08:00:00',
'2014-08-01 09:00:00'],
dtype='datetime64[ns]', freq='h')
"""
""" # noqa: E501
tz = timezones.maybe_get_tz(tz)

if self.tz is None:
Expand Down Expand Up @@ -955,7 +955,7 @@ def tz_localize(
Parameters
----------
tz : str, pytz.timezone, dateutil.tz.tzfile, datetime.tzinfo or None
tz : str, zoneinfo.ZoneInfo, pytz.timezone, dateutil.tz.tzfile, datetime.tzinfo or None
Time zone to convert timestamps to. Passing ``None`` will
remove the time zone information preserving local time.
ambiguous : 'infer', 'NaT', bool array, default 'raise'
Expand Down Expand Up @@ -1081,7 +1081,7 @@ def tz_localize(
0 2015-03-29 03:30:00+02:00
1 2015-03-29 03:30:00+02:00
dtype: datetime64[ns, Europe/Warsaw]
"""
""" # noqa: E501
nonexistent_options = ("raise", "NaT", "shift_forward", "shift_backward")
if nonexistent not in nonexistent_options and not isinstance(
nonexistent, timedelta
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ class DatetimeIndex(DatetimeTimedeltaMixin):
One of pandas date offset strings or corresponding objects. The string
'infer' can be passed in order to set the frequency of the index as the
inferred frequency upon creation.
tz : pytz.timezone or dateutil.tz.tzfile or datetime.tzinfo or str
tz : zoneinfo.ZoneInfo, pytz.timezone, dateutil.tz.tzfile, datetime.tzinfo or str
Set the Timezone of the data.
ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise'
When clocks moved backward due to DST, ambiguous times may arise.
Expand Down
Loading

0 comments on commit 1cf98aa

Please sign in to comment.