Skip to content

Commit

Permalink
API/BUG: Enforce "normalized" pytz timezones for DatetimeIndex (#20510)
Browse files Browse the repository at this point in the history
  • Loading branch information
mroeschke authored and jreback committed Apr 11, 2018
1 parent d91b706 commit fa24af9
Show file tree
Hide file tree
Showing 9 changed files with 114 additions and 14 deletions.
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -770,6 +770,8 @@ Datetimelike API Changes
- :func:`pandas.merge` provides a more informative error message when trying to merge on timezone-aware and timezone-naive columns (:issue:`15800`)
- For :class:`DatetimeIndex` and :class:`TimedeltaIndex` with ``freq=None``, addition or subtraction of integer-dtyped array or ``Index`` will raise ``NullFrequencyError`` instead of ``TypeError`` (:issue:`19895`)
- :class:`Timestamp` constructor now accepts a `nanosecond` keyword or positional argument (:issue:`18898`)
- :class:`DatetimeIndex` will now raise an ``AttributeError`` when the ``tz`` attribute is set after instantiation (:issue:`3746`)
- :class:`DatetimeIndex` with a ``pytz`` timezone will now return a consistent ``pytz`` timezone (:issue:`18595`)

.. _whatsnew_0230.api.other:

Expand Down Expand Up @@ -1127,6 +1129,7 @@ Groupby/Resample/Rolling
- Bug in :func:`DataFrame.resample().aggregate` not raising a ``KeyError`` when aggregating a non-existent column (:issue:`16766`, :issue:`19566`)
- Fixed a performance regression for ``GroupBy.nth`` and ``GroupBy.last`` with some object columns (:issue:`19283`)
- Bug in :func:`DataFrameGroupBy.cumsum` and :func:`DataFrameGroupBy.cumprod` when ``skipna`` was passed (:issue:`19806`)
- Bug in :func:`DataFrame.resample` that dropped timezone information (:issue:`13238`)

Sparse
^^^^^^
Expand Down
6 changes: 6 additions & 0 deletions pandas/_libs/tslibs/timestamps.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -700,6 +700,12 @@ class Timestamp(_Timestamp):
"""
return self.tzinfo

@tz.setter
def tz(self, value):
    """
    Reject direct assignment to ``tz``.

    Raises
    ------
    AttributeError
        Always; the message directs callers to tz_localize() or
        tz_convert().
    """
    # GH 3746: assigning tz must not act as an implicit localize/convert
    message = ("Cannot directly set timezone. Use tz_localize() "
               "or tz_convert() as appropriate")
    raise AttributeError(message)

def __setstate__(self, state):
self.value = state[0]
self.freq = state[1]
Expand Down
38 changes: 38 additions & 0 deletions pandas/_libs/tslibs/timezones.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -314,3 +314,41 @@ cpdef bint tz_compare(object start, object end):
"""
# GH 18523
return get_timezone(start) == get_timezone(end)


cpdef tz_standardize(object tz):
    """
    Return a consistent ("normalized") version of a pytz timezone object.

    When ``tz`` is treated as a pytz timezone, it is looked up again by name
    through ``pytz.timezone(str(tz))``; any other object is handed back
    unchanged.

    Parameters
    ----------
    tz : tz object

    Returns
    -------
    tz object

    Examples
    --------
    >>> tz
    <DstTzInfo 'US/Pacific' PST-1 day, 16:00:00 STD>
    >>> tz_standardize(tz)
    <DstTzInfo 'US/Pacific' LMT-1 day, 16:07:00 STD>

    >>> tz
    <DstTzInfo 'US/Pacific' LMT-1 day, 16:07:00 STD>
    >>> tz_standardize(tz)
    <DstTzInfo 'US/Pacific' LMT-1 day, 16:07:00 STD>

    >>> tz
    dateutil.tz.tz.tzutc
    >>> tz_standardize(tz)
    dateutil.tz.tz.tzutc
    """
    return pytz.timezone(str(tz)) if treat_tz_as_pytz(tz) else tz
2 changes: 1 addition & 1 deletion pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -1005,7 +1005,7 @@ def shift(self, n, freq=None):
result = self + offset

if hasattr(self, 'tz'):
result.tz = self.tz
result._tz = self.tz

return result

Expand Down
28 changes: 17 additions & 11 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -511,13 +511,7 @@ def _generate(cls, start, end, periods, name, offset,
'different timezones')

inferred_tz = timezones.maybe_get_tz(inferred_tz)

# these may need to be localized
tz = timezones.maybe_get_tz(tz)
if tz is not None:
date = start or end
if date.tzinfo is not None and hasattr(tz, 'localize'):
tz = tz.localize(date.replace(tzinfo=None)).tzinfo

if tz is not None and inferred_tz is not None:
if not timezones.tz_compare(inferred_tz, tz):
Expand Down Expand Up @@ -654,7 +648,8 @@ def _simple_new(cls, values, name=None, freq=None, tz=None,
result._data = values
result.name = name
result.offset = freq
result.tz = timezones.maybe_get_tz(tz)
result._tz = timezones.maybe_get_tz(tz)
result._tz = timezones.tz_standardize(result._tz)
result._reset_identity()
return result

Expand Down Expand Up @@ -684,6 +679,17 @@ def _values(self):
else:
return self.values

@property
def tz(self):
    """Return the timezone stored on the index in ``_tz`` (read-only)."""
    # GH 18595
    return self._tz

@tz.setter
def tz(self, value):
    """
    Reject direct assignment to ``tz``.

    Raises
    ------
    AttributeError
        Always; the message directs callers to tz_localize() or
        tz_convert().
    """
    # GH 3746: setting tz must not localize or convert the index
    message = ("Cannot directly set timezone. Use tz_localize() "
               "or tz_convert() as appropriate")
    raise AttributeError(message)

@property
def tzinfo(self):
"""
Expand Down Expand Up @@ -754,7 +760,7 @@ def _cached_range(cls, start=None, end=None, periods=None, offset=None,

cachedRange = DatetimeIndex._simple_new(arr)
cachedRange.offset = offset
cachedRange.tz = None
cachedRange = cachedRange.tz_localize(None)
cachedRange.name = None
drc[offset] = cachedRange
else:
Expand Down Expand Up @@ -831,7 +837,7 @@ def __setstate__(self, state):

self.name = own_state[0]
self.offset = own_state[1]
self.tz = own_state[2]
self._tz = timezones.tz_standardize(own_state[2])

# provide numpy < 1.7 compat
if nd_state[2] == 'M8[us]':
Expand Down Expand Up @@ -1175,7 +1181,7 @@ def union(self, other):
else:
result = Index.union(this, other)
if isinstance(result, DatetimeIndex):
result.tz = this.tz
result._tz = timezones.tz_standardize(this.tz)
if (result.freq is None and
(this.freq is not None or other.freq is not None)):
result.offset = to_offset(result.inferred_freq)
Expand Down Expand Up @@ -1223,7 +1229,7 @@ def union_many(self, others):
tz = this.tz
this = Index.union(this, other)
if isinstance(this, DatetimeIndex):
this.tz = tz
this._tz = timezones.tz_standardize(tz)

if this.freq is None:
this.offset = to_offset(this.inferred_freq)
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/frame/test_alter_axes.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,8 +249,8 @@ def test_set_index_cast_datetimeindex(self):
# convert to utc
df['C'] = i.to_series().reset_index(drop=True)
result = df['C']
comp = pd.DatetimeIndex(expected.values).copy()
comp.tz = None
comp = pd.DatetimeIndex(expected.values)
comp = comp.tz_localize(None)
tm.assert_numpy_array_equal(result.values, comp.values)

# list of datetimes with a tz
Expand Down
28 changes: 28 additions & 0 deletions pandas/tests/indexes/datetimes/test_construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,34 @@ def test_000constructor_resolution(self):

assert idx.nanosecond[0] == t1.nanosecond

def test_disallow_setting_tz(self):
    # GH 3746: tz on a DatetimeIndex is read-only after construction
    index = DatetimeIndex(['2010'], tz='UTC')
    replacement = pytz.timezone('US/Pacific')
    with pytest.raises(AttributeError):
        index.tz = replacement

@pytest.mark.parametrize('tz', [
    None,
    'America/Los_Angeles',
    pytz.timezone('America/Los_Angeles'),
    Timestamp('2000', tz='America/Los_Angeles').tz,
])
def test_constructor_start_end_with_tz(self, tz):
    # GH 18595
    zone = 'America/Los_Angeles'
    start = Timestamp('2013-01-01 06:00:00', tz=zone)
    end = Timestamp('2013-01-02 06:00:00', tz=zone)

    result = DatetimeIndex(freq='D', start=start, end=end, tz=tz)

    stamps = ['2013-01-01 06:00:00', '2013-01-02 06:00:00']
    expected = DatetimeIndex(stamps, tz=zone)
    tm.assert_index_equal(result, expected)

    # The index must expose the very same pytz instance that
    # pytz.timezone() hands out
    assert pytz.timezone(zone) is result.tz

@pytest.mark.parametrize('tz', ['US/Pacific', 'US/Eastern', 'Asia/Tokyo'])
def test_constructor_with_non_normalized_pytz(self, tz):
    # GH 18595: a tz taken from a localized Timestamp is passed in, and
    # the index must expose the instance returned by pytz.timezone()
    tz_from_timestamp = Timestamp('2010', tz=tz).tz
    index = DatetimeIndex(['2010'], tz=tz_from_timestamp)
    assert pytz.timezone(tz) is index.tz


class TestTimeSeries(object):

Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/scalar/timestamp/test_timestamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -521,6 +521,13 @@ def test_today(self):
assert (abs(ts_from_string_tz.tz_localize(None) -
ts_from_method_tz.tz_localize(None)) < delta)

@pytest.mark.parametrize('tz', [None, pytz.timezone('US/Pacific')])
def test_disallow_setting_tz(self, tz):
    # GH 3746: tz on a Timestamp is read-only, whatever value is assigned
    stamp = Timestamp('2010')
    with pytest.raises(AttributeError):
        stamp.tz = tz


class TestTimestamp(object):

Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/test_resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -2532,6 +2532,18 @@ def test_with_local_timezone_pytz(self):
expected = Series(1, index=expected_index)
assert_series_equal(result, expected)

def test_resample_with_pytz(self):
    # GH 13238: resampling must keep the index timezone
    hourly = pd.date_range('2017-01-01', periods=48, freq="H",
                           tz="US/Eastern")
    result = Series(2, index=hourly).resample("D").mean()

    daily = pd.DatetimeIndex(['2017-01-01', '2017-01-02'],
                             tz="US/Eastern")
    expected = Series(2, index=daily)
    assert_series_equal(result, expected)

    # Especially assert that the timezone is LMT for pytz
    assert result.index.tz == pytz.timezone('US/Eastern')

def test_with_local_timezone_dateutil(self):
# see gh-5430
local_timezone = 'dateutil/America/Los_Angeles'
Expand Down

0 comments on commit fa24af9

Please sign in to comment.