Skip to content

Commit

Permalink
fix for BUG: grouping with tz-aware: Values falls after last bin (#24973)
Browse files Browse the repository at this point in the history
  • Loading branch information
ahcub authored and TomAugspurger committed Jan 29, 2019
1 parent 1fc88c7 commit abf0824
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 18 deletions.
3 changes: 1 addition & 2 deletions doc/source/whatsnew/v0.24.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,7 @@ Bug Fixes

**Reshaping**

-
-
- Bug in :meth:`DataFrame.groupby` with :class:`Grouper` when there is a time change (DST) and grouping frequency is ``'1d'`` (:issue:`24972`)

**Visualization**

Expand Down
31 changes: 15 additions & 16 deletions pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@
from pandas.core.indexes.timedeltas import TimedeltaIndex, timedelta_range

from pandas.tseries.frequencies import to_offset
from pandas.tseries.offsets import (
DateOffset, Day, Nano, Tick, delta_to_nanoseconds)
from pandas.tseries.offsets import DateOffset, Day, Nano, Tick

_shared_docs_kwargs = dict()

Expand Down Expand Up @@ -1613,20 +1612,20 @@ def _get_timestamp_range_edges(first, last, offset, closed='left', base=0):
A tuple of length 2, containing the adjusted pd.Timestamp objects.
"""
if isinstance(offset, Tick):
is_day = isinstance(offset, Day)
day_nanos = delta_to_nanoseconds(timedelta(1))

# #1165 and #24127
if (is_day and not offset.nanos % day_nanos) or not is_day:
first, last = _adjust_dates_anchored(first, last, offset,
closed=closed, base=base)
if is_day and first.tz is not None:
# _adjust_dates_anchored assumes 'D' means 24H, but first/last
# might contain a DST transition (23H, 24H, or 25H).
# Ensure first/last snap to midnight.
first = first.normalize()
last = last.normalize()
return first, last
if isinstance(offset, Day):
# _adjust_dates_anchored assumes 'D' means 24H, but first/last
# might contain a DST transition (23H, 24H, or 25H).
# So "pretend" the dates are naive when adjusting the endpoints
tz = first.tz
first = first.tz_localize(None)
last = last.tz_localize(None)

first, last = _adjust_dates_anchored(first, last, offset,
closed=closed, base=base)
if isinstance(offset, Day):
first = first.tz_localize(tz)
last = last.tz_localize(tz)
return first, last

else:
first = first.normalize()
Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/resample/test_datetime_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1278,6 +1278,21 @@ def test_resample_across_dst():
assert_frame_equal(result, expected)


def test_groupby_with_dst_time_change():
    """Check daily grouping of tz-aware data across a time change (GH 24972).

    Two observations, given as epoch nanoseconds in UTC and converted to
    'America/Chicago', are grouped with a ``'1d'`` Grouper.  The result is
    expected to hold one row per calendar day from 2016-11-02 through
    2016-11-24 in that zone: the first and last rows carry the observed
    values and the 21 days in between are NaN.
    """
    tz_name = 'America/Chicago'

    # Build the tz-aware input: integers are read as UTC, then converted.
    epoch_nanos = [1478064900001000000, 1480037118776792000]
    stamps = pd.DatetimeIndex(epoch_nanos, tz='UTC').tz_convert(tz_name)
    frame = pd.DataFrame([1, 2], index=stamps)

    daily_groups = frame.groupby(pd.Grouper(freq='1d'))
    result = daily_groups.last()

    # One bin per local day; only the two end bins contain data.
    daily_range = pd.date_range('2016-11-02', '2016-11-24',
                                freq='d', tz=tz_name)
    expected_values = [1.0]
    expected_values.extend([np.nan] * 21)
    expected_values.append(2.0)
    expected = pd.DataFrame(expected_values,
                            index=pd.DatetimeIndex(daily_range))

    assert_frame_equal(result, expected)


def test_resample_dst_anchor():
# 5172
dti = DatetimeIndex([datetime(2012, 11, 4, 23)], tz='US/Eastern')
Expand Down

0 comments on commit abf0824

Please sign in to comment.