Skip to content

Commit

Permalink
Merge pull request #8941 from hkleynhans/fix/8683_resample_value_error
Browse files Browse the repository at this point in the history
BUG: Resample across multiple days
  • Loading branch information
jorisvandenbossche committed Nov 30, 2014
2 parents 8290a4d + 5a2b19b commit 2de0084
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 4 deletions.
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v0.15.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,8 @@ Bug Fixes
- BUG: Option context applies on __enter__ (:issue:`8514`)



- Bug in resample that caused a ValueError when resampling across multiple days
because the last offset was not calculated from the start of the range (:issue:`8683`)


- Bug in `pd.infer_freq`/`DataFrame.inferred_freq` that prevented proper sub-daily frequency inference
Expand Down
10 changes: 7 additions & 3 deletions pandas/tseries/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,15 +411,19 @@ def _get_range_edges(first, last, offset, closed='left', base=0):
def _adjust_dates_anchored(first, last, offset, closed='right', base=0):
from pandas.tseries.tools import normalize_date

# First and last offsets should be calculated from the start day to fix an
# error caused by resampling across multiple days when a one day period is
# not a multiple of the frequency.
#
# See https://github.com/pydata/pandas/issues/8683

start_day_nanos = Timestamp(normalize_date(first)).value
last_day_nanos = Timestamp(normalize_date(last)).value

base_nanos = (base % offset.n) * offset.nanos // offset.n
start_day_nanos += base_nanos
last_day_nanos += base_nanos

foffset = (first.value - start_day_nanos) % offset.nanos
loffset = (last.value - last_day_nanos) % offset.nanos
loffset = (last.value - start_day_nanos) % offset.nanos

if closed == 'right':
if foffset > 0:
Expand Down
23 changes: 23 additions & 0 deletions pandas/tseries/tests/test_resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -705,6 +705,29 @@ def test_resample_anchored_monthstart(self):
for freq in freqs:
result = ts.resample(freq, how='mean')

def test_resample_anchored_multiday(self):
# When resampling a range spanning multiple days, ensure that the
# start date gets used to determine the offset. Fixes issue where
# a one day period is not a multiple of the frequency.
#
# Here the frequency is 2200 ms ('L' is the millisecond alias), which
# does not divide evenly into one day (86,400,000 ms), so bin edges
# anchored to midnight of the first day differ from edges anchored to
# midnight of the last day.
#
# See: https://github.com/pydata/pandas/issues/8683

# Five points: three on 2014-10-14 spaced 400 ms apart, unioned with two
# on 2014-10-15 spaced 2200 ms apart (the last one at 23:00:02.200).
s = pd.Series(np.random.randn(5),
index=pd.date_range('2014-10-14 23:06:23.206',
periods=3, freq='400L')
| pd.date_range('2014-10-15 23:00:00',
periods=2, freq='2200L'))

# Default (left) labelling: the last bin is reported by its left edge.
# 2014-10-15 23:00:02.000 is exactly 76910 * 2200 ms after midnight of
# 2014-10-14, i.e. it lies on the grid anchored to the start day.
result = s.resample('2200L', 'mean')
self.assertEqual(result.index[-1],
pd.Timestamp('2014-10-15 23:00:02.000'))

# Right labelling (label='right'): the same last bin is reported by its
# right edge, one 2200 ms step later.
result = s.resample('2200L', 'mean', label='right')
self.assertEqual(result.index[-1],
pd.Timestamp('2014-10-15 23:00:04.200'))


def test_corner_cases(self):
# miscellaneous test coverage

Expand Down

0 comments on commit 2de0084

Please sign in to comment.