Skip to content

Commit

Permalink
BUG: .asfreq on resample on PeriodIndex/TimedeltaIndex are not
Browse files Browse the repository at this point in the history
      including the full range

closes pandas-dev#12926
  • Loading branch information
jreback committed Apr 20, 2016
1 parent 1320ef7 commit a3c9052
Show file tree
Hide file tree
Showing 5 changed files with 200 additions and 29 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.18.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,7 @@ Bug Fixes


- Bug in ``.loc`` with out-of-bounds in a large indexer would raise ``IndexError`` rather than ``KeyError`` (:issue:`12527`)
- Bug in resampling when using a ``TimedeltaIndex`` and ``.asfreq()``, where the final fencepost was previously not included (:issue:`12926`)
- Bug in equality testing with a ``Categorical`` in a ``DataFrame`` (:issue:`12564`)
- Bug in ``GroupBy.first()``, ``.last()`` returns incorrect row when ``TimeGrouper`` is used (:issue:`7453`)

Expand Down
41 changes: 28 additions & 13 deletions pandas/tseries/frequencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -1124,6 +1124,26 @@ def _maybe_add_count(base, count):
return base


def _maybe_coerce_freq(code):
""" we might need to coerce a code to a rule_code
and uppercase it
Parameters
----------
source : string
Frequency converting from
Returns
-------
string code
"""

assert code is not None
if isinstance(code, offsets.DateOffset):
code = code.rule_code
return code.upper()


def is_subperiod(source, target):
"""
Returns True if downsampling is possible between source and target
Expand All @@ -1140,14 +1160,12 @@ def is_subperiod(source, target):
-------
is_subperiod : boolean
"""
if isinstance(source, offsets.DateOffset):
source = source.rule_code

if isinstance(target, offsets.DateOffset):
target = target.rule_code
if target is None or source is None:
return False
source = _maybe_coerce_freq(source)
target = _maybe_coerce_freq(target)

target = target.upper()
source = source.upper()
if _is_annual(target):
if _is_quarterly(source):
return _quarter_months_conform(_get_rule_month(source),
Expand Down Expand Up @@ -1195,14 +1213,11 @@ def is_superperiod(source, target):
-------
is_superperiod : boolean
"""
if isinstance(source, offsets.DateOffset):
source = source.rule_code

if isinstance(target, offsets.DateOffset):
target = target.rule_code
if target is None or source is None:
return False
source = _maybe_coerce_freq(source)
target = _maybe_coerce_freq(target)

target = target.upper()
source = source.upper()
if _is_annual(source):
if _is_annual(target):
return _get_rule_month(source) == _get_rule_month(target)
Expand Down
35 changes: 28 additions & 7 deletions pandas/tseries/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -595,6 +595,14 @@ def _downsample(self, how, **kwargs):

return self._wrap_result(result)

def _adjust_binner_for_upsample(self, binner):
""" adjust our binner when upsampling """
if self.closed == 'right':
binner = binner[1:]
else:
binner = binner[:-1]
return binner

def _upsample(self, method, limit=None):
"""
method : string {'backfill', 'bfill', 'pad', 'ffill'}
Expand All @@ -614,11 +622,7 @@ def _upsample(self, method, limit=None):
ax = self.ax
obj = self._selected_obj
binner = self.binner

if self.closed == 'right':
res_index = binner[1:]
else:
res_index = binner[:-1]
res_index = self._adjust_binner_for_upsample(binner)

# if we have the same frequency as our axis, then we are equal sampling
if limit is None and to_offset(ax.inferred_freq) == self.freq:
Expand Down Expand Up @@ -764,6 +768,20 @@ class TimedeltaResampler(DatetimeIndexResampler):
def _get_binner_for_time(self):
return self.groupby._get_time_delta_bins(self.ax)

def _adjust_binner_for_upsample(self, binner):
""" adjust our binner when upsampling """
ax = self.ax

if is_subperiod(ax.freq, self.freq):
# We are actually downsampling
# but are in the asfreq path
# GH 12926
if self.closed == 'right':
binner = binner[1:]
else:
binner = binner[:-1]
return binner


def resample(obj, kind=None, **kwds):
""" create a TimeGrouper and return our resampler """
Expand Down Expand Up @@ -1004,8 +1022,11 @@ def _get_time_delta_bins(self, ax):
data=[], freq=self.freq, name=ax.name)
return binner, [], labels

labels = binner = TimedeltaIndex(start=ax[0],
end=ax[-1],
# we need 1 extra bin here to accommodate the self.closed
start = ax[0]
end = ax[-1]
labels = binner = TimedeltaIndex(start=start,
end=end,
freq=self.freq,
name=ax.name)

Expand Down
9 changes: 9 additions & 0 deletions pandas/tseries/tests/test_frequencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -670,6 +670,15 @@ def test_legacy_offset_warnings(self):


def test_is_superperiod_subperiod():

# input validation
assert not (frequencies.is_superperiod(offsets.YearEnd(), None))
assert not (frequencies.is_subperiod(offsets.MonthEnd(), None))
assert not (frequencies.is_superperiod(None, offsets.YearEnd()))
assert not (frequencies.is_subperiod(None, offsets.MonthEnd()))
assert not (frequencies.is_superperiod(None, None))
assert not (frequencies.is_subperiod(None, None))

assert (frequencies.is_superperiod(offsets.YearEnd(), offsets.MonthEnd()))
assert (frequencies.is_subperiod(offsets.MonthEnd(), offsets.YearEnd()))

Expand Down
143 changes: 134 additions & 9 deletions pandas/tseries/tests/test_resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from pandas.tseries.period import period_range, PeriodIndex, Period
from pandas.tseries.resample import (DatetimeIndex, TimeGrouper,
DatetimeIndexResampler)
from pandas.tseries.frequencies import to_offset
from pandas.tseries.tdi import timedelta_range
from pandas.util.testing import (assert_series_equal, assert_almost_equal,
assert_frame_equal)
Expand All @@ -35,6 +36,16 @@
resample_methods = downsample_methods + upsample_methods + series_methods


def _simple_ts(start, end, freq='D'):
rng = date_range(start, end, freq=freq)
return Series(np.random.randn(len(rng)), index=rng)


def _simple_pts(start, end, freq='D'):
rng = period_range(start, end, freq=freq)
return Series(np.random.randn(len(rng)), index=rng)


class TestResampleAPI(tm.TestCase):
_multiprocess_can_split_ = True

Expand Down Expand Up @@ -566,15 +577,63 @@ def test_agg_consistency(self):
assert_frame_equal(result, expected)


class TestResample(tm.TestCase):
class Base(object):
    """
    shared resampling tests, run once per index type

    The tests call ``self.create_series()`` for their input and
    ``self._index_factory()`` for the range constructor used to build
    expected indexes.
    """

    def create_index(self, *args, **kwargs):
        """ build an index by passing args, kwargs to the _index_factory """
        return self._index_factory()(*args, **kwargs)

    def test_asfreq_downsample(self):
        s = self.create_series()

        # series: every second row is kept and the index carries a 2D freq
        result = s.resample('2D').asfreq()
        every_other = np.arange(0, len(s.index), 2)
        expected = s.reindex(s.index.take(every_other))
        expected.index.freq = to_offset('2D')
        assert_series_equal(result, expected)

        # frame: same check on a one-column frame
        frame = s.to_frame('value')
        result = frame.resample('2D').asfreq()
        every_other = np.arange(0, len(frame.index), 2)
        expected = frame.reindex(frame.index.take(every_other))
        expected.index.freq = to_offset('2D')
        assert_frame_equal(result, expected)

    def test_asfreq_upsample(self):
        s = self.create_series()

        # series: result equals a reindex onto the hourly range
        # spanning the first and last labels
        result = s.resample('1H').asfreq()
        first, last = s.index[0], s.index[-1]
        new_index = self.create_index(first, last, freq='1H')
        expected = s.reindex(new_index)
        assert_series_equal(result, expected)

        # frame: same check on a one-column frame
        frame = s.to_frame('value')
        result = frame.resample('1H').asfreq()
        first, last = frame.index[0], frame.index[-1]
        new_index = self.create_index(first, last, freq='1H')
        expected = frame.reindex(new_index)
        assert_frame_equal(result, expected)


class TestDatetimeIndex(Base, tm.TestCase):
_multiprocess_can_split_ = True
_index_factory = lambda x: date_range

def setUp(self):
dti = DatetimeIndex(start=datetime(2005, 1, 1),
end=datetime(2005, 1, 10), freq='Min')

self.series = Series(np.random.rand(len(dti)), dti)

def create_series(self):
i = date_range(datetime(2005, 1, 1),
datetime(2005, 1, 10), freq='D')

return Series(np.arange(len(i)), index=i, name='dti')

def test_custom_grouper(self):

dti = DatetimeIndex(freq='Min', start=datetime(2005, 1, 1),
Expand Down Expand Up @@ -1798,18 +1857,61 @@ def test_resmaple_dst_anchor(self):
'D Frequency')


def _simple_ts(start, end, freq='D'):
rng = date_range(start, end, freq=freq)
return Series(np.random.randn(len(rng)), index=rng)
class TestPeriodIndex(Base, tm.TestCase):
_multiprocess_can_split_ = True
_index_factory = lambda x: period_range

def create_series(self):
i = period_range(datetime(2005, 1, 1),
datetime(2005, 1, 10), freq='D')

def _simple_pts(start, end, freq='D'):
rng = period_range(start, end, freq=freq)
return Series(np.random.randn(len(rng)), index=rng)
return Series(np.arange(len(i)), index=i, name='pi')

def test_asfreq_downsample(self):

class TestResamplePeriodIndex(tm.TestCase):
_multiprocess_can_split_ = True
# series
s = self.create_series()
expected = s.reindex(s.index.take(np.arange(0, len(s.index), 2)))
expected.index = expected.index.to_timestamp()
expected.index.freq = to_offset('2D')

# this is a bug, this *should* return a PeriodIndex
# directly
# GH 12884
result = s.resample('2D').asfreq()
assert_series_equal(result, expected)

# frame
frame = s.to_frame('value')
expected = frame.reindex(
frame.index.take(np.arange(0, len(frame.index), 2)))
expected.index = expected.index.to_timestamp()
expected.index.freq = to_offset('2D')
result = frame.resample('2D').asfreq()
assert_frame_equal(result, expected)

def test_asfreq_upsample(self):

# this is a bug, this *should* return a PeriodIndex
# directly
# GH 12884
s = self.create_series()
new_index = date_range(s.index[0].to_timestamp(how='start'),
(s.index[-1] + 1).to_timestamp(how='start'),
freq='1H',
closed='left')
expected = s.to_timestamp().reindex(new_index).to_period()
result = s.resample('1H').asfreq()
assert_series_equal(result, expected)

frame = s.to_frame('value')
new_index = date_range(frame.index[0].to_timestamp(how='start'),
(frame.index[-1] + 1).to_timestamp(how='start'),
freq='1H',
closed='left')
expected = frame.to_timestamp().reindex(new_index).to_period()
result = frame.resample('1H').asfreq()
assert_frame_equal(result, expected)

def test_annual_upsample_D_s_f(self):
self._check_annual_upsample_cases('D', 'start', 'ffill')
Expand Down Expand Up @@ -2336,6 +2438,29 @@ def test_evenly_divisible_with_no_extra_bins(self):
assert_frame_equal(result, expected)


class TestTimedeltaIndex(Base, tm.TestCase):
    # runs the shared Base tests against a timedelta_range index
    _multiprocess_can_split_ = True
    _index_factory = lambda x: timedelta_range

    def create_series(self):
        index = timedelta_range('1 day', '10 day', freq='D')
        values = np.arange(len(index))
        return Series(values, index=index, name='tdi')

    def test_asfreq_bug(self):
        # two points 3 minutes apart, upsampled to 1T: the result must
        # hold all 4 labels, including the last one
        from datetime import timedelta

        index = [timedelta(), timedelta(minutes=3)]
        df = DataFrame(data=[1, 3], index=index)
        result = df.resample('1T').asfreq()

        expected_index = timedelta_range('0 day', periods=4, freq='1T')
        expected = DataFrame(data=[1, np.nan, np.nan, 3],
                             index=expected_index)
        assert_frame_equal(result, expected)


class TestTimeGrouper(tm.TestCase):
def setUp(self):
self.ts = Series(np.random.randn(1000),
Expand Down

0 comments on commit a3c9052

Please sign in to comment.