Skip to content

Commit

Permalink
implement shift_quarters --> apply_index for quarters and years (#18522)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and jreback committed Nov 27, 2017
1 parent 34b036c commit 88ab693
Show file tree
Hide file tree
Showing 3 changed files with 193 additions and 53 deletions.
156 changes: 154 additions & 2 deletions pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@ np.import_array()

from util cimport is_string_object, is_integer_object

from pandas._libs.tslib import monthrange

from conversion cimport tz_convert_single, pydt_to_i8
from frequencies cimport get_freq_code
from nattype cimport NPY_NAT
Expand Down Expand Up @@ -471,6 +469,160 @@ cdef inline int month_add_months(pandas_datetimestruct dts, int months) nogil:
return 12 if new_month == 0 else new_month


@cython.wraparound(False)
@cython.boundscheck(False)
def shift_quarters(int64_t[:] dtindex, int quarters,
                   int q1start_month, object day, int modby=3):
    """
    Given an int64 array representing nanosecond timestamps, shift all elements
    by the specified number of quarters using DateOffset semantics.

    NPY_NAT entries are passed through unchanged.

    Parameters
    ----------
    dtindex : int64_t[:] timestamps for input dates
    quarters : int number of quarters to shift
    q1start_month : int month in which Q1 begins by convention
    day : {'start', 'end', 'business_start', 'business_end'}
        which day of the anchor month the results are placed on
    modby : int (3 for quarters, 12 for years)
        number of months between consecutive anchor months

    Returns
    -------
    out : ndarray[int64_t]

    Raises
    ------
    ValueError
        if `day` is not one of the four supported options
    """
    cdef:
        Py_ssize_t i, count = len(dtindex)
        pandas_datetimestruct dts
        int months_since, n, compare_day
        int64_t[:] out = np.empty(count, dtype='int64')

    # In every branch `modby * n - months_since` first steps back to the most
    # recent anchor month (the `- months_since` part) and then moves `n`
    # periods from there.  `n` is adjusted beforehand so that the implicit
    # step back is not double-counted (n <= 0) and so that an anchor that
    # still lies ahead within the current month counts as the first forward
    # step (n > 0).

    if day == 'start':
        with nogil:
            for i in range(count):
                if dtindex[i] == NPY_NAT:
                    out[i] = NPY_NAT
                    continue

                dt64_to_dtstruct(dtindex[i], &dts)
                n = quarters

                months_since = (dts.month - q1start_month) % modby

                # offset semantics - past the anchor point and going
                # backwards (or rolling with n == 0): stepping back to the
                # anchor month already consumes one period.
                # There is no n > 0 counterpart because dts.day < 1 can
                # never hold.
                if n <= 0 and (months_since != 0 or dts.day > 1):
                    n += 1

                dts.year = year_add_months(dts, modby * n - months_since)
                dts.month = month_add_months(dts, modby * n - months_since)
                dts.day = 1

                out[i] = dtstruct_to_dt64(&dts)

    elif day == 'end':
        with nogil:
            for i in range(count):
                if dtindex[i] == NPY_NAT:
                    out[i] = NPY_NAT
                    continue

                dt64_to_dtstruct(dtindex[i], &dts)
                n = quarters

                months_since = (dts.month - q1start_month) % modby

                if n <= 0 and months_since != 0:
                    # The general case of this condition would be
                    # `months_since != 0 or (months_since == 0 and
                    # dts.day > get_days_in_month(dts.year, dts.month))`
                    # but the get_days_in_month inequality would never hold.
                    n += 1
                elif n > 0 and (months_since == 0 and
                                dts.day < get_days_in_month(dts.year,
                                                            dts.month)):
                    # in the anchor month but before its last day: that
                    # month end is itself the first forward step
                    n -= 1

                dts.year = year_add_months(dts, modby * n - months_since)
                dts.month = month_add_months(dts, modby * n - months_since)
                dts.day = get_days_in_month(dts.year, dts.month)

                out[i] = dtstruct_to_dt64(&dts)

    elif day == 'business_start':
        with nogil:
            for i in range(count):
                if dtindex[i] == NPY_NAT:
                    out[i] = NPY_NAT
                    continue

                dt64_to_dtstruct(dtindex[i], &dts)
                n = quarters

                months_since = (dts.month - q1start_month) % modby
                # compare_day is only consulted when months_since == 0, i.e.
                # when the current month *is* the anchor month, so it is
                # computed from dts.month directly.  (Deriving it from
                # `dts.month - months_since` can produce a non-positive,
                # invalid month when modby == 12.)
                compare_day = get_firstbday(dts.year, dts.month)

                if n <= 0 and (months_since != 0 or dts.day > compare_day):
                    # make sure to roll forward, so negate
                    n += 1
                elif n > 0 and (months_since == 0 and dts.day < compare_day):
                    # pretend to roll back if on same month but
                    # before compare_day
                    n -= 1

                dts.year = year_add_months(dts, modby * n - months_since)
                dts.month = month_add_months(dts, modby * n - months_since)

                dts.day = get_firstbday(dts.year, dts.month)

                out[i] = dtstruct_to_dt64(&dts)

    elif day == 'business_end':
        with nogil:
            for i in range(count):
                if dtindex[i] == NPY_NAT:
                    out[i] = NPY_NAT
                    continue

                dt64_to_dtstruct(dtindex[i], &dts)
                n = quarters

                months_since = (dts.month - q1start_month) % modby
                # compare_day is only consulted when months_since == 0, i.e.
                # when the current month *is* the anchor month, so it is
                # computed from dts.month directly.  (Deriving it from
                # `dts.month - months_since` can produce a non-positive,
                # invalid month when modby == 12.)
                compare_day = get_lastbday(dts.year, dts.month)

                if n <= 0 and (months_since != 0 or dts.day > compare_day):
                    # make sure to roll forward, so negate
                    n += 1
                elif n > 0 and (months_since == 0 and dts.day < compare_day):
                    # pretend to roll back if on same month but
                    # before compare_day
                    n -= 1

                dts.year = year_add_months(dts, modby * n - months_since)
                dts.month = month_add_months(dts, modby * n - months_since)

                dts.day = get_lastbday(dts.year, dts.month)

                out[i] = dtstruct_to_dt64(&dts)

    else:
        # unlike shift_months, None is not an accepted value here
        raise ValueError("day must be 'start', 'end', "
                         "'business_start', or 'business_end'")

    return np.asarray(out)


@cython.wraparound(False)
@cython.boundscheck(False)
def shift_months(int64_t[:] dtindex, int months, object day=None):
Expand Down
12 changes: 9 additions & 3 deletions pandas/tests/tseries/offsets/test_yqm_offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,15 @@ def test_quarterly_dont_normalize():
assert (result.time() == date.time())


@pytest.mark.parametrize('offset', [MonthBegin(), MonthEnd(),
BMonthBegin(), BMonthEnd()])
def test_apply_index(offset):
@pytest.mark.parametrize('n', [-2, 1])
@pytest.mark.parametrize('cls', [MonthBegin, MonthEnd,
BMonthBegin, BMonthEnd,
QuarterBegin, QuarterEnd,
BQuarterBegin, BQuarterEnd,
YearBegin, YearEnd,
BYearBegin, BYearEnd])
def test_apply_index(cls, n):
offset = cls(n=n)
rng = pd.date_range(start='1/1/2000', periods=100000, freq='T')
ser = pd.Series(rng)

Expand Down
78 changes: 30 additions & 48 deletions pandas/tseries/offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
apply_index_wraps,
roll_yearday,
shift_month,
BeginMixin, EndMixin,
EndMixin,
BaseOffset)


Expand Down Expand Up @@ -1028,10 +1028,7 @@ def cbday(self):

@cache_readonly
def m_offset(self):
kwds = self.kwds
kwds = {key: kwds[key] for key in kwds
if key not in ['calendar', 'weekmask', 'holidays', 'offset']}
return MonthEnd(n=1, normalize=self.normalize, **kwds)
return MonthEnd(n=1, normalize=self.normalize)

@apply_wraps
def apply(self, other):
Expand Down Expand Up @@ -1106,10 +1103,7 @@ def cbday(self):

@cache_readonly
def m_offset(self):
kwds = self.kwds
kwds = {key: kwds[key] for key in kwds
if key not in ['calendar', 'weekmask', 'holidays', 'offset']}
return MonthBegin(n=1, normalize=self.normalize, **kwds)
return MonthBegin(n=1, normalize=self.normalize)

@apply_wraps
def apply(self, other):
Expand Down Expand Up @@ -1254,12 +1248,9 @@ def onOffset(self, dt):

def _apply(self, n, other):
# if other.day is not day_of_month move to day_of_month and update n
if other.day < self.day_of_month:
other = other.replace(day=self.day_of_month)
if n > 0:
n -= 1
if n > 0 and other.day < self.day_of_month:
n -= 1
elif other.day > self.day_of_month:
other = other.replace(day=self.day_of_month)
n += 1

months = n // 2
Expand Down Expand Up @@ -1309,12 +1300,9 @@ def onOffset(self, dt):
def _apply(self, n, other):
# if other.day is not day_of_month move to day_of_month and update n
if other.day < self.day_of_month:
other = other.replace(day=self.day_of_month)
n -= 1
elif other.day > self.day_of_month:
other = other.replace(day=self.day_of_month)
if n <= 0:
n += 1
elif n <= 0 and other.day > self.day_of_month:
n += 1

months = n // 2 + n % 2
day = 1 if n % 2 else self.day_of_month
Expand Down Expand Up @@ -1471,6 +1459,7 @@ def apply(self, other):
def getOffsetOfMonth(self, dt):
w = Week(weekday=self.weekday)
d = datetime(dt.year, dt.month, 1, tzinfo=dt.tzinfo)
# TODO: Is this DST-safe?
d = w.rollforward(d)
return d + timedelta(weeks=self.week)

Expand Down Expand Up @@ -1550,6 +1539,7 @@ def getOffsetOfMonth(self, dt):
d = datetime(dt.year, dt.month, 1, dt.hour, dt.minute,
dt.second, dt.microsecond, tzinfo=dt.tzinfo)
eom = m.rollforward(d)
# TODO: Is this DST-safe?
w = Week(weekday=self.weekday)
return w.rollback(eom)

Expand Down Expand Up @@ -1635,6 +1625,12 @@ def onOffset(self, dt):
modMonth = (dt.month - self.startingMonth) % 3
return modMonth == 0 and dt.day == self._get_offset_day(dt)

@apply_index_wraps
def apply_index(self, dtindex):
    """
    Apply this offset to every element of `dtindex` in one vectorized call.

    The index's `asi8` values are handed to `liboffsets.shift_quarters`
    together with `self.n`, `self.startingMonth` and `self._day_opt`
    (default `modby`, i.e. quarterly spacing); the shifted values are
    wrapped back up with `dtindex._shallow_copy`.
    """
    i8_shifted = liboffsets.shift_quarters(
        dtindex.asi8,
        self.n,
        self.startingMonth,
        self._day_opt,
    )
    return dtindex._shallow_copy(i8_shifted)


class BQuarterEnd(QuarterOffset):
"""DateOffset increments between business Quarter dates
Expand All @@ -1659,7 +1655,7 @@ class BQuarterBegin(QuarterOffset):
_day_opt = 'business_start'


class QuarterEnd(EndMixin, QuarterOffset):
class QuarterEnd(QuarterOffset):
"""DateOffset increments between business Quarter dates
startingMonth = 1 corresponds to dates like 1/31/2007, 4/30/2007, ...
startingMonth = 2 corresponds to dates like 2/28/2007, 5/31/2007, ...
Expand All @@ -1670,25 +1666,14 @@ class QuarterEnd(EndMixin, QuarterOffset):
_prefix = 'Q'
_day_opt = 'end'

@apply_index_wraps
def apply_index(self, i):
return self._end_apply_index(i, self.freqstr)


class QuarterBegin(BeginMixin, QuarterOffset):
class QuarterBegin(QuarterOffset):
_outputName = 'QuarterBegin'
_default_startingMonth = 3
_from_name_startingMonth = 1
_prefix = 'QS'
_day_opt = 'start'

@apply_index_wraps
def apply_index(self, i):
freq_month = 12 if self.startingMonth == 1 else self.startingMonth - 1
month = liboffsets._int_to_month[freq_month]
freqstr = 'Q-{month}'.format(month=month)
return self._beg_apply_index(i, freqstr)


# ---------------------------------------------------------------------
# Year-Based Offset Classes
Expand All @@ -1709,6 +1694,13 @@ def apply(self, other):
months = years * 12 + (self.month - other.month)
return shift_month(other, months, self._day_opt)

@apply_index_wraps
def apply_index(self, dtindex):
    """
    Apply this offset to every element of `dtindex` in one vectorized call.

    Reuses `liboffsets.shift_quarters` with `modby=12` so that anchor
    months are twelve months apart, passing `self.month` as the anchor
    month and `self._day_opt` as the day option; the shifted values are
    wrapped back up with `dtindex._shallow_copy`.
    """
    i8_shifted = liboffsets.shift_quarters(
        dtindex.asi8,
        self.n,
        self.month,
        self._day_opt,
        modby=12,
    )
    return dtindex._shallow_copy(i8_shifted)

def onOffset(self, dt):
if self.normalize and not _is_normalized(dt):
return False
Expand Down Expand Up @@ -1752,31 +1744,19 @@ class BYearBegin(YearOffset):
_day_opt = 'business_start'


class YearEnd(EndMixin, YearOffset):
class YearEnd(YearOffset):
"""DateOffset increments between calendar year ends"""
_default_month = 12
_prefix = 'A'
_day_opt = 'end'

@apply_index_wraps
def apply_index(self, i):
# convert month anchor to annual period tuple
return self._end_apply_index(i, self.freqstr)


class YearBegin(BeginMixin, YearOffset):
class YearBegin(YearOffset):
"""DateOffset increments between calendar year begin dates"""
_default_month = 1
_prefix = 'AS'
_day_opt = 'start'

@apply_index_wraps
def apply_index(self, i):
freq_month = 12 if self.month == 1 else self.month - 1
month = liboffsets._int_to_month[freq_month]
freqstr = 'A-{month}'.format(month=month)
return self._beg_apply_index(i, freqstr)


# ---------------------------------------------------------------------
# Special Offset Classes
Expand Down Expand Up @@ -2245,7 +2225,8 @@ def __eq__(self, other):
if isinstance(other, Tick):
return self.delta == other.delta
else:
return DateOffset.__eq__(self, other)
# TODO: Are there cases where this should raise TypeError?
return False

# This is identical to DateOffset.__hash__, but has to be redefined here
# for Python 3, because we've redefined __eq__.
Expand All @@ -2261,7 +2242,8 @@ def __ne__(self, other):
if isinstance(other, Tick):
return self.delta != other.delta
else:
return DateOffset.__ne__(self, other)
# TODO: Are there cases where this should raise TypeError?
return True

@property
def delta(self):
Expand Down

0 comments on commit 88ab693

Please sign in to comment.