Skip to content

Commit

Permalink
Start porting offsets to cython (pandas-dev#17830)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and peterpanmj committed Oct 31, 2017
1 parent b3e8f69 commit 58a3a90
Show file tree
Hide file tree
Showing 6 changed files with 224 additions and 176 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.22.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ Other API Changes

- ``NaT`` division with :class:`datetime.timedelta` will now return ``NaN`` instead of raising (:issue:`17876`)
- :class:`Timestamp` will no longer silently ignore unused or invalid `tz` or `tzinfo` arguments (:issue:`17690`)
-
- :class:`CacheableOffset` and :class:`WeekDay` are no longer available in the `tseries.offsets` module (:issue:`17830`)
-

.. _whatsnew_0220.deprecations:
Expand Down
208 changes: 208 additions & 0 deletions pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
# -*- coding: utf-8 -*-
# cython: profile=False

cimport cython

import time
from cpython.datetime cimport time as dt_time

import numpy as np
cimport numpy as np
np.import_array()


from util cimport is_string_object


from pandas._libs.tslib import pydt_to_i8, tz_convert_single

# ---------------------------------------------------------------------
# Constants

# Duplicated in tslib
_MONTHS = ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL',
'AUG', 'SEP', 'OCT', 'NOV', 'DEC']
_int_to_month = {(k + 1): v for k, v in enumerate(_MONTHS)}
_month_to_int = dict((v, k) for k, v in _int_to_month.items())


class WeekDay(object):
MON = 0
TUE = 1
WED = 2
THU = 3
FRI = 4
SAT = 5
SUN = 6


_int_to_weekday = {
WeekDay.MON: 'MON',
WeekDay.TUE: 'TUE',
WeekDay.WED: 'WED',
WeekDay.THU: 'THU',
WeekDay.FRI: 'FRI',
WeekDay.SAT: 'SAT',
WeekDay.SUN: 'SUN'}

_weekday_to_int = {_int_to_weekday[key]: key for key in _int_to_weekday}


_offset_to_period_map = {
'WEEKDAY': 'D',
'EOM': 'M',
'BM': 'M',
'BQS': 'Q',
'QS': 'Q',
'BQ': 'Q',
'BA': 'A',
'AS': 'A',
'BAS': 'A',
'MS': 'M',
'D': 'D',
'C': 'C',
'B': 'B',
'T': 'T',
'S': 'S',
'L': 'L',
'U': 'U',
'N': 'N',
'H': 'H',
'Q': 'Q',
'A': 'A',
'W': 'W',
'M': 'M',
'Y': 'A',
'BY': 'A',
'YS': 'A',
'BYS': 'A'}

need_suffix = ['QS', 'BQ', 'BQS', 'YS', 'AS', 'BY', 'BA', 'BYS', 'BAS']


for __prefix in need_suffix:
for _m in _MONTHS:
key = '%s-%s' % (__prefix, _m)
_offset_to_period_map[key] = _offset_to_period_map[__prefix]

for __prefix in ['A', 'Q']:
for _m in _MONTHS:
_alias = '%s-%s' % (__prefix, _m)
_offset_to_period_map[_alias] = _alias

_days = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN']
for _d in _days:
_offset_to_period_map['W-%s' % _d] = 'W-%s' % _d


# ---------------------------------------------------------------------
# Misc Helpers

def as_datetime(obj):
f = getattr(obj, 'to_pydatetime', None)
if f is not None:
obj = f()
return obj


def _is_normalized(dt):
if (dt.hour != 0 or dt.minute != 0 or dt.second != 0 or
dt.microsecond != 0 or getattr(dt, 'nanosecond', 0) != 0):
return False
return True


# ---------------------------------------------------------------------
# Business Helpers

def _get_firstbday(wkday):
"""
wkday is the result of monthrange(year, month)
If it's a saturday or sunday, increment first business day to reflect this
"""
first = 1
if wkday == 5: # on Saturday
first = 3
elif wkday == 6: # on Sunday
first = 2
return first


def _get_calendar(weekmask, holidays, calendar):
"""Generate busdaycalendar"""
if isinstance(calendar, np.busdaycalendar):
if not holidays:
holidays = tuple(calendar.holidays)
elif not isinstance(holidays, tuple):
holidays = tuple(holidays)
else:
# trust that calendar.holidays and holidays are
# consistent
pass
return calendar, holidays

if holidays is None:
holidays = []
try:
holidays = holidays + calendar.holidays().tolist()
except AttributeError:
pass
holidays = [_to_dt64(dt, dtype='datetime64[D]') for dt in holidays]
holidays = tuple(sorted(holidays))

kwargs = {'weekmask': weekmask}
if holidays:
kwargs['holidays'] = holidays

busdaycalendar = np.busdaycalendar(**kwargs)
return busdaycalendar, holidays


def _to_dt64(dt, dtype='datetime64'):
# Currently
# > np.datetime64(dt.datetime(2013,5,1),dtype='datetime64[D]')
# numpy.datetime64('2013-05-01T02:00:00.000000+0200')
# Thus astype is needed to cast datetime to datetime64[D]
if getattr(dt, 'tzinfo', None) is not None:
i8 = pydt_to_i8(dt)
dt = tz_convert_single(i8, 'UTC', dt.tzinfo)
dt = np.int64(dt).astype('datetime64[ns]')
else:
dt = np.datetime64(dt)
if dt.dtype.name != dtype:
dt = dt.astype(dtype)
return dt


# ---------------------------------------------------------------------
# Validation


def _validate_business_time(t_input):
if is_string_object(t_input):
try:
t = time.strptime(t_input, '%H:%M')
return dt_time(hour=t.tm_hour, minute=t.tm_min)
except ValueError:
raise ValueError("time data must match '%H:%M' format")
elif isinstance(t_input, dt_time):
if t_input.second != 0 or t_input.microsecond != 0:
raise ValueError(
"time data must be specified only with hour and minute")
return t_input
else:
raise ValueError("time data must be string or datetime.time")

# ---------------------------------------------------------------------
# Mixins & Singletons


class ApplyTypeError(TypeError):
# sentinel class for catching the apply error to return NotImplemented
pass


# TODO: unused. remove?
class CacheableOffset(object):
_cacheable = True
5 changes: 3 additions & 2 deletions pandas/tests/tseries/test_offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,18 @@
get_offset, get_standard_freq)
from pandas.core.indexes.datetimes import (
_to_m8, DatetimeIndex, _daterange_cache)
from pandas._libs.tslibs.offsets import WeekDay, CacheableOffset
from pandas.tseries.offsets import (BDay, CDay, BQuarterEnd, BMonthEnd,
BusinessHour, WeekOfMonth, CBMonthEnd,
CustomBusinessHour, WeekDay,
CustomBusinessHour,
CBMonthBegin, BYearEnd, MonthEnd,
MonthBegin, SemiMonthBegin, SemiMonthEnd,
BYearBegin, QuarterBegin, BQuarterBegin,
BMonthBegin, DateOffset, Week, YearBegin,
YearEnd, Hour, Minute, Second, Day, Micro,
QuarterEnd, BusinessMonthEnd, FY5253,
Milli, Nano, Easter, FY5253Quarter,
LastWeekOfMonth, CacheableOffset)
LastWeekOfMonth)
from pandas.core.tools.datetimes import (
format, ole2datetime, parse_time_string,
to_datetime, DateParseError)
Expand Down
47 changes: 1 addition & 46 deletions pandas/tseries/frequencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@ def _get_freq_str(base, mult=1):
# ---------------------------------------------------------------------
# Offset names ("time rules") and related functions


from pandas._libs.tslibs.offsets import _offset_to_period_map
from pandas.tseries.offsets import (Nano, Micro, Milli, Second, # noqa
Minute, Hour,
Day, BDay, CDay, Week, MonthBegin,
Expand All @@ -328,51 +328,6 @@ def _get_freq_str(base, mult=1):
#: cache of previously seen offsets
_offset_map = {}

_offset_to_period_map = {
'WEEKDAY': 'D',
'EOM': 'M',
'BM': 'M',
'BQS': 'Q',
'QS': 'Q',
'BQ': 'Q',
'BA': 'A',
'AS': 'A',
'BAS': 'A',
'MS': 'M',
'D': 'D',
'C': 'C',
'B': 'B',
'T': 'T',
'S': 'S',
'L': 'L',
'U': 'U',
'N': 'N',
'H': 'H',
'Q': 'Q',
'A': 'A',
'W': 'W',
'M': 'M',
'Y': 'A',
'BY': 'A',
'YS': 'A',
'BYS': 'A',
}

need_suffix = ['QS', 'BQ', 'BQS', 'YS', 'AS', 'BY', 'BA', 'BYS', 'BAS']
for __prefix in need_suffix:
for _m in tslib._MONTHS:
_alias = '{prefix}-{month}'.format(prefix=__prefix, month=_m)
_offset_to_period_map[_alias] = _offset_to_period_map[__prefix]
for __prefix in ['A', 'Q']:
for _m in tslib._MONTHS:
_alias = '{prefix}-{month}'.format(prefix=__prefix, month=_m)
_offset_to_period_map[_alias] = _alias

_days = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN']
for _d in _days:
_alias = 'W-{day}'.format(day=_d)
_offset_to_period_map[_alias] = _alias


def get_period_alias(offset_str):
""" alias to closest period strings BQ->Q etc"""
Expand Down
Loading

0 comments on commit 58a3a90

Please sign in to comment.