Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

ENH/API: add normalize option to DatetimeIndex, date_range, bdate_range. Deprecate/remove _normalizeFirst option in offsets. Set default for BDay to not normalize. Closes #1031, #506
  • Loading branch information...
commit 992b1ff5f17a0111a15963cf8387180e368a814a 1 parent d988d11
Wes McKinney wesm authored
7 RELEASE.rst
View
@@ -22,6 +22,13 @@ Where to get it
* Binary installers on PyPI: http://pypi.python.org/pypi/pandas
* Documentation: http://pandas.pydata.org
+pandas 0.8.0
+============
+
+**API Changes**
+
+ - Change BDay (business day) to not normalize dates by default
+
pandas 0.7.3
============
15 pandas/core/daterange.py
View
@@ -43,7 +43,8 @@ def __setstate__(self, aug_state):
self.tzinfo = tzinfo
Index.__setstate__(self, *index_state)
-def date_range(start=None, end=None, periods=None, freq='D', tz=None):
+def date_range(start=None, end=None, periods=None, freq='D', tz=None,
+ normalize=False):
"""
Return a fixed frequency datetime index, with day (calendar) as the default
frequency
@@ -53,16 +54,19 @@ def date_range(start=None, end=None, periods=None, freq='D', tz=None):
----------
start :
end :
+ normalize : bool, default False
+ Normalize start/end dates to midnight before generating date range
Returns
-------
"""
return DatetimeIndex(start=start, end=end, periods=periods,
- freq=freq, tz=tz)
+ freq=freq, tz=tz, normalize=normalize)
-def bdate_range(start=None, end=None, periods=None, freq='B', tz=None):
+def bdate_range(start=None, end=None, periods=None, freq='B', tz=None,
+ normalize=True):
"""
Return a fixed frequency datetime index, with business day as the default
frequency
@@ -70,6 +74,9 @@ def bdate_range(start=None, end=None, periods=None, freq='B', tz=None):
Parameters
----------
+ normalize : bool, default True
+ Normalize start/end dates to midnight before generating date
+ range. Defaults to True for legacy reasons
Returns
-------
@@ -78,7 +85,7 @@ def bdate_range(start=None, end=None, periods=None, freq='B', tz=None):
"""
return DatetimeIndex(start=start, end=end, periods=periods,
- freq=freq, tz=tz)
+ freq=freq, tz=tz, normalize=normalize)
def interval_range():
"""
124 pandas/core/datetools.py
View
@@ -891,7 +891,7 @@ def parse_time_string(arg):
def normalize_date(dt):
if isinstance(dt, np.datetime64):
dt = _dt_box(dt)
- return datetime(dt.year, dt.month, dt.day)
+ return dt.replace(hour=0, minute=0, second=0, microsecond=0)
def _get_firstbday(wkday):
"""
@@ -910,7 +910,9 @@ def _get_firstbday(wkday):
# DateOffset
class CacheableOffset(object):
- pass
+
+ _cacheable = True
+
class DateOffset(object):
"""
@@ -955,9 +957,9 @@ def __add__(date):
Since 0 is a bit weird, we suggest avoiding its use.
"""
- # For some offsets, want to drop the time information off the
- # first date
- _normalizeFirst = False
+ _cacheable = False
+ _normalize_cache = True
+
def __init__(self, n=1, **kwds):
self.n = int(n)
self.kwds = kwds
@@ -984,6 +986,9 @@ def isAnchored(self):
def copy(self):
return self.__class__(self.n, **self.kwds)
+ def _should_cache(self):
+ return self.isAnchored() and self._cacheable
+
def _params(self):
attrs = [(k, v) for k, v in vars(self).iteritems()
if k not in ['kwds', '_offset', 'name']]
@@ -1068,31 +1073,27 @@ def __neg__(self):
def rollback(self, someDate):
"""Roll provided date backward to next offset only if not on offset"""
- if self._normalizeFirst:
- someDate = normalize_date(someDate)
-
if not self.onOffset(someDate):
someDate = someDate - self.__class__(1, **self.kwds)
return someDate
- def rollforward(self, someDate):
+ def rollforward(self, dt):
"""Roll provided date forward to next offset only if not on offset"""
- if self._normalizeFirst:
- someDate = normalize_date(someDate)
+ if isinstance(dt, np.datetime64):
+ dt = _dt_box(dt)
+ if not self.onOffset(dt):
+ dt = dt + self.__class__(1, **self.kwds)
+ return dt
- if not self.onOffset(someDate):
- someDate = someDate + self.__class__(1, **self.kwds)
- return someDate
-
- def onOffset(self, someDate):
+ def onOffset(self, dt):
if type(self) == DateOffset:
return True
# Default (slow) method for determining if some date is a member of the
# date range generated by this offset. Subclasses may have this
# re-implemented in a nicer way.
- a = someDate
- b = ((someDate + self) - self)
+ a = dt
+ b = ((dt + self) - self)
return a == b
@@ -1106,13 +1107,12 @@ class BDay(DateOffset, CacheableOffset):
"""
DateOffset subclass representing possibly n business days
"""
- _normalizeFirst = True
_outputName = 'BusinessDay'
def __init__(self, n=1, **kwds):
self.n = int(n)
self.kwds = kwds
self.offset = kwds.get('offset', timedelta(0))
- self.normalize = kwds.get('normalize', True)
+ self.normalize = kwds.get('normalize', False)
def rule_code(self):
return 'B'
@@ -1171,15 +1171,15 @@ def apply(self, other):
raise Exception('Only know how to combine business day with '
'datetime or timedelta!')
@classmethod
- def onOffset(cls, someDate):
- return someDate.weekday() < 5
+ def onOffset(cls, dt):
+ if isinstance(dt, np.datetime64):
+ dt = _dt_box(dt)
+ return dt.weekday() < 5
class MonthEnd(DateOffset, CacheableOffset):
"""DateOffset of one month end"""
- _normalizeFirst = True
-
def apply(self, other):
n = self.n
_, days_in_month = lib.monthrange(other.year, other.month)
@@ -1191,10 +1191,9 @@ def apply(self, other):
return other
@classmethod
- def onOffset(cls, someDate):
- __junk, days_in_month = lib.monthrange(someDate.year,
- someDate.month)
- return someDate.day == days_in_month
+ def onOffset(cls, dt):
+ __junk, days_in_month = lib.monthrange(dt.year, dt.month)
+ return dt.day == days_in_month
def rule_code(self):
return 'M'
@@ -1202,8 +1201,6 @@ def rule_code(self):
class MonthBegin(DateOffset, CacheableOffset):
"""DateOffset of one month at beginning"""
- _normalizeFirst = True
-
def apply(self, other):
n = self.n
@@ -1214,9 +1211,9 @@ def apply(self, other):
return other
@classmethod
- def onOffset(cls, someDate):
- firstDay, _ = lib.monthrange(someDate.year, someDate.month)
- return someDate.day == (firstDay + 1)
+ def onOffset(cls, dt):
+ firstDay, _ = lib.monthrange(dt.year, dt.month)
+ return dt.day == (firstDay + 1)
def rule_code(self):
return 'MS'
@@ -1225,7 +1222,6 @@ def rule_code(self):
class BMonthEnd(DateOffset, CacheableOffset):
"""DateOffset increments between business EOM dates"""
_outputName = 'BusinessMonthEnd'
- _normalizeFirst = True
def isAnchored(self):
return (self.n == 1)
@@ -1253,8 +1249,6 @@ def rule_code(self):
class BMonthBegin(DateOffset, CacheableOffset):
"""DateOffset of one business month at beginning"""
- _normalizeFirst = True
-
def apply(self, other):
n = self.n
@@ -1284,7 +1278,6 @@ class Week(DateOffset, CacheableOffset):
weekday : int, default None
Always generate specific day of week. 0 for Monday
"""
- _normalizeFirst = True
def __init__(self, n=1, **kwds):
self.n = n
self.weekday = kwds.get('weekday', None)
@@ -1321,8 +1314,8 @@ def apply(self, other):
other = other - self.inc
return other
- def onOffset(self, someDate):
- return someDate.weekday() == self.weekday
+ def onOffset(self, dt):
+ return dt.weekday() == self.weekday
def rule_code(self):
suffix = ''
@@ -1358,7 +1351,6 @@ class WeekOfMonth(DateOffset, CacheableOffset):
5: Saturdays
6: Sundays
"""
- _normalizeFirst = True
def __init__(self, n=1, **kwds):
self.n = n
self.weekday = kwds['weekday']
@@ -1394,9 +1386,9 @@ def apply(self, other):
return self.getOffsetOfMonth(other + relativedelta(months=months, day=1))
- def getOffsetOfMonth(self, someDate):
+ def getOffsetOfMonth(self, dt):
w = Week(weekday=self.weekday)
- d = datetime(someDate.year, someDate.month, 1)
+ d = datetime(dt.year, dt.month, 1)
d = w.rollforward(d)
@@ -1405,8 +1397,8 @@ def getOffsetOfMonth(self, someDate):
return d
- def onOffset(self, someDate):
- return someDate == self.getOffsetOfMonth(someDate)
+ def onOffset(self, dt):
+ return dt == self.getOffsetOfMonth(dt)
def rule_code(self):
suffix = '-%d%s' % (self.week + 1, _weekday_dict.get(self.weekday, ''))
@@ -1419,7 +1411,6 @@ class BQuarterEnd(DateOffset, CacheableOffset):
startingMonth = 3 corresponds to dates like 3/30/2007, 6/29/2007, ...
"""
_outputName = 'BusinessQuarterEnd'
- _normalizeFirst = True
def __init__(self, n=1, **kwds):
self.n = n
@@ -1453,9 +1444,9 @@ def apply(self, other):
return other
- def onOffset(self, someDate):
- modMonth = (someDate.month - self.startingMonth) % 3
- return BMonthEnd().onOffset(someDate) and modMonth == 0
+ def onOffset(self, dt):
+ modMonth = (dt.month - self.startingMonth) % 3
+ return BMonthEnd().onOffset(dt) and modMonth == 0
def rule_code(self):
suffix = '-%s' % _month_dict[self.startingMonth]
@@ -1478,7 +1469,6 @@ def rule_code(self):
class BQuarterBegin(DateOffset, CacheableOffset):
_outputName = "BusinessQuarterBegin"
- _normalizeFirst = True
def __init__(self, n=1, **kwds):
self.n = n
@@ -1493,9 +1483,6 @@ def isAnchored(self):
def apply(self, other):
n = self.n
- if self._normalizeFirst:
- other = normalize_date(other)
-
wkday, _ = lib.monthrange(other.year, other.month)
firstBDay = _get_firstbday(wkday)
@@ -1533,7 +1520,6 @@ class QuarterEnd(DateOffset, CacheableOffset):
startingMonth = 3 corresponds to dates like 3/31/2007, 6/30/2007, ...
"""
_outputName = 'QuarterEnd'
- _normalizeFirst = True
def __init__(self, n=1, **kwds):
self.n = n
@@ -1561,9 +1547,9 @@ def apply(self, other):
return other
- def onOffset(self, someDate):
- modMonth = (someDate.month - self.startingMonth) % 3
- return MonthEnd().onOffset(someDate) and modMonth == 0
+ def onOffset(self, dt):
+ modMonth = (dt.month - self.startingMonth) % 3
+ return MonthEnd().onOffset(dt) and modMonth == 0
def rule_code(self):
suffix = '-%s' % _month_dict[self.startingMonth]
@@ -1572,7 +1558,6 @@ def rule_code(self):
class QuarterBegin(DateOffset, CacheableOffset):
_outputName = 'QuarterBegin'
- _normalizeFirst = True
def __init__(self, n=1, **kwds):
self.n = n
@@ -1613,7 +1598,6 @@ def rule_code(self):
class BYearEnd(DateOffset, CacheableOffset):
"""DateOffset increments between business year end dates"""
_outputName = 'BusinessYearEnd'
- _normalizeFirst = True
def __init__(self, n=1, **kwds):
self.month = kwds.get('month', 12)
@@ -1626,9 +1610,6 @@ def __init__(self, n=1, **kwds):
def apply(self, other):
n = self.n
- if self._normalizeFirst:
- other = normalize_date(other)
-
wkday, days_in_month = lib.monthrange(other.year, self.month)
lastBDay = (days_in_month -
max(((wkday + days_in_month - 1) % 7) - 4, 0))
@@ -1661,7 +1642,6 @@ def rule_code(self):
class BYearBegin(DateOffset, CacheableOffset):
"""DateOffset increments between business year begin dates"""
_outputName = 'BusinessYearBegin'
- _normalizeFirst = True
def __init__(self, n=1, **kwds):
self.month = kwds.get('month', 1)
@@ -1674,9 +1654,6 @@ def __init__(self, n=1, **kwds):
def apply(self, other):
n = self.n
- if self._normalizeFirst:
- other = normalize_date(other)
-
wkday, days_in_month = lib.monthrange(other.year, self.month)
firstBDay = _get_firstbday(wkday)
@@ -1707,7 +1684,6 @@ def rule_code(self):
class YearEnd(DateOffset, CacheableOffset):
"""DateOffset increments between calendar year ends"""
- _normalizeFirst = True
def __init__(self, n=1, **kwds):
self.month = kwds.get('month', 12)
@@ -1727,9 +1703,9 @@ def apply(self, other):
other = other + relativedelta(years=n)
return other
- def onOffset(self, someDate):
- wkday, days_in_month = lib.monthrange(someDate.year, self.month)
- return self.month == someDate.month and someDate.day == days_in_month
+ def onOffset(self, dt):
+ wkday, days_in_month = lib.monthrange(dt.year, self.month)
+ return self.month == dt.month and dt.day == days_in_month
def rule_code(self):
suffix = '-%s' % _month_dict[self.month]
@@ -1738,7 +1714,6 @@ def rule_code(self):
class YearBegin(DateOffset, CacheableOffset):
"""DateOffset increments between calendar year begin dates"""
- _normalizeFirst = True
def __init__(self, n=1, **kwds):
self.month = kwds.get('month', 12)
@@ -1758,8 +1733,8 @@ def apply(self, other):
return other
@classmethod
- def onOffset(cls, someDate):
- return someDate.month == 1 and someDate.day == 1
+ def onOffset(cls, dt):
+ return dt.month == 1 and dt.day == 1
def rule_code(self):
suffix = '-%s' % _month_dict[self.month]
@@ -1770,7 +1745,6 @@ def rule_code(self):
# Ticks
class Tick(DateOffset):
- _normalizeFirst = False
_delta = None
_inc = timedelta(microseconds=1000)
@@ -2230,8 +2204,6 @@ def _infer(a, b):
tz = _infer(end, start)
return tz
-def _will_use_cache(offset):
- return (offset.isAnchored() and isinstance(offset, CacheableOffset))
def _figure_out_timezone(start, end, tzinfo):
inferred_tz = _infer_tzinfo(start, end)
@@ -2301,8 +2273,6 @@ def generate_range(start=None, end=None, periods=None,
start = end - (periods - 1) * offset
cur = start
- if offset._normalizeFirst:
- cur = normalize_date(cur)
next_date = cur
while cur <= end:
41 pandas/core/index.py
View
@@ -1150,6 +1150,8 @@ def _maybe_box_dtindex(idx):
return Index(_dt_box_array(idx.asi8), dtype='object')
return idx
+_midnight = time(0, 0)
+
class DatetimeIndex(Int64Index):
"""
Immutable ndarray of datetime64 data, represented internally as int64, and
@@ -1204,7 +1206,7 @@ class DatetimeIndex(Int64Index):
def __new__(cls, data=None,
freq=None, start=None, end=None, periods=None,
dtype=None, copy=False, name=None, tz=None,
- verify_integrity=True, **kwds):
+ verify_integrity=True, normalize=False, **kwds):
warn = False
if 'offset' in kwds and kwds['offset']:
@@ -1232,22 +1234,37 @@ def __new__(cls, data=None,
"supplied")
if data is None:
- start = datetools.to_timestamp(start)
- end = datetools.to_timestamp(end)
+ _normalized = True
- if (start is not None and not isinstance(start, Timestamp)):
- raise ValueError('Failed to convert %s to timestamp' % start)
+ if start is not None:
+ start = datetools.to_timestamp(start)
+ if not isinstance(start, Timestamp):
+ raise ValueError('Failed to convert %s to timestamp'
+ % start)
- if (end is not None and not isinstance(end, Timestamp)):
- raise ValueError('Failed to convert %s to timestamp' % end)
+ if normalize:
+ start = datetools.normalize_date(start)
+ _normalized = True
+ else:
+ _normalized = _normalized and start.time() == _midnight
- useCache = datetools._will_use_cache(offset)
+ if end is not None:
+ end = datetools.to_timestamp(end)
+ if not isinstance(end, Timestamp):
+ raise ValueError('Failed to convert %s to timestamp'
+ % end)
- start, end, tz = datetools._figure_out_timezone(start, end, tz)
+ if normalize:
+ end = datetools.normalize_date(end)
+ _normalized = True
+ else:
+ _normalized = _normalized and end.time() == _midnight
- useCache = useCache and datetools._naive_in_cache_range(start, end)
+ start, end, tz = datetools._figure_out_timezone(start, end, tz)
- if useCache:
+ if (offset._should_cache() and
+ not (offset._normalize_cache and not _normalized) and
+ datetools._naive_in_cache_range(start, end)):
index = cls._cached_range(start, end, periods=periods,
offset=offset, name=name)
else:
@@ -1625,7 +1642,7 @@ def _fast_union(self, other):
left_start, left_end = left[0], left[-1]
right_end = right[-1]
- if not datetools._will_use_cache(self.offset):
+ if not self.offset._should_cache():
# concatenate dates
if left_end < right_end:
loc = right.searchsorted(left_end, side='right')
107 pandas/tests/test_timeseries.py
View
@@ -1,6 +1,6 @@
# pylint: disable-msg=E1101,W0612
-from datetime import datetime
+from datetime import datetime, time, timedelta
import sys
import unittest
@@ -417,7 +417,7 @@ def test_unpickle_legacy_frame(self):
self.assertEquals(len(unpickled), 10)
self.assert_((unpickled.columns == Int64Index(np.arange(5))).all())
self.assert_((unpickled.index == dtindex).all())
- self.assertEquals(unpickled.index.offset, BDay(1))
+ self.assertEquals(unpickled.index.offset, BDay(1, normalize=True))
def test_unpickle_legacy_series(self):
from pandas.core.datetools import BDay
@@ -430,7 +430,7 @@ def test_unpickle_legacy_series(self):
self.assertEquals(type(unpickled.index), DatetimeIndex)
self.assertEquals(len(unpickled), 10)
self.assert_((unpickled.index == dtindex).all())
- self.assertEquals(unpickled.index.offset, BDay(1))
+ self.assertEquals(unpickled.index.offset, BDay(1, normalize=True))
def test_arithmetic_interaction(self):
index = self.frame.index
@@ -544,6 +544,65 @@ def test_rule_aliases(self):
rule = datetools.to_offset('10us')
self.assert_(rule == datetools.Micro(10))
+ def test_slice_year(self):
+ dti = DatetimeIndex(freq='B', start=datetime(2005,1,1), periods=500)
+
+ s = Series(np.arange(len(dti)), index=dti)
+ self.assertEquals(len(s['2005']), 261)
+
+ df = DataFrame(np.random.rand(len(dti), 5), index=dti)
+ self.assertEquals(len(df.ix['2005']), 261)
+
+ def test_slice_quarter(self):
+ dti = DatetimeIndex(freq='D', start=datetime(2000,6,1), periods=500)
+
+ s = Series(np.arange(len(dti)), index=dti)
+ self.assertEquals(len(s['2001Q1']), 90)
+
+ df = DataFrame(np.random.rand(len(dti), 5), index=dti)
+ self.assertEquals(len(df.ix['1Q01']), 90)
+
+ def test_slice_month(self):
+ dti = DatetimeIndex(freq='D', start=datetime(2005,1,1), periods=500)
+ s = Series(np.arange(len(dti)), index=dti)
+ self.assertEquals(len(s['2005-11']), 30)
+
+ df = DataFrame(np.random.rand(len(dti), 5), index=dti)
+ self.assertEquals(len(df.ix['2005-11']), 30)
+
+ def test_partial_slice(self):
+ rng = DatetimeIndex(freq='D', start=datetime(2005,1,1), periods=500)
+ s = Series(np.arange(len(rng)), index=rng)
+
+ result = s['2005-05':'2006-02']
+ expected = s['20050501':'20060228']
+ assert_series_equal(result, expected)
+
+ result = s['2005-05':]
+ expected = s['20050501':]
+ assert_series_equal(result, expected)
+
+ result = s[:'2006-02']
+ expected = s[:'20060228']
+ assert_series_equal(result, expected)
+
+ def test_date_range_normalize(self):
+ snap = datetime.today()
+ n = 50
+
+ rng = date_range(snap, periods=n, normalize=False, freq='2D')
+
+ offset = timedelta(2)
+ values = np.array([snap + i * offset for i in range(n)],
+ dtype='M8[us]')
+
+ self.assert_(np.array_equal(rng, values))
+
+ rng = date_range('1/1/2000 08:15', periods=n, normalize=False, freq='B')
+ the_time = time(8, 15)
+ for val in rng:
+ self.assert_(val.time() == the_time)
+
class TestDateRangeCompat(unittest.TestCase):
def setUp(self):
@@ -809,48 +868,6 @@ def test_asobject_tz_box(self):
result = index.asobject
self.assert_(result[0].tz is tz)
- def test_slice_year(self):
- dti = DatetimeIndex(freq='B', start=datetime(2005,1,1), periods=500)
-
- s = Series(np.arange(len(dti)), index=dti)
- self.assertEquals(len(s['2005']), 261)
-
- df = DataFrame(np.random.rand(len(dti), 5), index=dti)
- self.assertEquals(len(df.ix['2005']), 261)
-
- def test_slice_quarter(self):
- dti = DatetimeIndex(freq='D', start=datetime(2000,6,1), periods=500)
-
- s = Series(np.arange(len(dti)), index=dti)
- self.assertEquals(len(s['2001Q1']), 90)
-
- df = DataFrame(np.random.rand(len(dti), 5), index=dti)
- self.assertEquals(len(df.ix['1Q01']), 90)
-
- def test_slice_month(self):
- dti = DatetimeIndex(freq='D', start=datetime(2005,1,1), periods=500)
- s = Series(np.arange(len(dti)), index=dti)
- self.assertEquals(len(s['2005-11']), 30)
-
- df = DataFrame(np.random.rand(len(dti), 5), index=dti)
- self.assertEquals(len(df.ix['2005-11']), 30)
-
- def test_partial_slice(self):
- rng = DatetimeIndex(freq='D', start=datetime(2005,1,1), periods=500)
- s = Series(np.arange(len(rng)), index=rng)
-
- result = s['2005-05':'2006-02']
- expected = s['20050501':'20060228']
- assert_series_equal(result, expected)
-
- result = s['2005-05':]
- expected = s['20050501':]
- assert_series_equal(result, expected)
-
- result = s[:'2006-02']
- expected = s[:'20060228']
- assert_series_equal(result, expected)
-
def test_datetimeindex_constructor(self):
arr = ['1/1/2005', '1/2/2005', 'Jn 3, 2005', '2005-01-04']
self.assertRaises(Exception, DatetimeIndex, arr)
Please sign in to comment.
Something went wrong with that request. Please try again.