Skip to content

Commit

Permalink
BUG: tz-awareness for DatetimeIndex.normalize #2338
Browse files Browse the repository at this point in the history
  • Loading branch information
changhiskhan committed Nov 24, 2012
1 parent 930c140 commit c01d1bc
Show file tree
Hide file tree
Showing 4 changed files with 96 additions and 20 deletions.
63 changes: 49 additions & 14 deletions pandas/src/datetime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1433,31 +1433,66 @@ cdef inline int m8_weekday(int64_t val):
# Number of nanoseconds in one day: 86400 seconds * 10**9.
cdef int64_t DAY_NS = 86400000000000LL


# NOTE(review): this is a flattened diff view -- the +/- markers and the
# indentation were lost, so pre-change and post-change lines are interleaved.
# The page reports 14 deletions for this file; the first `def` line, the first
# `result` declaration and the first (unconditional) `for` loop below appear to
# be the removed lines, superseded by the tz-aware versions that follow them.
#
# date_normalize(stamps, tz=None): for every int64 stamp, clear the
# hour/min/sec/us fields of its datetimestruct and convert the struct back to
# an int64 (PANDAS_FR_ns), returning the results in a new int64 array.
def date_normalize(ndarray[int64_t] stamps):
def date_normalize(ndarray[int64_t] stamps, tz=None):
cdef:
Py_ssize_t i, n = len(stamps)
ndarray[int64_t] result = np.empty(n, dtype=np.int64)
pandas_datetimestruct dts
_TSObject tso
ndarray[int64_t] result = np.empty(n, dtype=np.int64)

# Pre-change implementation (appears to be the removed loop): always decodes
# the raw stamp, ignoring any time zone.
for i in range(n):
pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, &dts)
dts.hour = 0
dts.min = 0
dts.sec = 0
dts.us = 0
result[i] = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts)
# tz supplied: take the datetimestruct from convert_to_tsobject(stamp, tz)
# rather than decoding the raw stamp, then clear the time-of-day fields.
# NOTE(review): only hour/min/sec/us are cleared; if the struct carries
# sub-microsecond fields they would survive normalization -- confirm.
if tz is not None:
for i in range(n):
tso = convert_to_tsobject(stamps[i], tz)
dts = tso.dts
dts.hour = 0
dts.min = 0
dts.sec = 0
dts.us = 0
result[i] = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts)
# No tz: same field-clearing applied to the struct decoded from the raw stamp.
else:
for i in range(n):
pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, &dts)
dts.hour = 0
dts.min = 0
dts.sec = 0
dts.us = 0
result[i] = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts)

return result

# NOTE(review): flattened diff view (no +/- markers, no indentation). The first
# `def` line and the first unconditional `for` loop below appear to be the
# removed pre-change lines; the tz-aware branches after them are the additions.
#
# dates_normalized(stamps, tz=None): return False as soon as any stamp has a
# non-zero time-of-day component (as seen in `tz`, when given), True otherwise.
def dates_normalized(ndarray[int64_t] stamps):

def dates_normalized(ndarray[int64_t] stamps, tz=None):
cdef:
Py_ssize_t i, n = len(stamps)
pandas_datetimestruct dts

# Pre-change check (appears removed): inspects the raw stamp only.
for i in range(n):
pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, &dts)
if (dts.hour + dts.min + dts.sec + dts.us) > 0:
return False
# No tz, or UTC: the fields decoded straight from the stamp are checked as-is.
if tz is None or _is_utc(tz):
for i in range(n):
pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, &dts)
if (dts.hour + dts.min + dts.sec + dts.us) > 0:
return False
# tzlocal: min/sec/us are checked on the decoded stamp, then a tz-aware
# datetime is built from the struct, tz.utcoffset(dt) is added, and the
# resulting hour is checked.
# NOTE(review): min/sec/us are tested before the offset is applied, which
# presumably assumes a whole-hour UTC offset -- confirm.
elif _is_tzlocal(tz):
for i in range(n):
pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, &dts)
if (dts.min + dts.sec + dts.us) > 0:
return False
dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min,
dts.sec, dts.us, tz)
dt = dt + tz.utcoffset(dt)
if dt.hour > 0:
return False
# Any other tz: fetch its transitions and deltas once, then for each stamp
# pick the entry just before the searchsorted insertion point and shift the
# stamp by that delta before decoding it.
else:
trans = _get_transitions(tz)
deltas = _get_deltas(tz)
for i in range(n):
# Adjust datetime64 timestamp, recompute datetimestruct
pos = trans.searchsorted(stamps[i]) - 1
# NOTE(review): `inf` is assigned but not used in the visible code.
inf = tz._transition_info[pos]

pandas_datetime_to_datetimestruct(stamps[i] + deltas[pos],
PANDAS_FR_ns, &dts)
if (dts.hour + dts.min + dts.sec + dts.us) > 0:
return False

return True

Expand Down
11 changes: 5 additions & 6 deletions pandas/tseries/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,7 @@ def _cached_range(cls, start=None, end=None, periods=None, offset=None,

def _mpl_repr(self):
# how to represent ourselves to matplotlib
# NOTE(review): flattened diff view -- the first return appears to be the
# removed line; the second is its replacement, which also passes self.tz
# to lib.ints_to_pydatetime.
return lib.ints_to_pydatetime(self.asi8)
return lib.ints_to_pydatetime(self.asi8, self.tz)

def __repr__(self):
from pandas.core.format import _format_datetime64
Expand Down Expand Up @@ -1175,8 +1175,9 @@ def normalize(self):
-------
normalized : DatetimeIndex
"""
new_values = lib.date_normalize(self.asi8)
return DatetimeIndex(new_values, freq='infer', name=self.name)
new_values = lib.date_normalize(self.asi8, self.tz)
return DatetimeIndex(new_values, freq='infer', name=self.name,
tz=self.tz)

def __iter__(self):
return iter(self._get_object_index())
Expand Down Expand Up @@ -1218,7 +1219,7 @@ def is_normalized(self):
"""
Returns True if all of the dates are at midnight ("no time")
"""
return lib.dates_normalized(self.asi8)
return lib.dates_normalized(self.asi8, self.tz)

def equals(self, other):
"""
Expand Down Expand Up @@ -1568,5 +1569,3 @@ def _in_range(start, end, rng_start, rng_end):
def _time_to_micros(time):
    """Return the time of day held by ``time`` as microseconds since midnight.

    ``time`` is any object exposing ``hour``, ``minute``, ``second`` and
    ``microsecond`` attributes.
    """
    whole_seconds = time.hour * 3600 + time.minute * 60 + time.second
    return whole_seconds * 1000000 + time.microsecond


34 changes: 34 additions & 0 deletions pandas/tseries/tests/test_timezones.py
Original file line number Diff line number Diff line change
Expand Up @@ -725,6 +725,40 @@ def test_datetimeindex_tz(self):
rng2 = DatetimeIndex(data=rng, tz='US/Eastern')
self.assert_(rng.equals(rng2))

def test_normalize_tz(self):
    """normalize() / is_normalized on tz-aware daily ranges.

    For US/Eastern, UTC and dateutil's tzlocal, a range starting at 9:30
    must normalize to the same range starting at midnight in that zone;
    the normalized result reports is_normalized, the original does not.
    """
    def check(make_tz):
        # make_tz is called once per range, mirroring the separate tz
        # arguments used for the input and the expected index.
        rng = date_range('1/1/2000 9:30', periods=10, freq='D',
                         tz=make_tz())

        result = rng.normalize()
        expected = date_range('1/1/2000', periods=10, freq='D',
                              tz=make_tz())
        self.assert_(result.equals(expected))

        self.assert_(result.is_normalized)
        self.assert_(not rng.is_normalized)

    check(lambda: 'US/Eastern')
    check(lambda: 'UTC')

    from dateutil.tz import tzlocal
    check(tzlocal)

# When executed as a script, run this module's tests through nose.
# exit=False is passed so that runmodule is asked not to exit the interpreter
# itself; the argv flags presumably select verbose output, no output capture,
# stop-on-first-failure and pdb on errors/failures -- see nose's option docs.
if __name__ == '__main__':
nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'],
exit=False)
8 changes: 8 additions & 0 deletions vb_suite/timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,3 +167,11 @@ def date_range(start=None, end=None, periods=None, freq=None):
# Benchmark assigning `rng` as a DataFrame column; `setup` here is the value
# defined above this excerpt.
period_setitem = \
Benchmark("df['col'] = rng", setup,
start_date=datetime(2012, 8, 1))

# Setup for the normalize benchmark: a 100000-period, freq='S' date_range in
# US/Eastern starting at 9:30 on 1/1/2000.
setup = common_setup + """
rng = date_range('1/1/2000 9:30', periods=100000, freq='S', tz='US/Eastern')
"""

# Benchmark DatetimeIndex.normalize() on the tz-aware range built above.
datetimeindex_normalize = \
Benchmark('rng.normalize()', setup,
start_date=datetime(2012, 9, 1))

0 comments on commit c01d1bc

Please sign in to comment.