Permalink
Browse files

ENH: basic datetime64 integration and tests

  • Loading branch information...
1 parent 8b5351e commit 9a704bf5f9230214a90f6b0e9aaa4b8e10f3e004 @adamklein adamklein committed Jan 11, 2012
View
@@ -6,7 +6,8 @@
import pandas.core.datetools as datetools
from pandas.core.common import isnull, notnull, set_printoptions, save, load
-from pandas.core.index import Index, Int64Index, Factor, MultiIndex
+from pandas.core.index import (Index, Int64Index, Factor, MultiIndex,
+ DatetimeIndex)
from pandas.core.daterange import DateRange
from pandas.core.series import Series, TimeSeries
from pandas.core.frame import DataFrame
View
@@ -53,11 +53,15 @@ def isnull(obj):
# Working around NumPy ticket 1542
shape = obj.shape
result = np.empty(shape, dtype=bool)
- vec = lib.isnullobj(obj.ravel())
+ raveled = obj.ravel()
+ vec = lib.isnullobj(raveled)
result[:] = vec.reshape(shape)
if isinstance(obj, Series):
result = Series(result, index=obj.index, copy=False)
+ elif obj.dtype == np.datetime64:
+ # this is the NaT pattern
+ result = obj.ravel().view('i8') == 0x8000000000000000
else:
result = -np.isfinite(obj)
return result
@@ -85,6 +89,28 @@ def notnull(obj):
return not res
return -res
+_unbox_cache = dict()
+def _dt_unbox(key):
+ '''
+ Unbox datetime to datetime64
+ '''
+ try:
+ return _unbox_cache[key]
+ except KeyError:
+ _unbox_cache[key] = np.datetime64(key)
+ return _unbox_cache[key]
+
+_box_cache = dict()
+def _dt_box(key):
+ '''
+ Box datetime64 to datetime
+ '''
+ try:
+ return _box_cache[key]
+ except KeyError:
+ _box_cache[key] = key.astype('O')
+ return _box_cache[key]
+
def _pickle_array(arr):
arr = arr.view(np.ndarray)
@@ -765,7 +791,8 @@ def is_integer_dtype(arr_or_dtype):
tipo = arr_or_dtype.type
else:
tipo = arr_or_dtype.dtype.type
- return issubclass(tipo, np.integer)
+ return (issubclass(tipo, np.integer) and not
+ issubclass(tipo, np.datetime64))
def is_float_dtype(arr_or_dtype):
if isinstance(arr_or_dtype, np.dtype):
View
@@ -5,8 +5,9 @@
import numpy as np
-from pandas.core.index import Index
+from pandas.core.index import DatetimeIndex
import pandas.core.datetools as datetools
+from pandas.core.common import _dt_box
__all__ = ['DateRange']
@@ -15,6 +16,8 @@
def _bin_op(op):
def f(self, other):
+ if isinstance(other, datetime):
+ other = np.datetime64(other)
return op(self.view(np.ndarray), other)
return f
@@ -24,7 +27,7 @@ def f(self, other):
_daterange_cache = {}
-class DateRange(Index):
+class DateRange(DatetimeIndex):
"""
Fixed frequency date range according to input parameters.
@@ -66,10 +69,12 @@ def __new__(cls, start=None, end=None, periods=None,
start = datetools.to_datetime(start)
end = datetools.to_datetime(end)
- if start is not None and not isinstance(start, datetime):
+ if (start is not None
+ and not isinstance(start, (datetime, np.datetime64))):
raise ValueError('Failed to convert %s to datetime' % start)
- if end is not None and not isinstance(end, datetime):
+ if (end is not None
+ and not isinstance(end, (datetime, np.datetime64))):
raise ValueError('Failed to convert %s to datetime' % end)
# inside cache range. Handle UTC case
@@ -92,7 +97,7 @@ def __new__(cls, start=None, end=None, periods=None,
if tzinfo is not None:
index = [d.replace(tzinfo=tzinfo) for d in index]
- index = np.array(index, dtype=object, copy=False)
+ index = np.array(index, dtype=np.datetime64, copy=False)
index = index.view(cls)
index.name = name
index.offset = offset
@@ -101,7 +106,7 @@ def __new__(cls, start=None, end=None, periods=None,
def __reduce__(self):
"""Necessary for making this object picklable"""
- a, b, state = Index.__reduce__(self)
+ a, b, state = DatetimeIndex.__reduce__(self)
aug_state = state, self.offset, self.tzinfo
return a, b, aug_state
@@ -119,16 +124,16 @@ def __setstate__(self, aug_state):
self.offset = offset
self.tzinfo = tzinfo
- Index.__setstate__(self, *index_state)
+ DatetimeIndex.__setstate__(self, *index_state)
def equals(self, other):
if self is other:
return True
- if not isinstance(other, Index):
+ if not isinstance(other, DatetimeIndex):
return False
- return Index.equals(self.view(Index), other)
+ return DatetimeIndex.equals(self.view(DatetimeIndex), other)
@property
def is_all_dates(self):
@@ -147,7 +152,7 @@ def _cached_range(cls, start=None, end=None, periods=None, offset=None,
if offset not in _daterange_cache:
xdr = generate_range(_CACHE_START, _CACHE_END, offset=offset)
- arr = np.array(list(xdr), dtype=object, copy=False)
+ arr = np.array(list(xdr), dtype=np.datetime64, copy=False)
cachedRange = arr.view(DateRange)
cachedRange.offset = offset
@@ -208,6 +213,9 @@ def __getitem__(self, key):
"""Override numpy.ndarray's __getitem__ method to work as desired"""
result = self.view(np.ndarray)[key]
+ if isinstance(result, np.datetime64):
+ result = _dt_box(result).replace(tzinfo=self.tzinfo)
+
if isinstance(key, (int, np.integer)):
return result
elif isinstance(key, slice):
@@ -221,10 +229,7 @@ def __getitem__(self, key):
new_index.name = self.name
return new_index
else:
- if result.ndim > 1:
- return result
-
- return Index(result, name=self.name)
+ return DatetimeIndex(result, name=self.name)
def summary(self):
if len(self) > 0:
@@ -263,7 +268,7 @@ def shift(self, n, offset=None):
shifted : DateRange
"""
if offset is not None and offset != self.offset:
- return Index.shift(self, n, offset)
+ return DatetimeIndex.shift(self, n, offset)
if n == 0:
# immutable so OK
@@ -288,18 +293,18 @@ def union(self, other):
y : Index or DateRange
"""
if not isinstance(other, DateRange) or other.offset != self.offset:
- return Index.union(self.view(Index), other)
+ return DatetimeIndex.union(self.view(DatetimeIndex), other)
if self._can_fast_union(other):
return self._fast_union(other)
else:
- return Index.union(self, other)
+ return DatetimeIndex.union(self, other)
def _wrap_union_result(self, other, result):
# If we are here, _can_fast_union is false or other is not a
# DateRange, so their union has to be an Index.
name = self.name if self.name == other.name else None
- return Index(result, name=name)
+ return DatetimeIndex(result, name=name)
def _wrap_joined_index(self, joined, other):
name = self.name if self.name == other.name else None
@@ -310,7 +315,7 @@ def _wrap_joined_index(self, joined, other):
joined.name = name
return joined
else:
- return Index(joined, name=name)
+ return DatetimeIndex(joined, name=name)
def _can_fast_union(self, other):
offset = self.offset
@@ -364,7 +369,7 @@ def intersection(self, other):
y : Index or DateRange
"""
if not isinstance(other, DateRange) or other.offset != self.offset:
- return Index.intersection(self.view(Index), other)
+ return DatetimeIndex.intersection(self.view(DatetimeIndex), other)
# to make our life easier, "sort" the two ranges
if self[0] <= other[0]:
@@ -376,7 +381,7 @@ def intersection(self, other):
right_start = right[0]
if left_end < right_start:
- return Index([])
+ return DatetimeIndex([])
else:
lslice = slice(*left.slice_locs(right_start, None))
left_chunk = left.values[lslice]
@@ -397,7 +402,8 @@ def tz_normalize(self, tz):
-------
normalized : DateRange
"""
- new_dates = np.array([tz.normalize(x) for x in self])
+ new_dates = np.array([tz.normalize(x.replace(tzinfo=self.tzinfo))
+ for x in self])
new_dates = new_dates.view(DateRange)
new_dates.offset = self.offset
new_dates.tzinfo = tz
@@ -412,7 +418,9 @@ def tz_localize(self, tz):
-------
localized : DateRange
"""
- new_dates = np.array([tz.localize(x) for x in self])
+ new_dates = np.array(
+ [np.datetime64(tz.localize(x.replace(tzinfo=self.tzinfo)))
+ for x in self])
new_dates = new_dates.view(DateRange)
new_dates.offset = self.offset
new_dates.tzinfo = tz
@@ -542,6 +550,15 @@ def generate_range(start=None, end=None, periods=None,
# return False
def _in_range(start, end, rng_start, rng_end):
+ if isinstance(rng_start, datetime):
+ rng_start = np.datetime64(rng_start)
+ if isinstance(rng_end, datetime):
+ rng_end = np.datetime64(rng_end)
+ if isinstance(start, datetime):
+ start = np.datetime64(start)
+ if isinstance(end, datetime):
+ end = np.datetime64(end)
+
return start > rng_start and end < rng_end
def _naive_in_cache_range(start, end):
View
@@ -2,6 +2,8 @@
from datetime import datetime, timedelta
import sys
+import numpy as np
+from pandas.core.common import _dt_box
try:
import dateutil
@@ -49,6 +51,8 @@ def to_datetime(arg):
return arg
def normalize_date(dt):
    """
    Return a ``datetime`` holding only the year, month and day of ``dt``.

    An ``np.datetime64`` input is first converted with ``_dt_box``; any
    other input is read directly through its ``year``, ``month`` and
    ``day`` attributes.
    """
    moment = _dt_box(dt) if isinstance(dt, np.datetime64) else dt
    return datetime(moment.year, moment.month, moment.day)
#-------------------------------------------------------------------------------
View
@@ -1100,6 +1100,7 @@ def transpose(self):
"""
return self._constructor(data=self.values.T, index=self.columns,
columns=self.index, copy=False)
+
T = property(transpose)
#----------------------------------------------------------------------
@@ -1861,11 +1862,15 @@ def reset_index(self):
level_values = lev.values
if level_values.dtype == np.object_:
level_values = lib.maybe_convert_objects(level_values)
+ if level_values.dtype == np.datetime64:
+ # converts to datetime
+ # TODO: need new block type to handle datetime64
+ level_values = level_values.astype('O')
new_obj.insert(0, col_name, level_values.take(lab))
else:
name = self.index.name
- if name is None:
+ if name is None or name == 'index':
name = 'index' if 'index' not in self else 'level_0'
new_obj.insert(0, name, self.index.values)
new_obj.index = np.arange(len(new_obj))
@@ -3930,8 +3935,6 @@ def _convert_object_array(content, columns):
return sdict, columns
def _homogenize(data, index, columns, dtype=None):
- from pandas.core.series import _sanitize_array
-
homogenized = {}
if dtype is not None:
@@ -3958,13 +3961,9 @@ def _homogenize(data, index, columns, dtype=None):
# are putting it into an ndarray later
v = v.reindex(index, copy=False)
else:
- if isinstance(v, dict):
- if oindex is None:
- oindex = index.astype('O')
- v = lib.fast_multiget(v, oindex, default=np.nan)
-
- v = _sanitize_array(v, index, dtype=dtype, copy=False,
- raise_cast_failure=False)
+ v = Series(v, index=index, dtype=dtype)
+ if oindex is None:
+ oindex = index.astype('O')
homogenized[k] = v
View
@@ -310,6 +310,8 @@ def _expand_axes(self, key):
new_axes = []
for k, ax in zip(key, self.axes):
if k not in ax:
+ if type(k) != ax.dtype.type:
+ ax = ax.astype('O')
new_axes.append(np.concatenate([ax, [k]]))
else:
new_axes.append(ax)
View
@@ -432,6 +432,8 @@ def _aggregate_series_pure_python(self, obj, func, group_index, ngroups):
result = None
for label, group in self._generate_groups(obj, group_index, ngroups):
+ if group is None:
+ continue
res = func(group)
if result is None:
try:
Oops, something went wrong.

0 comments on commit 9a704bf

Please sign in to comment.