Skip to content

Commit

Permalink
ENH: implement DatetimeLikeArray (#19902)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and jreback committed Jul 2, 2018
1 parent 8b2070a commit 7cd2679
Show file tree
Hide file tree
Showing 9 changed files with 347 additions and 229 deletions.
3 changes: 3 additions & 0 deletions pandas/core/arrays/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from .base import (ExtensionArray, # noqa
ExtensionScalarOpsMixin)
from .categorical import Categorical # noqa
from .datetimes import DatetimeArrayMixin # noqa
from .period import PeriodArrayMixin # noqa
from .timedelta import TimedeltaArrayMixin # noqa
169 changes: 169 additions & 0 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
# -*- coding: utf-8 -*-

import numpy as np

from pandas._libs import iNaT
from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds

from pandas.tseries import frequencies

import pandas.core.common as com
from pandas.core.algorithms import checked_add_with_arr


class DatetimeLikeArrayMixin(object):
    """
    Behaviour common to DatetimeArray, TimedeltaArray and PeriodArray.

    The concrete class is expected to set, in __new__/__init__:
        _data
        _freq
    and to provide the method:
        _validate_frequency
    """

    @property
    def _box_func(self):
        """
        Callable turning one internal value into its scalar object.

        Abstract here; must be supplied by the inheriting class.
        """
        raise com.AbstractMethodError(self)

    def __iter__(self):
        """Return a generator of the i8 values boxed via ``_box_func``."""
        raw = self.asi8
        return (self._box_func(item) for item in raw)

    @property
    def values(self):
        """ the underlying data, viewed as a plain ndarray """
        data = self._data
        return data.view(np.ndarray)

    @property
    def asi8(self):
        """ ``values`` reinterpreted as 'i8' """
        # do not cache or you'll create a memory leak
        plain = self.values
        return plain.view('i8')

    # ------------------------------------------------------------------
    # Null Handling

    @property  # NB: override with cache_readonly in immutable subclasses
    def _isnan(self):
        """ boolean mask, True where the i8 value equals iNaT """
        as_int = self.asi8
        return as_int == iNaT

    @property  # NB: override with cache_readonly in immutable subclasses
    def hasnans(self):
        """ whether any entry is missing; enables various perf speedups """
        missing = self._isnan
        return missing.any()

    def _maybe_mask_results(self, result, fill_value=None, convert=None):
        """
        Overwrite the missing positions of ``result`` with a fill value.

        This is an internal routine.

        Parameters
        ----------
        result : a ndarray
        fill_value : object, default None
            Value written at missing positions; ``np.nan`` when None.
        convert : string/dtype or None
            If truthy, ``result`` is cast to this dtype before filling.

        Returns
        -------
        result : ndarray
            ``result`` itself when there is nothing to mask; otherwise the
            (possibly converted) array with missing positions replaced.
        """
        if not self.hasnans:
            # nothing to mask: hand the caller's array back untouched
            return result

        if convert:
            result = result.astype(convert)
        if fill_value is None:
            fill_value = np.nan
        result[self._isnan] = fill_value
        return result

    # ------------------------------------------------------------------
    # Frequency Properties/Methods

    @property
    def freq(self):
        """Return the frequency object if it is set, otherwise None"""
        return self._freq

    @freq.setter
    def freq(self, value):
        new_freq = value
        if new_freq is not None:
            # normalise to an offset and validate it before storing
            new_freq = frequencies.to_offset(new_freq)
            self._validate_frequency(self, new_freq)

        self._freq = new_freq

    @property
    def freqstr(self):
        """
        Return the frequency object as a string if it is set, otherwise None
        """
        return None if self.freq is None else self.freq.freqstr

    @property  # NB: override with cache_readonly in immutable subclasses
    def inferred_freq(self):
        """
        Tries to return a string representing a frequency guess,
        generated by infer_freq.  Returns None if it can't autodetect the
        frequency.
        """
        try:
            guess = frequencies.infer_freq(self)
        except ValueError:
            guess = None
        return guess

    # ------------------------------------------------------------------
    # Arithmetic Methods

    def _add_datelike(self, other):
        """Always raise TypeError naming the two operand types."""
        names = {'cls': type(self).__name__,
                 'typ': type(other).__name__}
        raise TypeError("cannot add {cls} and {typ}".format(**names))

    def _sub_datelike(self, other):
        """Abstract; must be supplied by the inheriting class."""
        raise com.AbstractMethodError(self)

    def _sub_period(self, other):
        """Default implementation: return NotImplemented."""
        return NotImplemented

    def _add_offset(self, offset):
        """Abstract; must be supplied by the inheriting class."""
        raise com.AbstractMethodError(self)

    def _add_delta(self, other):
        """Default implementation: return NotImplemented."""
        return NotImplemented

    def _add_delta_td(self, other):
        """
        Add a delta of a timedeltalike
        return the i8 result view
        """
        nanos = delta_to_nanoseconds(other)
        shifted = checked_add_with_arr(self.asi8, nanos,
                                       arr_mask=self._isnan)
        shifted = shifted.view('i8')
        if self.hasnans:
            # missing entries stay missing after the addition
            shifted[self._isnan] = iNaT
        return shifted.view('i8')

    def _add_delta_tdi(self, other):
        """
        Add a delta of a TimedeltaIndex
        return the i8 result view
        """
        if len(self) != len(other):
            raise ValueError("cannot add indices of unequal length")

        summed = checked_add_with_arr(self.asi8, other.asi8,
                                      arr_mask=self._isnan,
                                      b_mask=other._isnan)
        if self.hasnans or other.hasnans:
            # a missing value on either side gives a missing result
            summed[self._isnan | other._isnan] = iNaT
        return summed.view('i8')
110 changes: 110 additions & 0 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# -*- coding: utf-8 -*-
import warnings

import numpy as np

from pandas._libs.tslib import Timestamp, NaT, iNaT
from pandas._libs.tslibs import timezones

from pandas.util._decorators import cache_readonly

from pandas.core.dtypes.common import _NS_DTYPE, is_datetime64tz_dtype
from pandas.core.dtypes.dtypes import DatetimeTZDtype

from pandas.core.algorithms import checked_add_with_arr

from .datetimelike import DatetimeLikeArrayMixin


class DatetimeArrayMixin(DatetimeLikeArrayMixin):
    """
    Datetime-specific pieces layered on DatetimeLikeArrayMixin.

    Assumes that subclass __new__/__init__ defines:
        tz
        _freq
        _data
    """

    # -----------------------------------------------------------------
    # Descriptive Properties

    @property
    def _box_func(self):
        """
        Callable boxing one internal value into a Timestamp carrying this
        array's freq and tz (both looked up when the callable is invoked).
        """
        return lambda x: Timestamp(x, freq=self.freq, tz=self.tz)

    @cache_readonly
    def dtype(self):
        """_NS_DTYPE when tz is None, otherwise DatetimeTZDtype('ns', tz)."""
        if self.tz is None:
            return _NS_DTYPE
        return DatetimeTZDtype('ns', self.tz)

    @property
    def tzinfo(self):
        """
        Alias for tz attribute
        """
        return self.tz

    @property  # NB: override with cache_readonly in immutable subclasses
    def _timezone(self):
        """ Comparable timezone both for pytz / dateutil"""
        return timezones.get_timezone(self.tzinfo)

    @property
    def offset(self):
        """get/set the frequency of the instance (deprecated; use freq)"""
        msg = ('DatetimeIndex.offset has been deprecated and will be removed '
               'in a future version; use DatetimeIndex.freq instead.')
        warnings.warn(msg, FutureWarning, stacklevel=2)
        return self.freq

    @offset.setter
    def offset(self, value):
        """get/set the frequency of the instance (deprecated; use freq)"""
        msg = ('DatetimeIndex.offset has been deprecated and will be removed '
               'in a future version; use DatetimeIndex.freq instead.')
        warnings.warn(msg, FutureWarning, stacklevel=2)
        self.freq = value

    # -----------------------------------------------------------------
    # Comparison Methods

    def _has_same_tz(self, other):
        """
        Return whether ``other`` has the same comparable timezone as self.

        np.datetime64 scalars are first converted to Timestamp; objects
        without a ``tzinfo`` attribute are looked up with the placeholder
        '__no_tz__' instead.
        """
        zzone = self._timezone

        # vzone shouldn't be None if value is non-datetime like
        if isinstance(other, np.datetime64):
            # convert to Timestamp as np.datetime64 doesn't have tz attr
            other = Timestamp(other)
        vzone = timezones.get_timezone(getattr(other, 'tzinfo', '__no_tz__'))
        return zzone == vzone

    def _assert_tzawareness_compat(self, other):
        """
        Raise TypeError when exactly one of self/other is tz-aware.

        ``other is NaT`` is always accepted.  Returns None.
        """
        # adapted from _Timestamp._assert_tzawareness_compat
        other_tz = getattr(other, 'tzinfo', None)
        if is_datetime64tz_dtype(other):
            # Get tzinfo from Series dtype
            other_tz = other.dtype.tz
        if other is NaT:
            # pd.NaT quacks both aware and naive
            pass
        elif self.tz is None:
            if other_tz is not None:
                raise TypeError('Cannot compare tz-naive and tz-aware '
                                'datetime-like objects.')
        elif other_tz is None:
            raise TypeError('Cannot compare tz-naive and tz-aware '
                            'datetime-like objects')

    # -----------------------------------------------------------------
    # Arithmetic Methods

    def _sub_datelike_dti(self, other):
        """
        subtraction of two DatetimeIndexes

        Returns
        -------
        ndarray viewed as 'timedelta64[ns]'; positions missing in either
        operand are set to iNaT.

        Raises
        ------
        ValueError
            If the operands differ in length.
        OverflowError
            If a non-missing difference does not fit in int64.
        """
        if len(self) != len(other):
            # message text kept unchanged for existing callers
            raise ValueError("cannot add indices of unequal length")

        self_i8 = self.asi8
        other_i8 = other.asi8
        # Plain ``self_i8 - other_i8`` wraps silently on int64 overflow;
        # use the checked addition of the negation so it raises instead.
        # Missing slots are excluded from the check via the masks.
        new_values = checked_add_with_arr(self_i8, -other_i8,
                                          arr_mask=self._isnan,
                                          b_mask=other._isnan)
        if self.hasnans or other.hasnans:
            mask = (self._isnan) | (other._isnan)
            new_values[mask] = iNaT
        return new_values.view('timedelta64[ns]')
28 changes: 28 additions & 0 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# -*- coding: utf-8 -*-

from pandas._libs.tslibs.period import Period

from pandas.util._decorators import cache_readonly

from pandas.core.dtypes.dtypes import PeriodDtype

from .datetimelike import DatetimeLikeArrayMixin


class PeriodArrayMixin(DatetimeLikeArrayMixin):
    """Period-specific pieces layered on DatetimeLikeArrayMixin."""

    @property
    def _box_func(self):
        """Callable boxing one ordinal into a Period with this freq."""
        def box(x):
            # freq is looked up when boxing, not when the property is read
            return Period._from_ordinal(ordinal=x, freq=self.freq)

        return box

    @cache_readonly
    def dtype(self):
        """PeriodDtype constructed from this array's freq."""
        freq = self.freq
        return PeriodDtype.construct_from_string(freq)

    @property
    def _ndarray_values(self):
        """The stored ordinals (``_data``)."""
        # Ordinals
        ordinals = self._data
        return ordinals

    @property
    def asi8(self):
        """``_ndarray_values`` viewed as 'i8'."""
        ordinals = self._ndarray_values
        return ordinals.view('i8')
17 changes: 17 additions & 0 deletions pandas/core/arrays/timedelta.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# -*- coding: utf-8 -*-

from pandas._libs.tslib import Timedelta

from pandas.core.dtypes.common import _TD_DTYPE

from .datetimelike import DatetimeLikeArrayMixin


class TimedeltaArrayMixin(DatetimeLikeArrayMixin):
    """Timedelta-specific pieces layered on DatetimeLikeArrayMixin."""

    @property
    def _box_func(self):
        """Callable boxing one value into a Timedelta with unit='ns'."""
        def box(x):
            return Timedelta(x, unit='ns')

        return box

    @property
    def dtype(self):
        """The fixed dtype for this array, ``_TD_DTYPE``."""
        return _TD_DTYPE
Loading

0 comments on commit 7cd2679

Please sign in to comment.