diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 5de9fd9bb2196..6b7199c019c48 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -28,13 +28,14 @@ from pandas.core.dtypes.missing import isna from pandas.core import nanops -from pandas.core.algorithms import checked_add_with_arr, take, unique1d +from pandas.core.algorithms import ( + checked_add_with_arr, take, unique1d, value_counts) import pandas.core.common as com from pandas.tseries import frequencies from pandas.tseries.offsets import DateOffset, Tick -from .base import ExtensionOpsMixin +from .base import ExtensionArray, ExtensionOpsMixin def _make_comparison_op(cls, op): @@ -343,7 +344,9 @@ def ceil(self, freq, ambiguous='raise', nonexistent='raise'): return self._round(freq, RoundTo.PLUS_INFTY, ambiguous, nonexistent) -class DatetimeLikeArrayMixin(ExtensionOpsMixin, AttributesMixin): +class DatetimeLikeArrayMixin(ExtensionOpsMixin, + AttributesMixin, + ExtensionArray): """ Shared Base/Mixin class for DatetimeArray, TimedeltaArray, PeriodArray @@ -701,6 +704,43 @@ def repeat(self, repeats, *args, **kwargs): values = self._data.repeat(repeats) return type(self)(values.view('i8'), dtype=self.dtype) + def value_counts(self, dropna=False): + """ + Return a Series containing counts of unique values. + + Parameters + ---------- + dropna : boolean, default False + Don't include counts of NaT values. + + Returns + ------- + Series + """ + from pandas import Series, Index + + if dropna: + values = self[~self.isna()]._data + else: + values = self._data + + cls = type(self) + + result = value_counts(values, sort=False, dropna=dropna) + index = Index(cls(result.index.view('i8'), dtype=self.dtype), + name=result.index.name) + return Series(result.values, index=index, name=result.name) + + def map(self, mapper): + # TODO(GH-23179): Add ExtensionArray.map + # Need to figure out if we want ExtensionArray.map first. 
+ # If so, then we can refactor IndexOpsMixin._map_values to + # a standalone function and call from here.. + # Else, just rewrite _map_infer_values to do the right thing. + from pandas import Index + + return Index(self).map(mapper).array + # ------------------------------------------------------------------ # Null Handling @@ -1357,10 +1397,9 @@ def _reduce(self, name, axis=0, skipna=True, **kwargs): if op: return op(axis=axis, skipna=skipna, **kwargs) else: - raise TypeError("cannot perform {name} with type {dtype}" - .format(name=name, dtype=self.dtype)) - # TODO: use super(DatetimeLikeArrayMixin, self)._reduce - # after we subclass ExtensionArray + return super(DatetimeLikeArrayMixin, self)._reduce( + name, skipna, **kwargs + ) def min(self, axis=None, skipna=True, *args, **kwargs): """ diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 3cd8b483416f9..9827c111e0fd2 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -22,7 +22,7 @@ from pandas.core.dtypes.missing import isna, notna import pandas.core.algorithms as algos -from pandas.core.arrays import ExtensionArray, datetimelike as dtl +from pandas.core.arrays import datetimelike as dtl import pandas.core.common as com from pandas.core.missing import backfill_1d, pad_1d @@ -92,9 +92,7 @@ def wrapper(self, other): return compat.set_function_name(wrapper, opname, cls) -class PeriodArray(dtl.DatetimeLikeArrayMixin, - dtl.DatelikeOps, - ExtensionArray): +class PeriodArray(dtl.DatetimeLikeArrayMixin, dtl.DatelikeOps): """ Pandas ExtensionArray for storing Period data. 
@@ -418,21 +416,6 @@ def fillna(self, value=None, method=None, limit=None): new_values = self.copy() return new_values - def value_counts(self, dropna=False): - from pandas import Series, PeriodIndex - - if dropna: - values = self[~self.isna()]._data - else: - values = self._data - - cls = type(self) - - result = algos.value_counts(values, sort=False) - index = PeriodIndex(cls(result.index, freq=self.freq), - name=result.index.name) - return Series(result.values, index=index, name=result.name) - # -------------------------------------------------------------------- def _time_shift(self, n, freq=None): diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 3611e3696e390..719a79cf300a0 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -16,7 +16,7 @@ from pandas.core.dtypes.common import ( _NS_DTYPE, _TD_DTYPE, ensure_int64, is_datetime64_dtype, is_float_dtype, - is_integer_dtype, is_list_like, is_object_dtype, is_scalar, + is_int64_dtype, is_integer_dtype, is_list_like, is_object_dtype, is_scalar, is_string_dtype, is_timedelta64_dtype, is_timedelta64_ns_dtype, pandas_dtype) from pandas.core.dtypes.dtypes import DatetimeTZDtype @@ -244,6 +244,16 @@ def _maybe_clear_freq(self): # ---------------------------------------------------------------- # Array-Like / EA-Interface Methods + def __array__(self, dtype=None): + # TODO(https://github.com/pandas-dev/pandas/pull/23593) + # Maybe push to parent once datetimetz __array__ is figured out. 
+ if is_object_dtype(dtype): + return np.array(list(self), dtype=object) + elif is_int64_dtype(dtype): + return self.asi8 + + return self._data + @Appender(dtl.DatetimeLikeArrayMixin._validate_fill_value.__doc__) def _validate_fill_value(self, fill_value): if isna(fill_value): diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 9bc1e6d7b46fa..9f0954d328f89 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -123,6 +123,21 @@ def test_repeat_preserves_tz(self): expected = DatetimeArray(arr.asi8, freq=None, tz=arr.tz) tm.assert_equal(repeated, expected) + def test_value_counts_preserves_tz(self): + dti = pd.date_range('2000', periods=2, freq='D', tz='US/Central') + arr = DatetimeArray(dti).repeat([4, 3]) + + result = arr.value_counts() + + # Note: not tm.assert_index_equal, since `freq`s do not match + assert result.index.equals(dti) + + arr[-2] = pd.NaT + result = arr.value_counts() + expected = pd.Series([1, 4, 2], + index=[pd.NaT, dti[0], dti[1]]) + tm.assert_series_equal(result, expected) + class TestSequenceToDT64NS(object):