Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mix EA into DTA/TDA; part of 24024 #24502

Merged
merged 4 commits into from
Dec 30, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 46 additions & 7 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,14 @@
from pandas.core.dtypes.missing import isna

from pandas.core import nanops
from pandas.core.algorithms import checked_add_with_arr, take, unique1d
from pandas.core.algorithms import (
checked_add_with_arr, take, unique1d, value_counts)
import pandas.core.common as com

from pandas.tseries import frequencies
from pandas.tseries.offsets import DateOffset, Tick

from .base import ExtensionOpsMixin
from .base import ExtensionArray, ExtensionOpsMixin


def _make_comparison_op(cls, op):
Expand Down Expand Up @@ -343,7 +344,9 @@ def ceil(self, freq, ambiguous='raise', nonexistent='raise'):
return self._round(freq, RoundTo.PLUS_INFTY, ambiguous, nonexistent)


class DatetimeLikeArrayMixin(ExtensionOpsMixin, AttributesMixin):
class DatetimeLikeArrayMixin(ExtensionOpsMixin,
AttributesMixin,
ExtensionArray):
"""
Shared Base/Mixin class for DatetimeArray, TimedeltaArray, PeriodArray

Expand Down Expand Up @@ -701,6 +704,43 @@ def repeat(self, repeats, *args, **kwargs):
values = self._data.repeat(repeats)
return type(self)(values.view('i8'), dtype=self.dtype)

def value_counts(self, dropna=False):
    """
    Return a Series containing counts of unique values.

    Parameters
    ----------
    dropna : boolean, default False
        Don't include counts of NaT values.

    Returns
    -------
    Series
        Counts whose index is an Index wrapping ``type(self)`` built with
        this array's dtype.
    """
    from pandas import Series, Index

    if dropna:
        # Mask out the missing entries before handing the data to the
        # counting routine.
        values = self[~self.isna()]._data
    else:
        values = self._data

    cls = type(self)

    result = value_counts(values, sort=False, dropna=dropna)
    # Rebuild the index from its i8 view, passing self.dtype so the
    # resulting index is constructed with the same dtype as this array.
    index = Index(cls(result.index.view('i8'), dtype=self.dtype),
                  name=result.index.name)
    return Series(result.values, index=index, name=result.name)

def map(self, mapper):
    """
    Apply ``mapper`` to the values by round-tripping through an Index.

    The array is wrapped in an ``Index``, ``Index.map`` is called with
    ``mapper``, and the ``.array`` of the mapped Index is returned.
    """
    # TODO(GH-23179): Add ExtensionArray.map
    # First decide whether ExtensionArray.map should exist at all.
    # If it should, IndexOpsMixin._map_values can be refactored into a
    # standalone function and called from here.
    # Otherwise, _map_infer_values should be rewritten to do the right thing.
    from pandas import Index

    as_index = Index(self)
    mapped = as_index.map(mapper)
    return mapped.array

# ------------------------------------------------------------------
# Null Handling

Expand Down Expand Up @@ -1357,10 +1397,9 @@ def _reduce(self, name, axis=0, skipna=True, **kwargs):
if op:
return op(axis=axis, skipna=skipna, **kwargs)
else:
raise TypeError("cannot perform {name} with type {dtype}"
.format(name=name, dtype=self.dtype))
# TODO: use super(DatetimeLikeArrayMixin, self)._reduce
# after we subclass ExtensionArray
return super(DatetimeLikeArrayMixin, self)._reduce(
name, skipna, **kwargs
)

def min(self, axis=None, skipna=True, *args, **kwargs):
"""
Expand Down
21 changes: 2 additions & 19 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from pandas.core.dtypes.missing import isna, notna

import pandas.core.algorithms as algos
from pandas.core.arrays import ExtensionArray, datetimelike as dtl
from pandas.core.arrays import datetimelike as dtl
import pandas.core.common as com
from pandas.core.missing import backfill_1d, pad_1d

Expand Down Expand Up @@ -92,9 +92,7 @@ def wrapper(self, other):
return compat.set_function_name(wrapper, opname, cls)


class PeriodArray(dtl.DatetimeLikeArrayMixin,
dtl.DatelikeOps,
ExtensionArray):
class PeriodArray(dtl.DatetimeLikeArrayMixin, dtl.DatelikeOps):
"""
Pandas ExtensionArray for storing Period data.

Expand Down Expand Up @@ -418,21 +416,6 @@ def fillna(self, value=None, method=None, limit=None):
new_values = self.copy()
return new_values

def value_counts(self, dropna=False):
    """
    Return a Series of counts of the unique values, indexed by a PeriodIndex.

    Parameters
    ----------
    dropna : boolean, default False
        If True, entries flagged by ``self.isna()`` are removed before
        counting.

    Returns
    -------
    Series
    """
    from pandas import Series, PeriodIndex

    if dropna:
        values = self[~self.isna()]._data
    else:
        values = self._data

    cls = type(self)

    result = algos.value_counts(values, sort=False)
    # Re-wrap the counted values in this array type, passing this array's
    # freq, before building the PeriodIndex.
    index = PeriodIndex(cls(result.index, freq=self.freq),
                        name=result.index.name)
    return Series(result.values, index=index, name=result.name)

# --------------------------------------------------------------------

def _time_shift(self, n, freq=None):
Expand Down
12 changes: 11 additions & 1 deletion pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

from pandas.core.dtypes.common import (
_NS_DTYPE, _TD_DTYPE, ensure_int64, is_datetime64_dtype, is_float_dtype,
is_integer_dtype, is_list_like, is_object_dtype, is_scalar,
is_int64_dtype, is_integer_dtype, is_list_like, is_object_dtype, is_scalar,
is_string_dtype, is_timedelta64_dtype, is_timedelta64_ns_dtype,
pandas_dtype)
from pandas.core.dtypes.dtypes import DatetimeTZDtype
Expand Down Expand Up @@ -244,6 +244,16 @@ def _maybe_clear_freq(self):
# ----------------------------------------------------------------
# Array-Like / EA-Interface Methods

def __array__(self, dtype=None):
    """
    Return the data to hand back for an array conversion request.

    Parameters
    ----------
    dtype : optional
        Requested dtype. An object dtype yields an object ndarray built
        from the elements produced by iterating over ``self``; an int64
        dtype yields ``self.asi8``; anything else (including None) yields
        ``self._data``.
    """
    # TODO(https://github.com/pandas-dev/pandas/pull/23593)
    # Maybe push to parent once datetimetz __array__ is figured out.
    if is_object_dtype(dtype):
        elements = list(self)
        return np.array(elements, dtype=object)

    if is_int64_dtype(dtype):
        return self.asi8

    return self._data

@Appender(dtl.DatetimeLikeArrayMixin._validate_fill_value.__doc__)
def _validate_fill_value(self, fill_value):
if isna(fill_value):
Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/arrays/test_datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,21 @@ def test_repeat_preserves_tz(self):
expected = DatetimeArray(arr.asi8, freq=None, tz=arr.tz)
tm.assert_equal(repeated, expected)

def test_value_counts_preserves_tz(self):
    # value_counts on a tz-aware DatetimeArray: the resulting index is
    # compared against the original tz-aware values, first without and
    # then with a NaT entry.
    tz_index = pd.date_range('2000', periods=2, freq='D', tz='US/Central')
    values = DatetimeArray(tz_index).repeat([4, 3])

    counts = values.value_counts()

    # Note: not tm.assert_index_equal, since `freq`s do not match
    assert counts.index.equals(tz_index)

    # Overwrite one entry with NaT; the expected result below includes a
    # NaT row, so the default call is expected to count it.
    values[-2] = pd.NaT
    counts = values.value_counts()
    expected_index = [pd.NaT, tz_index[0], tz_index[1]]
    expected = pd.Series([1, 4, 2], index=expected_index)
    tm.assert_series_equal(counts, expected)


class TestSequenceToDT64NS(object):

Expand Down