Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement reductions from #24024 #24484

Merged
merged 6 commits into from
Dec 29, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

from pandas.core.algorithms import checked_add_with_arr, take, unique1d
import pandas.core.common as com
from pandas.core import nanops

from pandas.tseries import frequencies
from pandas.tseries.offsets import DateOffset, Tick
Expand Down Expand Up @@ -1381,6 +1382,71 @@ def _ensure_localized(self, arg, ambiguous='raise', nonexistent='raise',
)
return arg

# --------------------------------------------------------------
# Reductions

def _reduce(self, name, axis=0, skipna=True, **kwargs):
    """
    Dispatch a reduction to the method of the same name on this array.

    Parameters
    ----------
    name : str
        Name of the reduction method to look up on ``self``.
    axis : int, default 0
        Forwarded to the reduction method as a keyword argument.
    skipna : bool, default True
        Forwarded to the reduction method as a keyword argument.
    **kwargs
        Additional keyword arguments forwarded to the reduction method.

    Returns
    -------
    object
        Whatever the looked-up reduction method returns.

    Raises
    ------
    TypeError
        If ``self`` has no callable attribute called `name`.
    """
    op = getattr(self, name, None)
    # Require a callable: a plain data attribute that merely shares the
    # requested name is not a reduction and must yield the same error as
    # a missing attribute, not an unrelated "object is not callable".
    if not callable(op):
        raise TypeError("cannot perform {name} with type {dtype}"
                        .format(name=name, dtype=self.dtype))
    # TODO: use super(DatetimeLikeArrayMixin, self)._reduce
    #  after we subclass ExtensionArray
    return op(axis=axis, skipna=skipna, **kwargs)

def min(self, axis=None, skipna=True, *args, **kwargs):
    """
    Return the minimum value of the Array or minimum along
    an axis.

    Parameters
    ----------
    axis : optional
        Only checked by ``nv.validate_minmax_axis``; it is not otherwise
        used by this method.
    skipna : bool, default True
        Passed through to ``nanops.nanmin``.
    *args, **kwargs
        Only checked by ``nv.validate_min``; they are not otherwise used
        by this method.

    Returns
    -------
    scalar
        The minimum boxed with ``self._box_func``, or ``NaT`` when the
        reduction result is NA.

    See Also
    --------
    numpy.ndarray.min
    Index.min : Return the minimum value in an Index.
    Series.min : Return the minimum value in a Series.
    """
    # TODO: this docstring was copied from the DatetimeIndexOps version;
    #  template/share it instead (follow-up requested in review).
    nv.validate_min(args, kwargs)
    nv.validate_minmax_axis(axis)

    # Reduce over the integer (asi8) representation, with NA positions
    # supplied to nanops as an explicit mask.
    result = nanops.nanmin(self.asi8, skipna=skipna, mask=self.isna())
    if isna(result):
        # Period._from_ordinal does not handle np.nan gracefully
        return NaT
    return self._box_func(result)

def max(self, axis=None, skipna=True, *args, **kwargs):
    """
    Return the maximum value of the Array or maximum along
    an axis.

    Parameters
    ----------
    axis : optional
        Only checked by ``nv.validate_minmax_axis``; it is not otherwise
        used by this method.
    skipna : bool, default True
        If True, NA entries are dropped before the maximum is taken.
        If False and any entry is NA, ``NaT`` is returned.
    *args, **kwargs
        Only checked by ``nv.validate_max``; they are not otherwise used
        by this method.

    Returns
    -------
    scalar
        The maximum boxed with ``self._box_func``, or ``NaT`` when there
        are no values to reduce or (with ``skipna=False``) an NA entry is
        present.

    See Also
    --------
    numpy.ndarray.max
    Index.max : Return the maximum value in an Index.
    Series.max : Return the maximum value in a Series.
    """
    # TODO: skipna is broken with max.
    # See https://github.com/pandas-dev/pandas/issues/24265
    # Per the review discussion, nanops is expected to need the same fix
    # that was made for DatetimeIndex; until then NA entries are handled
    # by hand below instead of being delegated to nanops via a mask.
    nv.validate_max(args, kwargs)
    nv.validate_minmax_axis(axis)

    mask = self.isna()
    if skipna:
        values = self[~mask].asi8
    elif mask.any():
        return NaT
    else:
        values = self.asi8

    if not len(values):
        # short-circuit for empty max / min
        return NaT

    result = nanops.nanmax(values, skipna=skipna)
    # Don't have to worry about NA `result`, since no NA went in.
    return self._box_func(result)


DatetimeLikeArrayMixin._add_comparison_ops()

Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/arrays/test_datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,13 @@ def test_scalar_from_string(self):
result = arr._scalar_from_string(str(arr[0]))
assert result == arr[0]

def test_reduce_invalid(self):
    # Asking ``_reduce`` for a name that is not a method on the array
    # must raise TypeError.
    nanos_per_day = 24 * 3600 * 10**9
    day_values = np.arange(10, dtype='i8') * nanos_per_day
    arr = self.array_cls(day_values, freq='D')

    with pytest.raises(TypeError, match='cannot perform'):
        arr._reduce("not a method")

def test_searchsorted(self):
data = np.arange(10, dtype='i8') * 24 * 3600 * 10**9
arr = self.array_cls(data, freq='D')
Expand Down
38 changes: 38 additions & 0 deletions pandas/tests/arrays/test_datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,3 +90,41 @@ def test_setitem_clears_freq(self):
tz='US/Central'))
a[0] = pd.Timestamp("2000", tz="US/Central")
assert a.freq is None


class TestReductions(object):
    """min/max reductions on DatetimeArray, tz-naive and tz-aware."""

    # TODO: move these into test_reductions; per the review discussion
    #  they are kept here for now to make rebasing GH#24024 easier.
    @pytest.mark.parametrize("tz", [None, "US/Central"])
    def test_min_max(self, tz):
        arr = DatetimeArray._from_sequence([
            '2000-01-03',
            '2000-01-03',
            'NaT',
            '2000-01-02',
            '2000-01-05',
            '2000-01-04',
        ], tz=tz)

        # With the default skipna, the NaT entry is ignored.
        result = arr.min()
        expected = pd.Timestamp('2000-01-02', tz=tz)
        assert result == expected

        result = arr.max()
        expected = pd.Timestamp('2000-01-05', tz=tz)
        assert result == expected

        # With skipna=False, the NaT entry makes the result NaT.
        result = arr.min(skipna=False)
        assert result is pd.NaT

        result = arr.max(skipna=False)
        assert result is pd.NaT

    @pytest.mark.parametrize("tz", [None, "US/Central"])
    @pytest.mark.parametrize('skipna', [True, False])
    def test_min_max_empty(self, skipna, tz):
        # An empty array reduces to NaT regardless of skipna.
        arr = DatetimeArray._from_sequence([], tz=tz)
        result = arr.min(skipna=skipna)
        assert result is pd.NaT

        result = arr.max(skipna=skipna)
        assert result is pd.NaT
39 changes: 39 additions & 0 deletions pandas/tests/arrays/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,3 +261,42 @@ def test_repr_large():
"Length: 1000, dtype: period[D]"
)
assert result == expected


# ----------------------------------------------------------------------------
# Reductions

class TestReductions(object):
    """min/max reductions on a daily-frequency period array."""

    def test_min_max(self):
        raw = [
            '2000-01-03',
            '2000-01-03',
            'NaT',
            '2000-01-02',
            '2000-01-05',
            '2000-01-04',
        ]
        parr = period_array(raw, freq='D')

        # Default skipna: the NaT entry does not affect the extrema.
        assert parr.min() == pd.Period('2000-01-02', freq='D')
        assert parr.max() == pd.Period('2000-01-05', freq='D')

        # skipna=False: the NaT entry makes both results NaT.
        assert parr.min(skipna=False) is pd.NaT
        assert parr.max(skipna=False) is pd.NaT

    @pytest.mark.parametrize('skipna', [True, False])
    def test_min_max_empty(self, skipna):
        # An empty array reduces to NaT for either skipna setting.
        empty = period_array([], freq='D')

        assert empty.min(skipna=skipna) is pd.NaT
        assert empty.max(skipna=skipna) is pd.NaT
31 changes: 31 additions & 0 deletions pandas/tests/arrays/test_timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,34 @@ def test_setitem_clears_freq(self):
a = TimedeltaArray(pd.timedelta_range('1H', periods=2, freq='H'))
a[0] = pd.Timedelta("1H")
assert a.freq is None


class TestReductions(object):
    """min/max reductions on TimedeltaArray."""

    def test_min_max(self):
        raw = ['3H', '3H', 'NaT', '2H', '5H', '4H']
        tda = TimedeltaArray._from_sequence(raw)

        # Default skipna: the NaT entry does not affect the extrema.
        assert tda.min() == pd.Timedelta('2H')
        assert tda.max() == pd.Timedelta('5H')

        # skipna=False: the NaT entry makes both results NaT.
        assert tda.min(skipna=False) is pd.NaT
        assert tda.max(skipna=False) is pd.NaT

    @pytest.mark.parametrize('skipna', [True, False])
    def test_min_max_empty(self, skipna):
        # An empty array reduces to NaT for either skipna setting.
        empty = TimedeltaArray._from_sequence([])

        assert empty.min(skipna=skipna) is pd.NaT
        assert empty.max(skipna=skipna) is pd.NaT