Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Datetimelike add/sub catch cases more explicitly, tests #19912

Merged
merged 8 commits into from
Mar 6, 2018
34 changes: 28 additions & 6 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
is_period_dtype,
is_timedelta64_dtype)
from pandas.core.dtypes.generic import (
ABCIndex, ABCSeries, ABCPeriodIndex, ABCIndexClass)
ABCIndex, ABCSeries, ABCDataFrame, ABCPeriodIndex, ABCIndexClass)
from pandas.core.dtypes.missing import isna
from pandas.core import common as com, algorithms, ops
from pandas.core.algorithms import checked_add_with_arr
Expand All @@ -48,6 +48,7 @@
from pandas.util._decorators import Appender, cache_readonly
import pandas.core.dtypes.concat as _concat
import pandas.tseries.frequencies as frequencies
from pandas.tseries.offsets import Tick, DateOffset

import pandas.core.indexes.base as ibase
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
Expand Down Expand Up @@ -666,6 +667,9 @@ def _sub_nat(self):
def _sub_period(self, other):
return NotImplemented

def _add_offset(self, offset):
raise com.AbstractMethodError(self)

def _addsub_offset_array(self, other, op):
"""
Add or subtract array-like of DateOffset objects
Expand Down Expand Up @@ -705,14 +709,17 @@ def __add__(self, other):
from pandas import DateOffset

other = lib.item_from_zerodim(other)
if isinstance(other, ABCSeries):
if isinstance(other, (ABCSeries, ABCDataFrame)):
return NotImplemented

# scalar others
elif other is NaT:
result = self._add_nat()
elif isinstance(other, (DateOffset, timedelta, np.timedelta64)):
elif isinstance(other, (Tick, timedelta, np.timedelta64)):
result = self._add_delta(other)
elif isinstance(other, DateOffset):
# specifically _not_ a Tick
result = self._add_offset(other)
elif isinstance(other, (datetime, np.datetime64)):
result = self._add_datelike(other)
elif is_integer(other):
Expand All @@ -733,6 +740,12 @@ def __add__(self, other):
elif is_integer_dtype(other) and self.freq is None:
# GH#19123
raise NullFrequencyError("Cannot shift with no freq")
elif is_float_dtype(other):
# Explicitly catch invalid dtypes
raise TypeError("cannot add {dtype}-dtype to {cls}"
.format(dtype=other.dtype,
cls=type(self).__name__))

else: # pragma: no cover
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what hits this then? You want to make this NOT propagate on the reversed ops yes?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Many cases still hit this (objectdtype, categoricaldtype, miscellaneous scalars...) catching floatdype is the main obvious one. Handling object dtype correctly is the next PR.

return NotImplemented

Expand All @@ -753,17 +766,20 @@ def __radd__(self, other):
cls.__radd__ = __radd__

def __sub__(self, other):
from pandas import Index, DateOffset
from pandas import Index

other = lib.item_from_zerodim(other)
if isinstance(other, ABCSeries):
if isinstance(other, (ABCSeries, ABCDataFrame)):
return NotImplemented

# scalar others
elif other is NaT:
result = self._sub_nat()
elif isinstance(other, (DateOffset, timedelta, np.timedelta64)):
elif isinstance(other, (Tick, timedelta, np.timedelta64)):
result = self._add_delta(-other)
elif isinstance(other, DateOffset):
# specifically _not_ a Tick
result = self._add_offset(-other)
elif isinstance(other, (datetime, np.datetime64)):
result = self._sub_datelike(other)
elif is_integer(other):
Expand All @@ -790,6 +806,12 @@ def __sub__(self, other):
elif is_integer_dtype(other) and self.freq is None:
# GH#19123
raise NullFrequencyError("Cannot shift with no freq")

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in next pass remove the extra line, want to be consistent on formatting

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will do.

elif is_float_dtype(other):
# Explicitly catch invalid dtypes
raise TypeError("cannot subtract {dtype}-dtype from {cls}"
.format(dtype=other.dtype,
cls=type(self).__name__))
else: # pragma: no cover
return NotImplemented

Expand Down
8 changes: 4 additions & 4 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -933,8 +933,6 @@ def _add_delta(self, delta):
if not isinstance(delta, TimedeltaIndex):
delta = TimedeltaIndex(delta)
new_values = self._add_delta_tdi(delta)
elif isinstance(delta, DateOffset):
new_values = self._add_offset(delta).asi8
else:
new_values = self.astype('O') + delta

Expand All @@ -945,6 +943,7 @@ def _add_delta(self, delta):
return result

def _add_offset(self, offset):
assert not isinstance(offset, Tick)
try:
if self.tz is not None:
values = self.tz_localize(None)
Expand All @@ -953,12 +952,13 @@ def _add_offset(self, offset):
result = offset.apply_index(values)
if self.tz is not None:
result = result.tz_localize(self.tz)
return result

except NotImplementedError:
warnings.warn("Non-vectorized DateOffset being applied to Series "
"or DatetimeIndex", PerformanceWarning)
return self.astype('O') + offset
result = self.astype('O') + offset

return DatetimeIndex(result, freq='infer')

def _format_native_types(self, na_rep='NaT', date_format=None, **kwargs):
from pandas.io.formats.format import _get_format_datetime64_from_values
Expand Down
33 changes: 29 additions & 4 deletions pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,11 @@

import pandas.tseries.frequencies as frequencies
from pandas.tseries.frequencies import get_freq_code as _gfc
from pandas.tseries.offsets import Tick, DateOffset

from pandas.core.indexes.datetimes import DatetimeIndex, Int64Index, Index
from pandas.core.indexes.datetimelike import DatelikeOps, DatetimeIndexOpsMixin
from pandas.core.tools.datetimes import parse_time_string
import pandas.tseries.offsets as offsets

from pandas._libs.lib import infer_dtype
from pandas._libs import tslib, index as libindex
Expand Down Expand Up @@ -680,9 +681,9 @@ def to_timestamp(self, freq=None, how='start'):

def _maybe_convert_timedelta(self, other):
if isinstance(
other, (timedelta, np.timedelta64, offsets.Tick, np.ndarray)):
other, (timedelta, np.timedelta64, Tick, np.ndarray)):
offset = frequencies.to_offset(self.freq.rule_code)
if isinstance(offset, offsets.Tick):
if isinstance(offset, Tick):
if isinstance(other, np.ndarray):
nanos = np.vectorize(delta_to_nanoseconds)(other)
else:
Expand All @@ -691,7 +692,7 @@ def _maybe_convert_timedelta(self, other):
check = np.all(nanos % offset_nanos == 0)
if check:
return nanos // offset_nanos
elif isinstance(other, offsets.DateOffset):
elif isinstance(other, DateOffset):
freqstr = other.rule_code
base = frequencies.get_base_alias(freqstr)
if base == self.freq.rule_code:
Expand All @@ -707,6 +708,30 @@ def _maybe_convert_timedelta(self, other):
msg = "Input has different freq from PeriodIndex(freq={0})"
raise IncompatibleFrequency(msg.format(self.freqstr))

def _add_offset(self, other):
assert not isinstance(other, Tick)
base = frequencies.get_base_alias(other.rule_code)
if base != self.freq.rule_code:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you added lots of things here, but don't appear to be tested

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are not new cases; they are currently handled in a jumble in maybe_convert_timedeltalike. This method already exists for DatetimeIndex.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would this be easier to push through if _add_offset were separated from the rest of the PR? This doesn't change any behavior, but does let us remove a particularly inelegant piece of code.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

well, you are adding code here, not removing code, so not sure what is changing

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this is a deal-breaker I can change this to just alias _add_offset = _add_delta (which is the method currently called when a DateOffset is added to a PeriodIndex). The point of this part of the PR is a) by separating the methods for Tick vs non-Tick offsets, we get to simplify elsewhere, and b) _add_delta is a thin wrapper around _maybe_convert_timedelta, which is on the messy side.

msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr)
raise IncompatibleFrequency(msg)
return self.shift(other.n)

def _add_delta_td(self, other):
assert isinstance(other, (timedelta, np.timedelta64, Tick))
nanos = delta_to_nanoseconds(other)
own_offset = frequencies.to_offset(self.freq.rule_code)

if isinstance(own_offset, Tick):
offset_nanos = delta_to_nanoseconds(own_offset)
if np.all(nanos % offset_nanos == 0):
return self.shift(nanos // offset_nanos)

# raise when input doesn't have freq
raise IncompatibleFrequency("Input has different freq from "
"{cls}(freq={freqstr})"
.format(cls=type(self).__name__,
freqstr=self.freqstr))

def _add_delta(self, other):
ordinal_delta = self._maybe_convert_timedelta(other)
return self.shift(ordinal_delta)
Expand Down
6 changes: 6 additions & 0 deletions pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,12 @@ def _maybe_update_attributes(self, attrs):
attrs['freq'] = 'infer'
return attrs

def _add_offset(self, other):
assert not isinstance(other, Tick)
raise TypeError("cannot add the type {typ} to a {cls}"
.format(typ=type(other).__name__,
cls=type(self).__name__))

def _add_delta(self, delta):
"""
Add a timedelta-like, Tick, or TimedeltaIndex-like object
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/indexes/datetimes/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from pandas import (Timestamp, Timedelta, Series,
DatetimeIndex, TimedeltaIndex,
date_range)
from pandas.core import ops
from pandas._libs import tslib
from pandas._libs.tslibs.offsets import shift_months

Expand Down Expand Up @@ -307,6 +308,17 @@ def test_dti_cmp_list(self):

class TestDatetimeIndexArithmetic(object):

# -------------------------------------------------------------
# Invalid Operations

@pytest.mark.parametrize('other', [3.14, np.array([2.0, 3.0])])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is there a reason to add these to each rather than using a common test?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do slightly prefer the readability when they are in the sub-classes, but at the same time it doesn't fully guarantee that the tests are actually testing all subclasses, so we have a tradeoff here. ideally would systematically test common behaviors in test_datetimelike

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought about this, put them here because ATM this is these are well-established as the correct place for arithmetic tests. But I agree that there are a bunch of arithmetic tests that could be shared between DTI/TDI/PI. Maybe tests.indexes.test_datetimelike_arithmetic? A bit verbose...

BTW next time I complain about parametrization/fixtures, point out statsmodels' tests.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, let's think about this. maybe open an issue about it.

@pytest.mark.parametrize('op', [operator.add, ops.radd,
operator.sub, ops.rsub])
def test_dti_add_sub_float(self, op, other):
dti = DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D')
with pytest.raises(TypeError):
op(dti, other)

def test_dti_add_timestamp_raises(self):
idx = DatetimeIndex(['2011-01-01', '2011-01-02'])
msg = "cannot add DatetimeIndex and Timestamp"
Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/indexes/period/test_arithmetic.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# -*- coding: utf-8 -*-
from datetime import timedelta
import operator

import pytest
import numpy as np

Expand All @@ -9,6 +11,7 @@
period_range, Period, PeriodIndex,
_np_version_under1p10)
import pandas.core.indexes.period as period
from pandas.core import ops
from pandas.errors import PerformanceWarning


Expand Down Expand Up @@ -256,6 +259,18 @@ def test_comp_nat(self, dtype):

class TestPeriodIndexArithmetic(object):

# -------------------------------------------------------------
# Invalid Operations

@pytest.mark.parametrize('other', [3.14, np.array([2.0, 3.0])])
@pytest.mark.parametrize('op', [operator.add, ops.radd,
operator.sub, ops.rsub])
def test_pi_add_sub_float(self, op, other):
dti = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D')
pi = dti.to_period('D')
with pytest.raises(TypeError):
op(pi, other)

# -----------------------------------------------------------------
# __add__/__sub__ with ndarray[datetime64] and ndarray[timedelta64]

Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/indexes/timedeltas/test_arithmetic.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# -*- coding: utf-8 -*-
import operator

import pytest
import numpy as np
from datetime import timedelta
Expand All @@ -11,6 +13,7 @@
Series,
Timestamp, Timedelta)
from pandas.errors import PerformanceWarning, NullFrequencyError
from pandas.core import ops


@pytest.fixture(params=[pd.offsets.Hour(2), timedelta(hours=2),
Expand Down Expand Up @@ -270,6 +273,15 @@ class TestTimedeltaIndexArithmetic(object):
# -------------------------------------------------------------
# Invalid Operations

@pytest.mark.parametrize('other', [3.14, np.array([2.0, 3.0])])
@pytest.mark.parametrize('op', [operator.add, ops.radd,
operator.sub, ops.rsub])
def test_tdi_add_sub_float(self, op, other):
dti = DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D')
tdi = dti - dti.shift(1)
with pytest.raises(TypeError):
op(tdi, other)

def test_tdi_add_str_invalid(self):
# GH 13624
tdi = TimedeltaIndex(['1 day', '2 days'])
Expand Down