From 96abfe105dadaeb0fb38f27a694bc506d24bc120 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 3 Aug 2019 18:01:20 -0700 Subject: [PATCH 01/10] REF: make dispatch_to_extension_op signature match dispatch_to_index_op --- pandas/core/ops/__init__.py | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index a9d18c194889c..76cca76f32a0b 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -37,12 +37,14 @@ from pandas.core.dtypes.generic import ( ABCDataFrame, ABCDatetimeArray, + ABCDatetimeIndex, ABCIndex, ABCIndexClass, ABCSeries, ABCSparseArray, ABCSparseSeries, ABCTimedeltaArray, + ABCTimedeltaIndex, ) from pandas.core.dtypes.missing import isna, notna @@ -91,7 +93,7 @@ def get_op_result_name(left, right): name : object Usually a string """ - # `left` is always a pd.Series when called from within ops + # `left` is always a Series when called from within ops if isinstance(right, (ABCSeries, ABCIndexClass)): name = _maybe_match_name(left, right) else: @@ -656,13 +658,16 @@ def dispatch_to_extension_op(op, left, right): else: new_right = right - res_values = op(new_left, new_right) - res_name = get_op_result_name(left, right) - - if op.__name__ in ["divmod", "rdivmod"]: - return _construct_divmod_result(left, res_values, left.index, res_name) - - return _construct_result(left, res_values, left.index, res_name) + try: + res_values = op(new_left, new_right) + except NullFrequencyError: + # DatetimeIndex and TimedeltaIndex with freq == None raise ValueError + # on add/sub of integers (or int-like). We re-raise as a TypeError. + raise TypeError( + "incompatible type for a datetime/timedelta " + "operation [{name}]".format(name=op.__name__) + ) + return res_values # ----------------------------------------------------------------------------- @@ -996,7 +1001,8 @@ def wrapper(left, right): is_extension_array_dtype(right) and not is_scalar(right) ): # GH#22378 disallow scalar to exclude e.g. "category", "Int64" - return dispatch_to_extension_op(op, left, right) + result = dispatch_to_extension_op(op, left, right) + return construct_result(left, result, index=left.index, name=res_name) elif is_timedelta64_dtype(left): result = dispatch_to_index_op(op, left, right, pd.TimedeltaIndex) @@ -1015,7 +1021,7 @@ def wrapper(left, right): right = np.broadcast_to(right, left.shape) right = pd.TimedeltaIndex(right) - assert isinstance(right, (pd.TimedeltaIndex, ABCTimedeltaArray, ABCSeries)) + assert isinstance(right, (ABCTimedeltaIndex, ABCTimedeltaArray, ABCSeries)) try: result = op(left._values, right) except NullFrequencyError: @@ -1033,7 +1039,7 @@ def wrapper(left, right): # does inference in the case where `result` has object-dtype. return construct_result(left, result, index=left.index, name=res_name) - elif isinstance(right, (ABCDatetimeArray, pd.DatetimeIndex)): + elif isinstance(right, (ABCDatetimeArray, ABCDatetimeIndex)): result = op(left._values, right) return construct_result(left, result, index=left.index, name=res_name) @@ -1140,7 +1146,7 @@ def wrapper(self, other, axis=None): raise ValueError("Can only compare identically-labeled Series objects") elif is_categorical_dtype(self): - # Dispatch to Categorical implementation; pd.CategoricalIndex + # Dispatch to Categorical implementation; CategoricalIndex # behavior is non-canonical GH#19513 res_values = dispatch_to_index_op(op, self, other, pd.Categorical) return self._constructor(res_values, index=self.index, name=res_name) @@ -1161,7 +1167,10 @@ def wrapper(self, other, axis=None): ): # Note: the `not is_scalar(other)` condition rules out # e.g. other == "category" - return dispatch_to_extension_op(op, self, other) + res_values = dispatch_to_extension_op(op, self, other) + return self._constructor( + res_values, index=self.index, name=res_name + ).rename(res_name) elif isinstance(other, ABCSeries): # By this point we have checked that self._indexed_same(other) From d542fa5b585fe81f7de4f563ff0e309cd3690f70 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 3 Aug 2019 18:36:50 -0700 Subject: [PATCH 02/10] replace usage of dispatch_to_index_op --- pandas/core/ops/__init__.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 76cca76f32a0b..288fa7ef8870b 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -989,13 +989,9 @@ def wrapper(left, right): ) elif is_datetime64_dtype(left) or is_datetime64tz_dtype(left): - # Give dispatch_to_index_op a chance for tests like - # test_dt64_series_add_intlike, which the index dispatching handles - # specifically. - result = dispatch_to_index_op(op, left, right, pd.DatetimeIndex) - return construct_result( - left, result, index=left.index, name=res_name, dtype=result.dtype - ) + from pandas.core.arrays import DatetimeArray + result = dispatch_to_extension_op(op, DatetimeArray(left), right) + return construct_result(left, result, index=left.index, name=res_name) elif is_extension_array_dtype(left) or ( is_extension_array_dtype(right) and not is_scalar(right) @@ -1148,7 +1144,7 @@ def wrapper(self, other, axis=None): elif is_categorical_dtype(self): # Dispatch to Categorical implementation; CategoricalIndex # behavior is non-canonical GH#19513 - res_values = dispatch_to_index_op(op, self, other, pd.Categorical) + res_values = dispatch_to_extension_op(op, self, other) return self._constructor(res_values, index=self.index, name=res_name) elif is_datetime64_dtype(self) or is_datetime64tz_dtype(self): From 1f34cad6705c8d2f5ada7e09550c7a8ac6282278 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 3 Aug 2019 18:53:32 -0700 Subject: [PATCH 03/10] Avoid dispatch_to_index_op in a couple places --- pandas/core/ops/__init__.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 288fa7ef8870b..a9001790728ac 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -1150,12 +1150,13 @@ def wrapper(self, other, axis=None): elif is_datetime64_dtype(self) or is_datetime64tz_dtype(self): # Dispatch to DatetimeIndex to ensure identical # Series/Index behavior - - res_values = dispatch_to_index_op(op, self, other, pd.DatetimeIndex) + from pandas.core.arrays import DatetimeArray + res_values = dispatch_to_extension_op(op, DatetimeArray(self), other) return self._constructor(res_values, index=self.index, name=res_name) elif is_timedelta64_dtype(self): - res_values = dispatch_to_index_op(op, self, other, pd.TimedeltaIndex) + from pandas.core.arrays import TimedeltaArray + res_values = dispatch_to_extension_op(op, TimedeltaArray(self), other) return self._constructor(res_values, index=self.index, name=res_name) elif is_extension_array_dtype(self) or ( From 00836115a37a228fef2ee1551bc957ec6712a7a5 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 3 Aug 2019 20:26:25 -0700 Subject: [PATCH 04/10] fix NaT add/sub TimedeltaArray, needs test --- pandas/_libs/tslibs/nattype.pyx | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 7f35a11e57b71..6bb665d5393ce 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -123,19 +123,21 @@ cdef class _NaT(datetime): return c_NaT elif getattr(other, '_typ', None) in ['dateoffset', 'series', 'period', 'datetimeindex', - 'timedeltaindex']: + 'timedeltaindex', + 'timedeltaarray']: # Duplicate logic in _Timestamp.__add__ to avoid needing # to subclass; allows us to @final(_Timestamp.__add__) return NotImplemented + # FIXME: aren't there a tone of cases where this will be wrong? return c_NaT def __sub__(self, other): # Duplicate some logic from _Timestamp.__sub__ to avoid needing # to subclass; allows us to @final(_Timestamp.__sub__) if PyDateTime_Check(other): - return NaT + return c_NaT elif PyDelta_Check(other): - return NaT + return c_NaT elif getattr(other, '_typ', None) == 'datetimeindex': # a Timestamp-DatetimeIndex -> yields a negative TimedeltaIndex @@ -151,10 +153,11 @@ cdef class _NaT(datetime): return self + neg_other elif getattr(other, '_typ', None) in ['period', 'series', - 'periodindex', 'dateoffset']: + 'periodindex', 'dateoffset', + 'timedeltaarray']: return NotImplemented - - return NaT + # FIXME: aren't there a tone of cases where this will be wrong? + return c_NaT def __pos__(self): return NaT From 7239891fd88e1488788b67355952f0a57963bbc4 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 4 Aug 2019 08:58:25 -0700 Subject: [PATCH 05/10] remove usage of dispatch_to_index_op --- pandas/core/ops/__init__.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index a9001790728ac..d5293fa0e8a4e 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -990,6 +990,7 @@ def wrapper(left, right): elif is_datetime64_dtype(left) or is_datetime64tz_dtype(left): from pandas.core.arrays import DatetimeArray + result = dispatch_to_extension_op(op, DatetimeArray(left), right) return construct_result(left, result, index=left.index, name=res_name) @@ -1001,7 +1002,9 @@ def wrapper(left, right): return construct_result(left, result, index=left.index, name=res_name) elif is_timedelta64_dtype(left): - result = dispatch_to_index_op(op, left, right, pd.TimedeltaIndex) + from pandas.core.arrays import TimedeltaArray + + result = dispatch_to_extension_op(op, TimedeltaArray(left), right) return construct_result(left, result, index=left.index, name=res_name) elif is_timedelta64_dtype(right): @@ -1151,11 +1154,13 @@ def wrapper(self, other, axis=None): # Dispatch to DatetimeIndex to ensure identical # Series/Index behavior from pandas.core.arrays import DatetimeArray + res_values = dispatch_to_extension_op(op, DatetimeArray(self), other) return self._constructor(res_values, index=self.index, name=res_name) elif is_timedelta64_dtype(self): from pandas.core.arrays import TimedeltaArray + res_values = dispatch_to_extension_op(op, TimedeltaArray(self), other) return self._constructor(res_values, index=self.index, name=res_name) From 868d2a22e1337a78052620b9573cb2732dcdb1e1 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 4 Aug 2019 09:07:26 -0700 Subject: [PATCH 06/10] remove dispatch_to_index_op --- pandas/core/ops/__init__.py | 38 +------------------------------------ 1 file changed, 1 insertion(+), 37 deletions(-) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index d5293fa0e8a4e..9d505ac5f4dab 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -602,42 +602,6 @@ def column_op(a, b): return result -def dispatch_to_index_op(op, left, right, index_class): - """ - Wrap Series left in the given index_class to delegate the operation op - to the index implementation. DatetimeIndex and TimedeltaIndex perform - type checking, timezone handling, overflow checks, etc. - - Parameters - ---------- - op : binary operator (operator.add, operator.sub, ...) - left : Series - right : object - index_class : DatetimeIndex or TimedeltaIndex - - Returns - ------- - result : object, usually DatetimeIndex, TimedeltaIndex, or Series - """ - left_idx = index_class(left) - - # avoid accidentally allowing integer add/sub. For datetime64[tz] dtypes, - # left_idx may inherit a freq from a cached DatetimeIndex. - # See discussion in GH#19147. - if getattr(left_idx, "freq", None) is not None: - left_idx = left_idx._shallow_copy(freq=None) - try: - result = op(left_idx, right) - except NullFrequencyError: - # DatetimeIndex and TimedeltaIndex with freq == None raise ValueError - # on add/sub of integers (or int-like). We re-raise as a TypeError. - raise TypeError( - "incompatible type for a datetime/timedelta " - "operation [{name}]".format(name=op.__name__) - ) - return result - - def dispatch_to_extension_op(op, left, right): """ Assume that left or right is a Series backed by an ExtensionArray, @@ -1010,7 +974,7 @@ def wrapper(left, right): elif is_timedelta64_dtype(right): # We should only get here with non-scalar or timedelta64('NaT') # values for right - # Note: we cannot use dispatch_to_index_op because + # Note: we cannot use dispatch_to_extension_op because # that may incorrectly raise TypeError when we # should get NullFrequencyError orig_right = right From 792b187f4a2f422db9edb0fbe88f22af33aa1bfe Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 4 Aug 2019 09:21:06 -0700 Subject: [PATCH 07/10] tests for op with DatetimeArray/TimedeltaArray --- pandas/_libs/tslibs/nattype.pyx | 2 ++ pandas/tests/scalar/test_nat.py | 19 +++++++++++++------ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 6bb665d5393ce..057ec0e90901b 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -123,6 +123,7 @@ cdef class _NaT(datetime): return c_NaT elif getattr(other, '_typ', None) in ['dateoffset', 'series', 'period', 'datetimeindex', + 'datetimearray', 'timedeltaindex', 'timedeltaarray']: # Duplicate logic in _Timestamp.__add__ to avoid needing @@ -154,6 +155,7 @@ cdef class _NaT(datetime): elif getattr(other, '_typ', None) in ['period', 'series', 'periodindex', 'dateoffset', + 'datetimearray', 'timedeltaarray']: return NotImplemented # FIXME: aren't there a tone of cases where this will be wrong? diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py index f935a7fa880c7..576569a8a0953 100644 --- a/pandas/tests/scalar/test_nat.py +++ b/pandas/tests/scalar/test_nat.py @@ -18,7 +18,7 @@ Timestamp, isna, ) -from pandas.core.arrays import PeriodArray +from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray from pandas.util import testing as tm @@ -397,7 +397,9 @@ def test_nat_rfloordiv_timedelta(val, expected): "value", [ DatetimeIndex(["2011-01-01", "2011-01-02"], name="x"), - DatetimeIndex(["2011-01-01", "2011-01-02"], name="x"), + DatetimeIndex(["2011-01-01", "2011-01-02"], tz="US/Eastern", name="x"), + DatetimeArray._from_sequence(["2011-01-01", "2011-01-02"]), + DatetimeArray._from_sequence(["2011-01-01", "2011-01-02"], tz="US/Pacific"), TimedeltaIndex(["1 day", "2 day"], name="x"), ], ) @@ -406,19 +408,24 @@ def test_nat_arithmetic_index(op_name, value): exp_name = "x" exp_data = [NaT] * 2 - if isinstance(value, DatetimeIndex) and "plus" in op_name: - expected = DatetimeIndex(exp_data, name=exp_name, tz=value.tz) + if value.dtype.kind == "M" and "plus" in op_name: + expected = DatetimeIndex(exp_data, tz=value.tz, name=exp_name) else: expected = TimedeltaIndex(exp_data, name=exp_name) - tm.assert_index_equal(_ops[op_name](NaT, value), expected) + if not isinstance(value, Index): + expected = expected.array + + op = _ops[op_name] + result = op(NaT, value) + tm.assert_equal(result, expected) @pytest.mark.parametrize( "op_name", ["left_plus_right", "right_plus_left", "left_minus_right", "right_minus_left"], ) -@pytest.mark.parametrize("box", [TimedeltaIndex, Series]) +@pytest.mark.parametrize("box", [TimedeltaIndex, Series, TimedeltaArray._from_sequence]) def test_nat_arithmetic_td64_vector(op_name, box): # see gh-19124 vec = box(["1 day", "2 day"], dtype="timedelta64[ns]") From ff4161d2706ab319af0342055ea4808d8c230a94 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 4 Aug 2019 09:23:14 -0700 Subject: [PATCH 08/10] revert tangential edits --- pandas/_libs/tslibs/nattype.pyx | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 057ec0e90901b..6fab1b5c02be1 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -129,16 +129,15 @@ cdef class _NaT(datetime): # Duplicate logic in _Timestamp.__add__ to avoid needing # to subclass; allows us to @final(_Timestamp.__add__) return NotImplemented - # FIXME: aren't there a tone of cases where this will be wrong? return c_NaT def __sub__(self, other): # Duplicate some logic from _Timestamp.__sub__ to avoid needing # to subclass; allows us to @final(_Timestamp.__sub__) if PyDateTime_Check(other): - return c_NaT + return NaT elif PyDelta_Check(other): - return c_NaT + return NaT elif getattr(other, '_typ', None) == 'datetimeindex': # a Timestamp-DatetimeIndex -> yields a negative TimedeltaIndex @@ -158,8 +157,7 @@ cdef class _NaT(datetime): 'datetimearray', 'timedeltaarray']: return NotImplemented - # FIXME: aren't there a tone of cases where this will be wrong? - return c_NaT + return NaT def __pos__(self): return NaT From df81f8cc20fbaa359aee9fd57f481bc9783b0117 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 4 Aug 2019 16:43:00 -0700 Subject: [PATCH 09/10] cln --- pandas/core/ops/__init__.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 41ad5d8804c8b..261660dda6fdd 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -1122,9 +1122,7 @@ def wrapper(self, other, axis=None): # Note: the `not is_scalar(other)` condition rules out # e.g. other == "category" res_values = dispatch_to_extension_op(op, self, other) - return self._constructor( - res_values, index=self.index, name=res_name - ).rename(res_name) + return self._constructor(res_values, index=self.index).rename(res_name) elif isinstance(other, ABCSeries): # By this point we have checked that self._indexed_same(other) From fc1cbea8a62867808b74c17c6ababe0a1e895fa5 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 4 Aug 2019 18:17:14 -0700 Subject: [PATCH 10/10] dummy to force CI