diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 2d93652ca91db..9345f11aca341 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -932,14 +932,16 @@ New Behavior: Index ``+`` / ``-`` no longer used for set operations ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Addition and subtraction of the base Index type (not the numeric subclasses) +Addition and subtraction of the base Index type and of DatetimeIndex +(not the numeric index types) previously performed set operations (set union and difference). This behaviour was already deprecated since 0.15.0 (in favor using the specific ``.union()`` and ``.difference()`` methods), and is now disabled. When possible, ``+`` and ``-`` are now used for element-wise operations, for -example for concatenating strings (:issue:`8227`, :issue:`14127`). +example for concatenating strings or subtracting datetimes +(:issue:`8227`, :issue:`14127`). -Previous Behavior: +Previous behavior: .. code-block:: ipython @@ -962,6 +964,23 @@ For example, the behaviour of adding two integer Indexes: is unchanged. The base ``Index`` is now made consistent with this behaviour. +Further, because of this change, it is now possible to subtract two +DatetimeIndex objects resulting in a TimedeltaIndex: + +Previous behavior: + +.. code-block:: ipython + + In [1]: pd.DatetimeIndex(['2016-01-01', '2016-01-02']) - pd.DatetimeIndex(['2016-01-02', '2016-01-03']) + FutureWarning: using '-' to provide set differences with datetimelike Indexes is deprecated, use .difference() + Out[1]: DatetimeIndex(['2016-01-01'], dtype='datetime64[ns]', freq=None) + +New behavior: + +.. ipython:: python + + pd.DatetimeIndex(['2016-01-01', '2016-01-02']) - pd.DatetimeIndex(['2016-01-02', '2016-01-03']) + .. 
_whatsnew_0190.api.difference: diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py index 1690a9b229db2..3b676b894d355 100644 --- a/pandas/tseries/base.py +++ b/pandas/tseries/base.py @@ -2,7 +2,6 @@ Base and utility classes for tseries type pandas objects. """ -import warnings from datetime import datetime, timedelta from pandas import compat @@ -628,10 +627,9 @@ def __add__(self, other): raise TypeError("cannot add TimedeltaIndex and {typ}" .format(typ=type(other))) elif isinstance(other, Index): - warnings.warn("using '+' to provide set union with " - "datetimelike Indexes is deprecated, " - "use .union()", FutureWarning, stacklevel=2) - return self.union(other) + raise TypeError("cannot add {typ1} and {typ2}" + .format(typ1=type(self).__name__, + typ2=type(other).__name__)) elif isinstance(other, (DateOffset, timedelta, np.timedelta64, tslib.Timedelta)): return self._add_delta(other) @@ -646,6 +644,7 @@ def __add__(self, other): def __sub__(self, other): from pandas.core.index import Index + from pandas.tseries.index import DatetimeIndex from pandas.tseries.tdi import TimedeltaIndex from pandas.tseries.offsets import DateOffset if isinstance(other, TimedeltaIndex): @@ -653,13 +652,14 @@ def __sub__(self, other): elif isinstance(self, TimedeltaIndex) and isinstance(other, Index): if not isinstance(other, TimedeltaIndex): raise TypeError("cannot subtract TimedeltaIndex and {typ}" - .format(typ=type(other))) + .format(typ=type(other).__name__)) return self._add_delta(-other) + elif isinstance(other, DatetimeIndex): + return self._sub_datelike(other) elif isinstance(other, Index): - warnings.warn("using '-' to provide set differences with " - "datetimelike Indexes is deprecated, " - "use .difference()", FutureWarning, stacklevel=2) - return self.difference(other) + raise TypeError("cannot subtract {typ1} and {typ2}" + .format(typ1=type(self).__name__, + typ2=type(other).__name__)) elif isinstance(other, (DateOffset, timedelta, np.timedelta64, 
tslib.Timedelta)): return self._add_delta(-other) diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 351edf1b38352..e26a0548fdc78 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -731,19 +731,43 @@ def _add_datelike(self, other): def _sub_datelike(self, other): # subtract a datetime from myself, yielding a TimedeltaIndex from pandas import TimedeltaIndex - other = Timestamp(other) - if other is tslib.NaT: - result = self._nat_new(box=False) - # require tz compat - elif not self._has_same_tz(other): - raise TypeError("Timestamp subtraction must have the same " - "timezones or no timezones") + if isinstance(other, DatetimeIndex): + # require tz compat + if not self._has_same_tz(other): + raise TypeError("DatetimeIndex subtraction must have the same " + "timezones or no timezones") + result = self._sub_datelike_dti(other) + elif isinstance(other, (tslib.Timestamp, datetime)): + other = Timestamp(other) + if other is tslib.NaT: + result = self._nat_new(box=False) + # require tz compat + elif not self._has_same_tz(other): + raise TypeError("Timestamp subtraction must have the same " + "timezones or no timezones") + else: + i8 = self.asi8 + result = i8 - other.value + result = self._maybe_mask_results(result, + fill_value=tslib.iNaT) else: - i8 = self.asi8 - result = i8 - other.value - result = self._maybe_mask_results(result, fill_value=tslib.iNaT) + raise TypeError("cannot subtract DatetimeIndex and {typ}" + .format(typ=type(other).__name__)) return TimedeltaIndex(result, name=self.name, copy=False) + def _sub_datelike_dti(self, other): + """elementwise i8 difference of two equal-length DatetimeIndexes""" + if not len(self) == len(other): + raise ValueError("cannot subtract indices of unequal length") + + self_i8 = self.asi8 + other_i8 = other.asi8 + new_values = self_i8 - other_i8 + if self.hasnans or other.hasnans: + mask = (self._isnan) | (other._isnan) + new_values[mask] = tslib.iNaT + return new_values.view('i8') + def _maybe_update_attributes(self, attrs): 
""" Update Index attributes (e.g. freq) depending on op """ freq = attrs.get('freq', None) diff --git a/pandas/tseries/tests/test_base.py b/pandas/tseries/tests/test_base.py index 96ff74c819624..8a86fcba32ecb 100644 --- a/pandas/tseries/tests/test_base.py +++ b/pandas/tseries/tests/test_base.py @@ -360,7 +360,7 @@ def test_resolution(self): tz=tz) self.assertEqual(idx.resolution, expected) - def test_add_iadd(self): + def test_union(self): for tz in self.tz: # union rng1 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) @@ -378,17 +378,12 @@ def test_add_iadd(self): for rng, other, expected in [(rng1, other1, expected1), (rng2, other2, expected2), (rng3, other3, expected3)]: - # GH9094 - with tm.assert_produces_warning(FutureWarning): - result_add = rng + other - result_union = rng.union(other) - tm.assert_index_equal(result_add, expected) + result_union = rng.union(other) tm.assert_index_equal(result_union, expected) - # GH9094 - with tm.assert_produces_warning(FutureWarning): - rng += other - tm.assert_index_equal(rng, expected) + + def test_add_iadd(self): + for tz in self.tz: # offset offsets = [pd.offsets.Hour(2), timedelta(hours=2), @@ -421,7 +416,26 @@ def test_add_iadd(self): with tm.assertRaisesRegexp(TypeError, msg): Timestamp('2011-01-01') + idx - def test_sub_isub(self): + def test_add_dti_dti(self): + # previously performed setop (deprecated in 0.16.0), now raises + # TypeError (GH14164) + + dti = date_range('20130101', periods=3) + dti_tz = date_range('20130101', periods=3).tz_localize('US/Eastern') + + with tm.assertRaises(TypeError): + dti + dti + + with tm.assertRaises(TypeError): + dti_tz + dti_tz + + with tm.assertRaises(TypeError): + dti_tz + dti + + with tm.assertRaises(TypeError): + dti + dti_tz + + def test_difference(self): for tz in self.tz: # diff rng1 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) @@ -439,9 +453,11 @@ def test_sub_isub(self): for rng, other, expected in [(rng1, other1, expected1), (rng2, other2, expected2), 
(rng3, other3, expected3)]: - result_union = rng.difference(other) + result_diff = rng.difference(other) + tm.assert_index_equal(result_diff, expected) - tm.assert_index_equal(result_union, expected) + def test_sub_isub(self): + for tz in self.tz: # offset offsets = [pd.offsets.Hour(2), timedelta(hours=2), @@ -449,9 +465,10 @@ def test_sub_isub(self): for delta in offsets: rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz) - result = rng - delta expected = pd.date_range('1999-12-31 22:00', '2000-01-31 22:00', tz=tz) + + result = rng - delta tm.assert_index_equal(result, expected) rng -= delta tm.assert_index_equal(rng, expected) @@ -466,6 +483,47 @@ def test_sub_isub(self): rng -= 1 tm.assert_index_equal(rng, expected) + def test_sub_dti_dti(self): + # previously performed setop (deprecated in 0.16.0), now changed to + # return subtraction -> TimedeltaIndex (GH14164) + + dti = date_range('20130101', periods=3) + dti_tz = date_range('20130101', periods=3).tz_localize('US/Eastern') + dti_tz2 = date_range('20130101', periods=3).tz_localize('UTC') + expected = TimedeltaIndex([0, 0, 0]) + + result = dti - dti + tm.assert_index_equal(result, expected) + + result = dti_tz - dti_tz + tm.assert_index_equal(result, expected) + + with tm.assertRaises(TypeError): + dti_tz - dti + + with tm.assertRaises(TypeError): + dti - dti_tz + + with tm.assertRaises(TypeError): + dti_tz - dti_tz2 + + # isub + dti -= dti + tm.assert_index_equal(dti, expected) + + # different length raises ValueError + dti1 = date_range('20130101', periods=3) + dti2 = date_range('20130101', periods=4) + with tm.assertRaises(ValueError): + dti1 - dti2 + + # NaN propagation + dti1 = DatetimeIndex(['2012-01-01', np.nan, '2012-01-03']) + dti2 = DatetimeIndex(['2012-01-02', '2012-01-03', np.nan]) + expected = TimedeltaIndex(['1 days', np.nan, np.nan]) + result = dti2 - dti1 + tm.assert_index_equal(result, expected) + def test_sub_period(self): # GH 13078 # not supported, check TypeError @@ -1239,50 +1297,6 @@ 
def _check(result, expected): ['20121231', '20130101', '20130102'], tz='US/Eastern') tm.assert_index_equal(result, expected) - def test_dti_dti_deprecated_ops(self): - - # deprecated in 0.16.0 (GH9094) - # change to return subtraction -> TimeDeltaIndex in 0.17.0 - # shoudl move to the appropriate sections above - - dti = date_range('20130101', periods=3) - dti_tz = date_range('20130101', periods=3).tz_localize('US/Eastern') - - with tm.assert_produces_warning(FutureWarning): - result = dti - dti - expected = Index([]) - tm.assert_index_equal(result, expected) - - with tm.assert_produces_warning(FutureWarning): - result = dti + dti - expected = dti - tm.assert_index_equal(result, expected) - - with tm.assert_produces_warning(FutureWarning): - result = dti_tz - dti_tz - expected = Index([]) - tm.assert_index_equal(result, expected) - - with tm.assert_produces_warning(FutureWarning): - result = dti_tz + dti_tz - expected = dti_tz - tm.assert_index_equal(result, expected) - - with tm.assert_produces_warning(FutureWarning): - result = dti_tz - dti - expected = dti_tz - tm.assert_index_equal(result, expected) - - with tm.assert_produces_warning(FutureWarning): - result = dti - dti_tz - expected = dti - tm.assert_index_equal(result, expected) - - with tm.assert_produces_warning(FutureWarning): - self.assertRaises(TypeError, lambda: dti_tz + dti) - with tm.assert_produces_warning(FutureWarning): - self.assertRaises(TypeError, lambda: dti + dti_tz) - def test_dti_tdi_numeric_ops(self): # These are normally union/diff set-like ops @@ -2005,7 +2019,7 @@ def test_resolution(self): idx = pd.period_range(start='2013-04-01', periods=30, freq=freq) self.assertEqual(idx.resolution, expected) - def test_add_iadd(self): + def test_union(self): # union rng1 = pd.period_range('1/1/2000', freq='D', periods=5) other1 = pd.period_range('1/6/2000', freq='D', periods=5) @@ -2031,7 +2045,8 @@ def test_add_iadd(self): rng5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:03', '2000-01-01 
09:05'], freq='T') - other5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:05' - '2000-01-01 09:08'], freq='T') + other5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:05', + '2000-01-01 09:08'], + freq='T') expected5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:03', '2000-01-01 09:05', '2000-01-01 09:08'], freq='T') @@ -2052,20 +2067,19 @@ def test_add_iadd(self): expected6), (rng7, other7, expected7)]: - # GH9094 - with tm.assert_produces_warning(FutureWarning): - result_add = rng + other - result_union = rng.union(other) - - tm.assert_index_equal(result_add, expected) tm.assert_index_equal(result_union, expected) - # GH 6527 - # GH9094 - with tm.assert_produces_warning(FutureWarning): - rng += other - tm.assert_index_equal(rng, expected) + def test_add_iadd(self): + rng = pd.period_range('1/1/2000', freq='D', periods=5) + other = pd.period_range('1/6/2000', freq='D', periods=5) + + # previously performed setop union, now raises TypeError (GH14164) + with tm.assertRaises(TypeError): + rng + other + + with tm.assertRaises(TypeError): + rng += other # offset # DateOffset @@ -2152,7 +2166,7 @@ def test_add_iadd(self): rng += 1 tm.assert_index_equal(rng, expected) - def test_sub_isub(self): + def test_difference(self): # diff rng1 = pd.period_range('1/1/2000', freq='D', periods=5) other1 = pd.period_range('1/6/2000', freq='D', periods=5) @@ -2194,6 +2208,19 @@ def test_sub_isub(self): result_union = rng.difference(other) tm.assert_index_equal(result_union, expected) + def test_sub_isub(self): + + # previously performed setop, now raises TypeError (GH14164) + # TODO needs to wait on #13077 for decision on result type + rng = pd.period_range('1/1/2000', freq='D', periods=5) + other = pd.period_range('1/6/2000', freq='D', periods=5) + + with tm.assertRaises(TypeError): + rng - other + + with tm.assertRaises(TypeError): + rng -= other + # offset # DateOffset rng = pd.period_range('2014', '2024', freq='A')