Skip to content

Commit

Permalink
[Backport 14346] BUG: GH14323 Union of differences from DatetimeIndex…
Browse files Browse the repository at this point in the history
… incorrect

closes pandas-dev#14323

Sets freq to None when doing a difference operation on a DatetimeIndex
or TimedeltaIndex, rather than retaining the frequency (which can
cause problems with downstream operations). Frequency of PeriodIndex
is retained.

Author: David Krych <davidk@ciphercap.com>

Closes pandas-dev#14346 from Liam3851/dtind_diff_14323 and squashes the following commits:

1dbf582 [David Krych] BUG: GH14323 Union of differences from DatetimeIndex incorrect

(cherry picked from commit bee90a7)
  • Loading branch information
Liam3851 authored and jorisvandenbossche committed Nov 1, 2016
1 parent 9bca038 commit 6400cdd
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 1 deletion.
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.19.1.txt
Expand Up @@ -44,6 +44,8 @@ Bug Fixes
- Correctly raise ``ValueError`` on empty input to ``pd.eval()`` and ``df.query()`` (:issue:`13139`)

- Bug in ``RangeIndex.intersection`` when result is an empty set (:issue:`14364`).
- Bug in union of differences from a ``DatetimeIndex``; this is a regression in 0.19.0 from 0.18.1 (:issue:`14323`)



- Source installs from PyPI will now work without ``cython`` installed, as in previous versions (:issue:`14204`)
Expand Down
2 changes: 1 addition & 1 deletion pandas/indexes/base.py
Expand Up @@ -2003,7 +2003,7 @@ def difference(self, other):
except TypeError:
pass

return this._shallow_copy(the_diff, name=result_name)
return this._shallow_copy(the_diff, name=result_name, freq=None)

def symmetric_difference(self, other, result_name=None):
"""
Expand Down
74 changes: 74 additions & 0 deletions pandas/tests/indexes/test_datetimelike.py
Expand Up @@ -732,6 +732,31 @@ def test_fillna_datetime64(self):
dtype=object)
self.assert_index_equal(idx.fillna('x'), exp)

def test_difference_of_union(self):
    # GH14323: union of differences taken from a DatetimeIndex.
    # A difference of a DatetimeIndex does not preserve frequency, so
    # the result must not carry over the freq field of the original
    # index; every intermediate result is checked for freq=None.
    def check(result, dates):
        # Compare both the values and the freq attribute against a
        # freq-less DatetimeIndex built from the expected dates.
        expected = pd.DatetimeIndex(dates, freq=None)
        tm.assert_index_equal(result, expected)
        tm.assert_attr_equal('freq', result, expected)

    full_range = pd.date_range("20160920", "20160925", freq="D")

    # Removing the interior days leaves only the two end points.
    interior = pd.date_range("20160921", "20160924", freq="D")
    ends_only = full_range.difference(interior)
    check(ends_only, ["20160920", "20160925"])

    # Removing the trailing days leaves the first two days.
    trailing = pd.date_range("20160922", "20160925", freq="D")
    head_only = full_range.difference(trailing)
    check(head_only, ["20160920", "20160921"])

    # The union of both differences must contain every remaining day.
    combined = ends_only.union(head_only)
    check(combined, ["20160920", "20160921", "20160925"])


class TestPeriodIndex(DatetimeLike, tm.TestCase):
_holder = PeriodIndex
Expand Down Expand Up @@ -938,6 +963,30 @@ def test_no_millisecond_field(self):
with self.assertRaises(AttributeError):
DatetimeIndex([]).millisecond

def test_difference_of_union(self):
    # GH14323: union of differences taken from a PeriodIndex.
    # A difference of a PeriodIndex MUST preserve frequency, and it
    # must still be possible to union the resulting indexes.
    def check(result, periods):
        # Compare both the values and the freq attribute against a
        # daily PeriodIndex built from the expected periods.
        expected = pd.PeriodIndex(periods, freq='D')
        tm.assert_index_equal(result, expected)
        tm.assert_attr_equal('freq', result, expected)

    full_range = pd.period_range("20160920", "20160925", freq="D")

    # Removing the interior days leaves only the two end points.
    interior = pd.period_range("20160921", "20160924", freq="D")
    ends_only = full_range.difference(interior)
    check(ends_only, ["20160920", "20160925"])

    # Removing the trailing days leaves the first two days.
    trailing = pd.period_range("20160922", "20160925", freq="D")
    head_only = full_range.difference(trailing)
    check(head_only, ["20160920", "20160921"])

    # The union of both differences must contain every remaining day.
    combined = ends_only.union(head_only)
    check(combined, ["20160920", "20160921", "20160925"])


class TestTimedeltaIndex(DatetimeLike, tm.TestCase):
_holder = TimedeltaIndex
Expand Down Expand Up @@ -1149,3 +1198,28 @@ def test_fillna_timedelta(self):
exp = pd.Index(
[pd.Timedelta('1 day'), 'x', pd.Timedelta('3 day')], dtype=object)
self.assert_index_equal(idx.fillna('x'), exp)

def test_difference_of_union(self):
    # GH14323: union of differences taken from a TimedeltaIndex.
    # A difference of a TimedeltaIndex does not preserve frequency, so
    # the result must not carry over the freq field of the original
    # index; every intermediate result is checked for freq=None.
    def check(result, deltas):
        # Compare both the values and the freq attribute against a
        # freq-less TimedeltaIndex built from the expected deltas.
        expected = pd.TimedeltaIndex(deltas, freq=None)
        tm.assert_index_equal(result, expected)
        tm.assert_attr_equal('freq', result, expected)

    full_range = pd.timedelta_range("0 days", "5 days", freq="D")

    # Removing the interior days leaves only the two end points.
    interior = pd.timedelta_range("1 days", "4 days", freq="D")
    ends_only = full_range.difference(interior)
    check(ends_only, ["0 days", "5 days"])

    # Removing the trailing days leaves the first two days.
    trailing = pd.timedelta_range("2 days", "5 days", freq="D")
    head_only = full_range.difference(trailing)
    check(head_only, ["0 days", "1 days"])

    # The union of both differences must contain every remaining day.
    combined = ends_only.union(head_only)
    check(combined, ["0 days", "1 days", "5 days"])

0 comments on commit 6400cdd

Please sign in to comment.