Skip to content

Commit

Permalink
BUG: implement MultiIndex.diff, add & and | for intersection/union, GH #260
Browse files Browse the repository at this point in the history
  • Loading branch information
wesm committed Oct 21, 2011
1 parent 89fecc6 commit 24ed9ed
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 4 deletions.
2 changes: 2 additions & 0 deletions RELEASE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ feedback on the library.
- Add inner join option to `DataFrame.join` when joining on key(s) (GH #248)
- Can select set of DataFrame columns by passing a list to `__getitem__` (GH
#253)
- Can use & and | to compute the intersection / union of Index objects, respectively

**Improvements to existing features**

Expand Down Expand Up @@ -158,6 +159,7 @@ feedback on the library.
- `DataFrame.iteritems` and `DataFrame._series` not assigning name attribute
- Panel.__repr__ raised exception on length-0 major/minor axes
- `DataFrame.join` on key with empty DataFrame produced incorrect columns
- Implemented `MultiIndex.diff` (GH #260)
- `Int64Index.take` and `MultiIndex.take` lost name field, fix downstream
issue GH #262
- `read_csv` / `read_table` fixes
Expand Down
28 changes: 26 additions & 2 deletions pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,15 @@ def __add__(self, other):
__le__ = _indexOp('__le__')
__ge__ = _indexOp('__ge__')

def __sub__(self, other):
    """
    Support ``index - other`` as shorthand for ``index.diff(other)``
    """
    # Dispatch through the instance (rather than aliasing the function) so
    # that a subclass overriding ``diff`` is honored by the operator too.
    difference = self.diff(other)
    return difference

def __and__(self, other):
    """
    Support ``index & other`` as shorthand for ``index.intersection(other)``
    """
    # Looked up on the instance so subclass overrides of ``intersection``
    # are used by the operator as well.
    common = self.intersection(other)
    return common

def __or__(self, other):
    """
    Support ``index | other`` as shorthand for ``index.union(other)``
    """
    # Looked up on the instance so subclass overrides of ``union`` are used
    # by the operator as well.
    combined = self.union(other)
    return combined

def union(self, other):
"""
Form the union of two Index objects and sorts if possible
Expand Down Expand Up @@ -363,8 +372,6 @@ def diff(self, other):
theDiff = sorted(set(self) - set(otherArr))
return Index(theDiff)

__sub__ = diff

def get_loc(self, key):
"""
Get integer location for requested label
Expand Down Expand Up @@ -1514,6 +1521,23 @@ def intersection(self, other):
uniq_tuples = sorted(set(self_tuples) & set(other_tuples))
return MultiIndex.from_arrays(zip(*uniq_tuples), sortorder=0)

def diff(self, other):
    """
    Compute sorted set difference of two MultiIndex objects

    Parameters
    ----------
    other : MultiIndex

    Returns
    -------
    diff : MultiIndex
        Sorted tuples present in this index but not in ``other``, carrying
        this index's names. Empty (``self[:0]``) when nothing remains.

    Raises
    ------
    TypeError
        If ``other`` is not a MultiIndex (via ``_assert_can_do_setop``)
    """
    self._assert_can_do_setop(other)

    if self.equals(other):
        return self[:0]

    difference = sorted(set(self.values) - set(other.values))
    if not difference:
        # self is a subset of other: an empty list holds no tuples from
        # which levels could be derived, so reuse the empty slice exactly
        # as the equal-index case above does.
        return self[:0]

    return MultiIndex.from_tuples(difference, sortorder=0,
                                  names=self.names)

def _assert_can_do_setop(self, other):
if not isinstance(other, MultiIndex):
raise TypeError('can only call with other hierarchical '
Expand Down
19 changes: 17 additions & 2 deletions pandas/tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -863,7 +863,7 @@ def test_union(self):
piece1 = self.index[:5][::-1]
piece2 = self.index[3:]

the_union = piece1.union(piece2)
the_union = piece1 | piece2

tups = sorted(self.index.get_tuple_index())
expected = MultiIndex.from_tuples(tups)
Expand All @@ -884,7 +884,7 @@ def test_intersection(self):
piece1 = self.index[:5][::-1]
piece2 = self.index[3:]

the_int = piece1.intersection(piece2)
the_int = piece1 & piece2
tups = sorted(self.index[3:5].get_tuple_index())
expected = MultiIndex.from_tuples(tups)
self.assert_(the_int.equals(expected))
Expand All @@ -896,6 +896,21 @@ def test_intersection(self):
self.assertRaises(TypeError, self.index.intersection,
self.index.get_tuple_index())

def test_diff(self):
    full = self.index
    tail = full[-3:]

    # Removing the last three entries must leave the remaining entries,
    # sorted, as a MultiIndex that keeps the original names.
    remaining = full - tail
    wanted = MultiIndex.from_tuples(sorted(full[:-3].values),
                                    sortorder=0,
                                    names=full.names)

    self.assert_(isinstance(remaining, MultiIndex))
    self.assert_(remaining.equals(wanted))
    self.assertEqual(remaining.names, full.names)

    # An index minus itself is the empty slice of that index.
    emptied = full - full
    nothing = full[:0]
    self.assert_(emptied.equals(nothing))

def test_argsort(self):
result = self.index.argsort()
expected = self.index.get_tuple_index().argsort()
Expand Down

0 comments on commit 24ed9ed

Please sign in to comment.