Skip to content

Commit

Permalink
API: raise on setops for + and - for Indexes (GH8226)
Browse files Browse the repository at this point in the history
  • Loading branch information
jreback committed Sep 11, 2014
1 parent 54678dd commit 07a46af
Show file tree
Hide file tree
Showing 23 changed files with 110 additions and 98 deletions.
17 changes: 11 additions & 6 deletions doc/source/indexing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1616,28 +1616,33 @@ display:
df
df['A']
.. _indexing.setops:
Set operations on Index objects
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. warning::
In 0.15.0, the set operations ``+`` and ``-`` were deprecated in order to provide these for numeric type operations on certain
index types. ``+`` can be replaced by ``.union()`` or ``|``, and ``-`` by ``.difference()``.
.. _indexing.set_ops:
The three main operations are ``union (|)``, ``intersection (&)``, and ``diff
(-)``. These can be directly called as instance methods or used via overloaded
operators:
The two main operations are ``union (|)`` and ``intersection (&)``.
These can be directly called as instance methods or used via overloaded
operators. Difference is provided via the ``.difference()`` method.
.. ipython:: python
a = Index(['c', 'b', 'a'])
b = Index(['c', 'e', 'd'])
a.union(b)
a | b
a & b
a - b
a.difference(b)
Also available is the ``sym_diff (^)`` operation, which returns elements
that appear in either ``idx1`` or ``idx2`` but not both. This is
equivalent to the Index created by ``(idx1 - idx2) + (idx2 - idx1)``,
equivalent to the Index created by ``(idx1.difference(idx2)).union(idx2.difference(idx1))``,
with duplicates dropped.

.. ipython:: python
Expand Down
6 changes: 6 additions & 0 deletions doc/source/v0.15.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ users upgrade to this version.
- Internal refactoring of the ``Index`` class to no longer sub-class ``ndarray``, see :ref:`Internal Refactoring <whatsnew_0150.refactoring>`
- New datetimelike properties accessor ``.dt`` for Series, see :ref:`Datetimelike Properties <whatsnew_0150.dt>`
- dropping support for ``PyTables`` less than version 3.0.0, and ``numexpr`` less than version 2.1 (:issue:`7990`)
- API change in using ``Index`` set operations, see :ref:`here <whatsnew_0150.index_set_ops>`

- :ref:`Other Enhancements <whatsnew_0150.enhancements>`

Expand Down Expand Up @@ -343,6 +344,11 @@ API changes
- ``Series.to_csv()`` now returns a string when ``path=None``, matching the behaviour of
``DataFrame.to_csv()`` (:issue:`8215`).


.. _whatsnew_0150.index_set_ops:

- The Index set operations ``+`` and ``-`` were deprecated in order to provide these for numeric type operations on certain index types. ``+`` can be replaced by ``.union()`` or ``|``, and ``-`` by ``.difference()``. Further, the method name ``Index.diff()`` is deprecated and can be replaced by ``Index.difference()``.

.. _whatsnew_0150.dt:

.dt accessor
Expand Down
10 changes: 7 additions & 3 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -711,8 +711,10 @@ def __add__(self, other):
from pandas.core.index import Index
from pandas.tseries.offsets import DateOffset
if isinstance(other, Index):
warnings.warn("using '+' to provide set union with Indexes is deprecated, "
"use .union()",FutureWarning)
return self.union(other)
elif isinstance(other, (DateOffset, datetime.timedelta, np.timedelta64)):
if isinstance(other, (DateOffset, datetime.timedelta, np.timedelta64)):
return self._add_delta(other)
elif com.is_integer(other):
return self.shift(other)
Expand All @@ -723,8 +725,10 @@ def __sub__(self, other):
from pandas.core.index import Index
from pandas.tseries.offsets import DateOffset
if isinstance(other, Index):
return self.diff(other)
elif isinstance(other, (DateOffset, datetime.timedelta, np.timedelta64)):
warnings.warn("using '-' to provide set differences with Indexes is deprecated, "
"use .difference()",FutureWarning)
return self.difference(other)
if isinstance(other, (DateOffset, datetime.timedelta, np.timedelta64)):
return self._add_delta(-other)
elif com.is_integer(other):
return self.shift(-other)
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,7 +435,7 @@ def reorder_levels(self, new_levels, ordered=None):
"""
new_levels = self._validate_levels(new_levels)

if len(new_levels) < len(self._levels) or len(self._levels-new_levels):
if len(new_levels) < len(self._levels) or len(self._levels.difference(new_levels)):
raise ValueError('Reordered levels must include all original levels')
values = self.__array__()
self._codes = _get_codes_for_values(values, new_levels)
Expand Down Expand Up @@ -887,7 +887,7 @@ def __setitem__(self, key, value):
raise ValueError("cannot set a Categorical with another, without identical levels")

rvalue = value if com.is_list_like(value) else [value]
to_add = Index(rvalue)-self.levels
to_add = Index(rvalue).difference(self.levels)
# no assignments of values not in levels, but it's always ok to set something to np.nan
if len(to_add) and not isnull(to_add).all():
raise ValueError("cannot setitem on a Categorical with a new level,"
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3682,7 +3682,7 @@ def append(self, other, ignore_index=False, verify_integrity=False):
'ignore_index=True')

index = None if other.name is None else [other.name]
combined_columns = self.columns.tolist() + ((self.columns | other.index) - self.columns).tolist()
combined_columns = self.columns.tolist() + (self.columns | other.index).difference(self.columns).tolist()
other = other.reindex(combined_columns, copy=False)
other = DataFrame(other.values.reshape((1, len(other))),
index=index, columns=combined_columns).convert_objects()
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,7 +473,7 @@ def _set_selection_from_grouper(self):
ax = self.obj._info_axis
groupers = [ g.name for g in grp.groupings if g.level is None and g.name is not None and g.name in ax ]
if len(groupers):
self._group_selection = (ax-Index(groupers)).tolist()
self._group_selection = ax.difference(Index(groupers)).tolist()

def _set_result_index_ordered(self, result):
# set the result index on the passed values object
Expand Down
23 changes: 15 additions & 8 deletions pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1128,9 +1128,10 @@ def argsort(self, *args, **kwargs):

def __add__(self, other):
if isinstance(other, Index):
warnings.warn("using '+' to provide set union with Indexes is deprecated, "
"use '|' or .union()",FutureWarning)
return self.union(other)
else:
return Index(np.array(self) + other)
return Index(np.array(self) + other)

__iadd__ = __add__
__eq__ = _indexOp('__eq__')
Expand All @@ -1141,7 +1142,10 @@ def __add__(self, other):
__ge__ = _indexOp('__ge__')

def __sub__(self, other):
return self.diff(other)
if isinstance(other, Index):
warnings.warn("using '-' to provide set differences with Indexes is deprecated, "
"use .difference()",FutureWarning)
return self.difference(other)

def __and__(self, other):
return self.intersection(other)
Expand Down Expand Up @@ -1273,7 +1277,7 @@ def intersection(self, other):
taken.name = None
return taken

def diff(self, other):
def difference(self, other):
"""
Compute sorted set difference of two Index objects
Expand All @@ -1289,8 +1293,7 @@ def diff(self, other):
-----
One can do either of these and achieve the same result
>>> index - index2
>>> index.diff(index2)
>>> index.difference(index2)
"""

if not hasattr(other, '__iter__'):
Expand All @@ -1308,6 +1311,8 @@ def diff(self, other):
theDiff = sorted(set(self) - set(other))
return Index(theDiff, name=result_name)

diff = deprecate('diff',difference)

def sym_diff(self, other, result_name=None):
"""
Compute the sorted symmetric difference of two Index objects.
Expand Down Expand Up @@ -1350,7 +1355,7 @@ def sym_diff(self, other, result_name=None):
other = Index(other)
result_name = result_name or self.name

the_diff = sorted(set((self - other) + (other - self)))
the_diff = sorted(set((self.difference(other)).union(other.difference(self))))
return Index(the_diff, name=result_name)

def get_loc(self, key):
Expand Down Expand Up @@ -4135,6 +4140,8 @@ def union(self, other):
Returns
-------
Index
>>> index.union(index2)
"""
self._assert_can_do_setop(other)

Expand Down Expand Up @@ -4177,7 +4184,7 @@ def intersection(self, other):
return MultiIndex.from_arrays(lzip(*uniq_tuples), sortorder=0,
names=result_names)

def diff(self, other):
def difference(self, other):
"""
Compute sorted set difference of two MultiIndex objects
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -678,9 +678,9 @@ def _combine_frame(self, other, func, axis=0):
self.minor_axis)

def _combine_panel(self, other, func):
items = self.items + other.items
major = self.major_axis + other.major_axis
minor = self.minor_axis + other.minor_axis
items = self.items.union(other.items)
major = self.major_axis.union(other.major_axis)
minor = self.minor_axis.union(other.minor_axis)

# could check that everything's the same size, but forget it
this = self.reindex(items=items, major=major, minor=minor)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/panelnd.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def _combine_with_constructor(self, other, func):
# combine labels to form new axes
new_axes = []
for a in self._AXIS_ORDERS:
new_axes.append(getattr(self, a) + getattr(other, a))
new_axes.append(getattr(self, a).union(getattr(other, a)))

# reindex: could check that everything's the same size, but forget it
d = dict([(a, ax) for a, ax in zip(self._AXIS_ORDERS, new_axes)])
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -848,7 +848,7 @@ def lreshape(data, groups, dropna=True, label=None):
keys, values = zip(*groups)

all_cols = list(set.union(*[set(x) for x in values]))
id_cols = list(data.columns.diff(all_cols))
id_cols = list(data.columns.difference(all_cols))

K = len(values[0])

Expand Down
4 changes: 2 additions & 2 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1458,7 +1458,7 @@ def combine(self, other, func, fill_value=nan):
result : Series
"""
if isinstance(other, Series):
new_index = self.index + other.index
new_index = self.index.union(other.index)
new_name = _maybe_match_name(self, other)
new_values = pa.empty(len(new_index), dtype=self.dtype)
for i, idx in enumerate(new_index):
Expand All @@ -1484,7 +1484,7 @@ def combine_first(self, other):
-------
y : Series
"""
new_index = self.index + other.index
new_index = self.index.union(other.index)
this = self.reindex(new_index, copy=False)
other = other.reindex(new_index, copy=False)
name = _maybe_match_name(self, other)
Expand Down
6 changes: 3 additions & 3 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -970,7 +970,7 @@ def append_to_multiple(self, d, value, selector, data_columns=None,
remain_values.extend(v)
if remain_key is not None:
ordered = value.axes[axis]
ordd = ordered - Index(remain_values)
ordd = ordered.difference(Index(remain_values))
ordd = sorted(ordered.get_indexer(ordd))
d[remain_key] = ordered.take(ordd)

Expand Down Expand Up @@ -3245,7 +3245,7 @@ def get_blk_items(mgr, blocks):
data_columns, min_itemsize)
if len(data_columns):
mgr = block_obj.reindex_axis(
Index(axis_labels) - Index(data_columns),
Index(axis_labels).difference(Index(data_columns)),
axis=axis
)._data

Expand Down Expand Up @@ -3362,7 +3362,7 @@ def process_filter(field, filt):
# if we have a multi-index, then need to include
# the levels
if self.is_multi_index:
filt = filt + Index(self.levels)
filt = filt.union(Index(self.levels))

takers = op(axis_values, filt)
return obj.ix._getitem_axis(takers,
Expand Down
12 changes: 6 additions & 6 deletions pandas/io/tests/test_pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -2320,7 +2320,7 @@ def test_remove_startstop(self):
n = store.remove('wp5', start=16, stop=-16)
self.assertTrue(n == 120-32)
result = store.select('wp5')
expected = wp.reindex(major_axis=wp.major_axis[:16//4]+wp.major_axis[-16//4:])
expected = wp.reindex(major_axis=wp.major_axis[:16//4].union(wp.major_axis[-16//4:]))
assert_panel_equal(result, expected)

_maybe_remove(store, 'wp6')
Expand All @@ -2339,7 +2339,7 @@ def test_remove_startstop(self):
n = store.remove('wp7', where=[crit], stop=80)
self.assertTrue(n == 28)
result = store.select('wp7')
expected = wp.reindex(major_axis=wp.major_axis-wp.major_axis[np.arange(0,20,3)])
expected = wp.reindex(major_axis=wp.major_axis.difference(wp.major_axis[np.arange(0,20,3)]))
assert_panel_equal(result, expected)

def test_remove_crit(self):
Expand All @@ -2357,7 +2357,7 @@ def test_remove_crit(self):
self.assertTrue(n == 36)

result = store.select('wp3')
expected = wp.reindex(major_axis=wp.major_axis - date4)
expected = wp.reindex(major_axis=wp.major_axis.difference(date4))
assert_panel_equal(result, expected)

# upper half
Expand Down Expand Up @@ -2385,23 +2385,23 @@ def test_remove_crit(self):
crit1 = Term('major_axis=date1')
store.remove('wp2', where=[crit1])
result = store.select('wp2')
expected = wp.reindex(major_axis=wp.major_axis - date1)
expected = wp.reindex(major_axis=wp.major_axis.difference(date1))
assert_panel_equal(result, expected)

date2 = wp.major_axis[5]
crit2 = Term('major_axis=date2')
store.remove('wp2', where=[crit2])
result = store['wp2']
expected = wp.reindex(
major_axis=wp.major_axis - date1 - Index([date2]))
major_axis=wp.major_axis.difference(date1).difference(Index([date2])))
assert_panel_equal(result, expected)

date3 = [wp.major_axis[7], wp.major_axis[9]]
crit3 = Term('major_axis=date3')
store.remove('wp2', where=[crit3])
result = store['wp2']
expected = wp.reindex(
major_axis=wp.major_axis - date1 - Index([date2]) - Index(date3))
major_axis=wp.major_axis.difference(date1).difference(Index([date2])).difference(Index(date3)))
assert_panel_equal(result, expected)

# corners
Expand Down
6 changes: 3 additions & 3 deletions pandas/sparse/panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,9 +427,9 @@ def _new_like(self, new_frames):
default_kind=self.default_kind)

def _combinePanel(self, other, func):
items = self.items + other.items
major = self.major_axis + other.major_axis
minor = self.minor_axis + other.minor_axis
items = self.items.union(other.items)
major = self.major_axis.union(other.major_axis)
minor = self.minor_axis.union(other.minor_axis)

# could check that everything's the same size, but forget it

Expand Down
Loading

0 comments on commit 07a46af

Please sign in to comment.