Skip to content

Commit

Permalink
BUG: Fix passing empty label to df drop (#21515)
Browse files Browse the repository at this point in the history
Closes #21494
  • Loading branch information
alimcmaster1 authored and toobaz committed Jun 21, 2018
1 parent f91a704 commit f4fba9e
Show file tree
Hide file tree
Showing 6 changed files with 98 additions and 50 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.2.txt
Expand Up @@ -61,6 +61,7 @@ Bug Fixes

- Bug in :meth:`Index.get_indexer_non_unique` with categorical key (:issue:`21448`)
- Bug in comparison operations for :class:`MultiIndex` where error was raised on equality / inequality comparison involving a MultiIndex with ``nlevels == 1`` (:issue:`21149`)
- Bug in :meth:`DataFrame.drop` where behaviour was not consistent for unique and non-unique indexes (:issue:`21494`)
- Bug in :func:`DataFrame.duplicated` with a large number of columns causing a 'maximum recursion depth exceeded' (:issue:`21524`).
-

Expand Down
21 changes: 11 additions & 10 deletions pandas/core/generic.py
Expand Up @@ -3129,7 +3129,7 @@ def _drop_axis(self, labels, axis, level=None, errors='raise'):
"""
axis = self._get_axis_number(axis)
axis_name = self._get_axis_name(axis)
axis, axis_ = self._get_axis(axis), axis
axis = self._get_axis(axis)

if axis.is_unique:
if level is not None:
Expand All @@ -3138,24 +3138,25 @@ def _drop_axis(self, labels, axis, level=None, errors='raise'):
new_axis = axis.drop(labels, level=level, errors=errors)
else:
new_axis = axis.drop(labels, errors=errors)
dropped = self.reindex(**{axis_name: new_axis})
try:
dropped.axes[axis_].set_names(axis.names, inplace=True)
except AttributeError:
pass
result = dropped
result = self.reindex(**{axis_name: new_axis})

# Case for non-unique axis
else:
labels = _ensure_object(com._index_labels_to_array(labels))
if level is not None:
if not isinstance(axis, MultiIndex):
raise AssertionError('axis must be a MultiIndex')
indexer = ~axis.get_level_values(level).isin(labels)

# GH 18561 MultiIndex.drop should raise if label is absent
if errors == 'raise' and indexer.all():
raise KeyError('{} not found in axis'.format(labels))
else:
indexer = ~axis.isin(labels)

if errors == 'raise' and indexer.all():
raise KeyError('{} not found in axis'.format(labels))
# Check if label doesn't exist along axis
labels_missing = (axis.get_indexer_for(labels) == -1).any()
if errors == 'raise' and labels_missing:
raise KeyError('{} not found in axis'.format(labels))

slicer = [slice(None)] * self.ndim
slicer[self._get_axis_number(axis_name)] = indexer
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/indexes/base.py
Expand Up @@ -4341,7 +4341,7 @@ def drop(self, labels, errors='raise'):
Raises
------
KeyError
If none of the labels are found in the selected axis
If not all of the labels are found in the selected axis
"""
arr_dtype = 'object' if self.dtype == 'object' else None
labels = com._index_labels_to_array(labels, dtype=arr_dtype)
Expand All @@ -4350,7 +4350,7 @@ def drop(self, labels, errors='raise'):
if mask.any():
if errors != 'ignore':
raise KeyError(
'labels %s not contained in axis' % labels[mask])
'{} not found in axis'.format(labels[mask]))
indexer = indexer[~mask]
return self.delete(indexer)

Expand Down
1 change: 0 additions & 1 deletion pandas/core/indexes/multi.py
Expand Up @@ -1707,7 +1707,6 @@ def drop(self, labels, level=None, errors='raise'):
if errors != 'ignore':
raise ValueError('labels %s not contained in axis' %
labels[mask])
indexer = indexer[~mask]
except Exception:
pass

Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/frame/test_axis_select_reindex.py
Expand Up @@ -1151,3 +1151,18 @@ def test_raise_on_drop_duplicate_index(self, actual):
expected_no_err = actual.T.drop('c', axis=1, level=level,
errors='ignore')
assert_frame_equal(expected_no_err.T, actual)

@pytest.mark.parametrize('index', [[1, 2, 3], [1, 1, 2]])
@pytest.mark.parametrize('drop_labels', [[], [1], [2]])
def test_drop_empty_list(self, index, drop_labels):
    """Drop an empty or fully-present label list from a frame (GH 21494).

    Runs against both a unique and a non-unique index; every row whose
    label is not listed in ``drop_labels`` must remain.
    """
    source = pd.DataFrame(index=index)
    remaining = [label for label in index if label not in drop_labels]
    expected = pd.DataFrame(index=remaining)

    result = source.drop(drop_labels)

    tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize('index', [[1, 2, 3], [1, 2, 2]])
@pytest.mark.parametrize('drop_labels', [[1, 4], [4, 5]])
def test_drop_non_empty_list(self, index, drop_labels):
    """Dropping a list with an absent label raises KeyError (GH 21494).

    Each ``drop_labels`` case holds at least one label missing from the
    index, for both the unique and the non-unique index.
    """
    frame = pd.DataFrame(index=index)
    with tm.assert_raises_regex(KeyError, 'not found in axis'):
        frame.drop(drop_labels)
106 changes: 69 additions & 37 deletions pandas/tests/series/indexing/test_alter_index.py
Expand Up @@ -472,54 +472,86 @@ def test_rename():
assert result.name == expected.name


def test_drop():
# unique
s = Series([1, 2], index=['one', 'two'])
expected = Series([1], index=['one'])
result = s.drop(['two'])
assert_series_equal(result, expected)
result = s.drop('two', axis='rows')
assert_series_equal(result, expected)

# non-unique
# GH 5248
s = Series([1, 1, 2], index=['one', 'two', 'one'])
expected = Series([1, 2], index=['one', 'one'])
result = s.drop(['two'], axis=0)
assert_series_equal(result, expected)
result = s.drop('two')
assert_series_equal(result, expected)

expected = Series([1], index=['two'])
result = s.drop(['one'])
assert_series_equal(result, expected)
result = s.drop('one')
assert_series_equal(result, expected)
@pytest.mark.parametrize(
    'data, index, drop_labels,'
    ' axis, expected_data, expected_index',
    [
        # Unique index: list label, axis given as number and as name
        ([1, 2], ['one', 'two'], ['two'], 0, [1], ['one']),
        ([1, 2], ['one', 'two'], ['two'], 'rows', [1], ['one']),
        # GH 5248 non-unique index: list and scalar labels
        ([1, 1, 2], ['one', 'two', 'one'], ['two'], 0, [1, 2], ['one', 'one']),
        ([1, 1, 2], ['one', 'two', 'one'], 'two', 0, [1, 2], ['one', 'one']),
        ([1, 1, 2], ['one', 'two', 'one'], ['one'], 0, [1], ['two']),
        ([1, 1, 2], ['one', 'two', 'one'], 'one', 0, [1], ['two'])])
def test_drop_unique_and_non_unique_index(data, index, axis, drop_labels,
                                          expected_data, expected_index):
    """Series.drop removes every entry carrying the given label(s)."""
    expected = Series(data=expected_data, index=expected_index)

    original = Series(data=data, index=index)
    result = original.drop(drop_labels, axis=axis)

    tm.assert_series_equal(result, expected)

# single string/tuple-like
s = Series(range(3), index=list('abc'))
pytest.raises(KeyError, s.drop, 'bc')
pytest.raises(KeyError, s.drop, ('a',))

@pytest.mark.parametrize(
    'data, index, drop_labels,'
    ' axis, error_type, error_desc',
    [
        # single string/tuple-like label that is not in the index
        (range(3), list('abc'), 'bc', 0, KeyError, 'not found in axis'),
        (range(3), list('abc'), ('a',), 0, KeyError, 'not found in axis'),
        # bad axis
        (range(3), list('abc'), 'one',
         'columns', ValueError, 'No axis named columns')])
def test_drop_exception_raised(data, index, drop_labels,
                               axis, error_type, error_desc):
    """Series.drop raises the expected error type with a matching message."""
    series = Series(data, index=index)
    with tm.assert_raises_regex(error_type, error_desc):
        series.drop(drop_labels, axis=axis)


def test_drop_with_ignore_errors():
# errors='ignore'
s = Series(range(3), index=list('abc'))
result = s.drop('bc', errors='ignore')
assert_series_equal(result, s)
tm.assert_series_equal(result, s)
result = s.drop(['a', 'd'], errors='ignore')
expected = s.iloc[1:]
assert_series_equal(result, expected)

# bad axis
pytest.raises(ValueError, s.drop, 'one', axis='columns')
tm.assert_series_equal(result, expected)

# GH 8522
s = Series([2, 3], index=[True, False])
assert s.index.is_object()
result = s.drop(True)
expected = Series([3], index=[False])
assert_series_equal(result, expected)
tm.assert_series_equal(result, expected)


# GH 16877
s = Series([2, 3], index=[0, 1])
with tm.assert_raises_regex(KeyError, 'not contained in axis'):
s.drop([False, True])
@pytest.mark.parametrize('index', [[1, 2, 3], [1, 1, 3]])
@pytest.mark.parametrize('drop_labels', [[], [1], [3]])
def test_drop_empty_list(index, drop_labels):
    """Drop an empty or fully-present label list from a Series (GH 21494).

    Runs against both a unique and a non-unique index; every entry whose
    label is not listed in ``drop_labels`` must remain.
    """
    source = pd.Series(index=index)
    remaining = [label for label in index if label not in drop_labels]
    expected = pd.Series(index=remaining)

    result = source.drop(drop_labels)

    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize('data, index, drop_labels', [
    # GH 21494: one label absent, unique and non-unique index
    (None, [1, 2, 3], [1, 4]),
    (None, [1, 2, 2], [1, 4]),
    # GH 16877: boolean labels against an integer index
    ([2, 3], [0, 1], [False, True])
])
def test_drop_non_empty_list(data, index, drop_labels):
    """Dropping labels that are not all in the index raises KeyError."""
    series = pd.Series(data=data, index=index)
    with tm.assert_raises_regex(KeyError, 'not found in axis'):
        series.drop(drop_labels)

0 comments on commit f4fba9e

Please sign in to comment.