BUG: Fix passing empty label to df drop (#21515)

Closes #21494
pandas-dev · Jun 21, 2018 · f4fba9e · f4fba9e
1 parent f91a704
commit f4fba9e
Show file tree

Hide file tree

Showing 6 changed files with 98 additions and 50 deletions.
diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt
@@ -61,6 +61,7 @@ Bug Fixes
 
 - Bug in :meth:`Index.get_indexer_non_unique` with categorical key (:issue:`21448`)
 - Bug in comparison operations for :class:`MultiIndex` where error was raised on equality / inequality comparison involving a MultiIndex with ``nlevels == 1`` (:issue:`21149`)
+- Bug in :meth:`DataFrame.drop` where behaviour was not consistent for unique and non-unique indexes (:issue:`21494`)
 - Bug in :func:`DataFrame.duplicated` with a large number of columns causing a 'maximum recursion depth exceeded' (:issue:`21524`).
 -
 

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -3129,7 +3129,7 @@ def _drop_axis(self, labels, axis, level=None, errors='raise'):
         """
         axis = self._get_axis_number(axis)
         axis_name = self._get_axis_name(axis)
-        axis, axis_ = self._get_axis(axis), axis
+        axis = self._get_axis(axis)
 
         if axis.is_unique:
             if level is not None:
@@ -3138,24 +3138,25 @@ def _drop_axis(self, labels, axis, level=None, errors='raise'):
                 new_axis = axis.drop(labels, level=level, errors=errors)
             else:
                 new_axis = axis.drop(labels, errors=errors)
-            dropped = self.reindex(**{axis_name: new_axis})
-            try:
-                dropped.axes[axis_].set_names(axis.names, inplace=True)
-            except AttributeError:
-                pass
-            result = dropped
+            result = self.reindex(**{axis_name: new_axis})
 
+        # Case for non-unique axis
         else:
             labels = _ensure_object(com._index_labels_to_array(labels))
             if level is not None:
                 if not isinstance(axis, MultiIndex):
                     raise AssertionError('axis must be a MultiIndex')
                 indexer = ~axis.get_level_values(level).isin(labels)
+
+                # GH 18561 MultiIndex.drop should raise if label is absent
+                if errors == 'raise' and indexer.all():
+                    raise KeyError('{} not found in axis'.format(labels))
             else:
                 indexer = ~axis.isin(labels)
-
-            if errors == 'raise' and indexer.all():
-                raise KeyError('{} not found in axis'.format(labels))
+                # Check if label doesn't exist along axis
+                labels_missing = (axis.get_indexer_for(labels) == -1).any()
+                if errors == 'raise' and labels_missing:
+                    raise KeyError('{} not found in axis'.format(labels))
 
             slicer = [slice(None)] * self.ndim
             slicer[self._get_axis_number(axis_name)] = indexer

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -4341,7 +4341,7 @@ def drop(self, labels, errors='raise'):
         Raises
         ------
         KeyError
-            If none of the labels are found in the selected axis
+            If not all of the labels are found in the selected axis
         """
         arr_dtype = 'object' if self.dtype == 'object' else None
         labels = com._index_labels_to_array(labels, dtype=arr_dtype)
@@ -4350,7 +4350,7 @@ def drop(self, labels, errors='raise'):
         if mask.any():
             if errors != 'ignore':
                 raise KeyError(
-                    'labels %s not contained in axis' % labels[mask])
+                    '{} not found in axis'.format(labels[mask]))
             indexer = indexer[~mask]
         return self.delete(indexer)
 

diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
@@ -1707,7 +1707,6 @@ def drop(self, labels, level=None, errors='raise'):
                 if errors != 'ignore':
                     raise ValueError('labels %s not contained in axis' %
                                      labels[mask])
-                indexer = indexer[~mask]
         except Exception:
             pass
 

diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py
@@ -1151,3 +1151,18 @@ def test_raise_on_drop_duplicate_index(self, actual):
         expected_no_err = actual.T.drop('c', axis=1, level=level,
                                         errors='ignore')
         assert_frame_equal(expected_no_err.T, actual)
+
+    @pytest.mark.parametrize('index', [[1, 2, 3], [1, 1, 2]])
+    @pytest.mark.parametrize('drop_labels', [[], [1], [2]])
+    def test_drop_empty_list(self, index, drop_labels):
+        # GH 21494
+        expected_index = [i for i in index if i not in drop_labels]
+        frame = pd.DataFrame(index=index).drop(drop_labels)
+        tm.assert_frame_equal(frame, pd.DataFrame(index=expected_index))
+
+    @pytest.mark.parametrize('index', [[1, 2, 3], [1, 2, 2]])
+    @pytest.mark.parametrize('drop_labels', [[1, 4], [4, 5]])
+    def test_drop_non_empty_list(self, index, drop_labels):
+        # GH 21494
+        with tm.assert_raises_regex(KeyError, 'not found in axis'):
+            pd.DataFrame(index=index).drop(drop_labels)
diff --git a/pandas/tests/series/indexing/test_alter_index.py b/pandas/tests/series/indexing/test_alter_index.py
@@ -472,54 +472,86 @@ def test_rename():
     assert result.name == expected.name
 
 
-def test_drop():
-    # unique
-    s = Series([1, 2], index=['one', 'two'])
-    expected = Series([1], index=['one'])
-    result = s.drop(['two'])
-    assert_series_equal(result, expected)
-    result = s.drop('two', axis='rows')
-    assert_series_equal(result, expected)
-
-    # non-unique
-    # GH 5248
-    s = Series([1, 1, 2], index=['one', 'two', 'one'])
-    expected = Series([1, 2], index=['one', 'one'])
-    result = s.drop(['two'], axis=0)
-    assert_series_equal(result, expected)
-    result = s.drop('two')
-    assert_series_equal(result, expected)
-
-    expected = Series([1], index=['two'])
-    result = s.drop(['one'])
-    assert_series_equal(result, expected)
-    result = s.drop('one')
-    assert_series_equal(result, expected)
+@pytest.mark.parametrize(
+    'data, index, drop_labels,'
+    ' axis, expected_data, expected_index',
+    [
+        # Unique Index (first two cases; the third already has duplicates)
+        ([1, 2], ['one', 'two'], ['two'],
+         0, [1], ['one']),
+        ([1, 2], ['one', 'two'], ['two'],
+         'rows', [1], ['one']),
+        ([1, 1, 2], ['one', 'two', 'one'], ['two'],
+         0, [1, 2], ['one', 'one']),
+
+        # GH 5248 Non-Unique Index
+        ([1, 1, 2], ['one', 'two', 'one'], 'two',
+         0, [1, 2], ['one', 'one']),
+        ([1, 1, 2], ['one', 'two', 'one'], ['one'],
+         0, [1], ['two']),
+        ([1, 1, 2], ['one', 'two', 'one'], 'one',
+         0, [1], ['two'])])
+def test_drop_unique_and_non_unique_index(data, index, axis, drop_labels,
+                                          expected_data, expected_index):
+
+    s = Series(data=data, index=index)
+    result = s.drop(drop_labels, axis=axis)
+    expected = Series(data=expected_data, index=expected_index)
+    tm.assert_series_equal(result, expected)
 
-    # single string/tuple-like
-    s = Series(range(3), index=list('abc'))
-    pytest.raises(KeyError, s.drop, 'bc')
-    pytest.raises(KeyError, s.drop, ('a',))
 
+@pytest.mark.parametrize(
+    'data, index, drop_labels,'
+    ' axis, error_type, error_desc',
+    [
+        # single string/tuple-like
+        (range(3), list('abc'), 'bc',
+         0, KeyError, 'not found in axis'),
+
+        # tuple-like label, then a bad axis name
+        (range(3), list('abc'), ('a',),
+         0, KeyError, 'not found in axis'),
+        (range(3), list('abc'), 'one',
+         'columns', ValueError, 'No axis named columns')])
+def test_drop_exception_raised(data, index, drop_labels,
+                               axis, error_type, error_desc):
+
+    with tm.assert_raises_regex(error_type, error_desc):
+        Series(data, index=index).drop(drop_labels, axis=axis)
+
+
+def test_drop_with_ignore_errors():
     # errors='ignore'
     s = Series(range(3), index=list('abc'))
     result = s.drop('bc', errors='ignore')
-    assert_series_equal(result, s)
+    tm.assert_series_equal(result, s)
     result = s.drop(['a', 'd'], errors='ignore')
     expected = s.iloc[1:]
-    assert_series_equal(result, expected)
-
-    # bad axis
-    pytest.raises(ValueError, s.drop, 'one', axis='columns')
+    tm.assert_series_equal(result, expected)
 
     # GH 8522
     s = Series([2, 3], index=[True, False])
     assert s.index.is_object()
     result = s.drop(True)
     expected = Series([3], index=[False])
-    assert_series_equal(result, expected)
+    tm.assert_series_equal(result, expected)
+
 
-    # GH 16877
-    s = Series([2, 3], index=[0, 1])
-    with tm.assert_raises_regex(KeyError, 'not contained in axis'):
-        s.drop([False, True])
+@pytest.mark.parametrize('index', [[1, 2, 3], [1, 1, 3]])
+@pytest.mark.parametrize('drop_labels', [[], [1], [3]])
+def test_drop_empty_list(index, drop_labels):
+    # GH 21494
+    expected_index = [i for i in index if i not in drop_labels]
+    series = pd.Series(index=index).drop(drop_labels)
+    tm.assert_series_equal(series, pd.Series(index=expected_index))
+
+
+@pytest.mark.parametrize('data, index, drop_labels', [
+    (None, [1, 2, 3], [1, 4]),
+    (None, [1, 2, 2], [1, 4]),
+    ([2, 3], [0, 1], [False, True])
+])
+def test_drop_non_empty_list(data, index, drop_labels):
+    # GH 21494 and GH 16877
+    with tm.assert_raises_regex(KeyError, 'not found in axis'):
+        pd.Series(data=data, index=index).drop(drop_labels)