Revert set_index inspection/error handling for 0.24.1 (#25085)

* DOC: Minor what's new fix (#24933) * Backport PR #24916: BUG-24212 fix regression in #24897 (#24951) * Revert "Backport PR #24916: BUG-24212 fix regression in #24897 (#24951)" This reverts commit 84056c5.
pandas-dev · Feb 3, 2019 · 1fc38f6 · 1fc38f6
1 parent 51e6cb1
commit 1fc38f6
Show file tree

Hide file tree

Showing 3 changed files with 130 additions and 42 deletions.
diff --git a/doc/source/whatsnew/v0.24.1.rst b/doc/source/whatsnew/v0.24.1.rst
@@ -58,6 +58,7 @@ Fixed Regressions
 - Fixed regression in :func:`merge` when merging an empty ``DataFrame`` with multiple timezone-aware columns on one of the timezone-aware columns (:issue:`25014`).
 - Fixed regression in :meth:`Series.rename_axis` and :meth:`DataFrame.rename_axis` where passing ``None`` failed to remove the axis name (:issue:`25034`)
 - Fixed regression in :func:`to_timedelta` with `box=False` incorrectly returning a ``datetime64`` object instead of a ``timedelta64`` object (:issue:`24961`)
+- Fixed regression where custom hashable types could not be used as column keys in :meth:`DataFrame.set_index` (:issue:`24969`)
 
 .. _whatsnew_0241.bug_fixes:
 

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -71,7 +71,7 @@
     is_iterator,
     is_sequence,
     is_named_tuple)
-from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass, ABCMultiIndex
+from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass
 from pandas.core.dtypes.missing import isna, notna
 
 from pandas.core import algorithms
@@ -4154,33 +4154,8 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
         4 16     10  2014    31
         """
         inplace = validate_bool_kwarg(inplace, 'inplace')
-
-        err_msg = ('The parameter "keys" may be a column key, one-dimensional '
-                   'array, or a list containing only valid column keys and '
-                   'one-dimensional arrays.')
-
-        if (is_scalar(keys) or isinstance(keys, tuple)
-                or isinstance(keys, (ABCIndexClass, ABCSeries, np.ndarray))):
-            # make sure we have a container of keys/arrays we can iterate over
-            # tuples can appear as valid column keys!
+        if not isinstance(keys, list):
             keys = [keys]
-        elif not isinstance(keys, list):
-            raise ValueError(err_msg)
-
-        missing = []
-        for col in keys:
-            if (is_scalar(col) or isinstance(col, tuple)):
-                # if col is a valid column key, everything is fine
-                # tuples are always considered keys, never as list-likes
-                if col not in self:
-                    missing.append(col)
-            elif (not isinstance(col, (ABCIndexClass, ABCSeries,
-                                       np.ndarray, list))
-                  or getattr(col, 'ndim', 1) > 1):
-                raise ValueError(err_msg)
-
-        if missing:
-            raise KeyError('{}'.format(missing))
 
         if inplace:
             frame = self
@@ -4191,31 +4166,37 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
         names = []
         if append:
             names = [x for x in self.index.names]
-            if isinstance(self.index, ABCMultiIndex):
+            if isinstance(self.index, MultiIndex):
                 for i in range(self.index.nlevels):
                     arrays.append(self.index._get_level_values(i))
             else:
                 arrays.append(self.index)
 
         to_remove = []
         for col in keys:
-            if isinstance(col, ABCMultiIndex):
-                for n in range(col.nlevels):
+            if isinstance(col, MultiIndex):
+                # append all but the last column so we don't have to modify
+                # the end of this loop
+                for n in range(col.nlevels - 1):
                     arrays.append(col._get_level_values(n))
+
+                level = col._get_level_values(col.nlevels - 1)
                 names.extend(col.names)
-            elif isinstance(col, (ABCIndexClass, ABCSeries)):
-                # if Index then not MultiIndex (treated above)
-                arrays.append(col)
+            elif isinstance(col, Series):
+                level = col._values
+                names.append(col.name)
+            elif isinstance(col, Index):
+                level = col
                 names.append(col.name)
-            elif isinstance(col, (list, np.ndarray)):
-                arrays.append(col)
+            elif isinstance(col, (list, np.ndarray, Index)):
+                level = col
                 names.append(None)
-            # from here, col can only be a column label
             else:
-                arrays.append(frame[col]._values)
+                level = frame[col]._values
                 names.append(col)
                 if drop:
                     to_remove.append(col)
+            arrays.append(level)
 
         index = ensure_index_from_sequences(arrays, names)
 

diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py
@@ -253,23 +253,129 @@ def test_set_index_raise_keys(self, frame_of_index_cols, drop, append):
             df.set_index(['A', df['A'], tuple(df['A'])],
                          drop=drop, append=append)
 
+    @pytest.mark.xfail(reason='broken due to revert, see GH 25085')
     @pytest.mark.parametrize('append', [True, False])
     @pytest.mark.parametrize('drop', [True, False])
-    @pytest.mark.parametrize('box', [set, iter])
+    @pytest.mark.parametrize('box', [set, iter, lambda x: (y for y in x)],
+                             ids=['set', 'iter', 'generator'])
     def test_set_index_raise_on_type(self, frame_of_index_cols, box,
                                      drop, append):
         df = frame_of_index_cols
 
         msg = 'The parameter "keys" may be a column key, .*'
-        # forbidden type, e.g. set/tuple/iter
-        with pytest.raises(ValueError, match=msg):
+        # forbidden type, e.g. set/iter/generator
+        with pytest.raises(TypeError, match=msg):
             df.set_index(box(df['A']), drop=drop, append=append)
 
-        # forbidden type in list, e.g. set/tuple/iter
-        with pytest.raises(ValueError, match=msg):
+        # forbidden type in list, e.g. set/iter/generator
+        with pytest.raises(TypeError, match=msg):
             df.set_index(['A', df['A'], box(df['A'])],
                          drop=drop, append=append)
 
+    def test_set_index_custom_label_type(self):
+        # GH 24969
+
+        class Thing(object):
+            def __init__(self, name, color):
+                self.name = name
+                self.color = color
+
+            def __str__(self):
+                return "<Thing %r>" % (self.name,)
+
+            # necessary for pretty KeyError
+            __repr__ = __str__
+
+        thing1 = Thing('One', 'red')
+        thing2 = Thing('Two', 'blue')
+        df = DataFrame({thing1: [0, 1], thing2: [2, 3]})
+        expected = DataFrame({thing1: [0, 1]},
+                             index=Index([2, 3], name=thing2))
+
+        # use custom label directly
+        result = df.set_index(thing2)
+        tm.assert_frame_equal(result, expected)
+
+        # custom label wrapped in list
+        result = df.set_index([thing2])
+        tm.assert_frame_equal(result, expected)
+
+        # missing key
+        thing3 = Thing('Three', 'pink')
+        msg = "<Thing 'Three'>"
+        with pytest.raises(KeyError, match=msg):
+            # missing label directly
+            df.set_index(thing3)
+
+        with pytest.raises(KeyError, match=msg):
+            # missing label in list
+            df.set_index([thing3])
+
+    def test_set_index_custom_label_hashable_iterable(self):
+        # GH 24969
+
+        # actual example discussed in GH 24984 was e.g. for shapely.geometry
+        # objects (e.g. a collection of Points) that can be both hashable and
+        # iterable; using frozenset as a stand-in for testing here
+
+        class Thing(frozenset):
+            # need to stabilize repr for KeyError (due to random order in sets)
+            def __repr__(self):
+                tmp = sorted(list(self))
+                # double curly brace prints one brace in format string
+                return "frozenset({{{}}})".format(', '.join(map(repr, tmp)))
+
+        thing1 = Thing(['One', 'red'])
+        thing2 = Thing(['Two', 'blue'])
+        df = DataFrame({thing1: [0, 1], thing2: [2, 3]})
+        expected = DataFrame({thing1: [0, 1]},
+                             index=Index([2, 3], name=thing2))
+
+        # use custom label directly
+        result = df.set_index(thing2)
+        tm.assert_frame_equal(result, expected)
+
+        # custom label wrapped in list
+        result = df.set_index([thing2])
+        tm.assert_frame_equal(result, expected)
+
+        # missing key
+        thing3 = Thing(['Three', 'pink'])
+        msg = '.*'  # due to revert, see GH 25085
+        with pytest.raises(KeyError, match=msg):
+            # missing label directly
+            df.set_index(thing3)
+
+        with pytest.raises(KeyError, match=msg):
+            # missing label in list
+            df.set_index([thing3])
+
+    def test_set_index_custom_label_type_raises(self):
+        # GH 24969
+
+        # purposefully inherit from something unhashable
+        class Thing(set):
+            def __init__(self, name, color):
+                self.name = name
+                self.color = color
+
+            def __str__(self):
+                return "<Thing %r>" % (self.name,)
+
+        thing1 = Thing('One', 'red')
+        thing2 = Thing('Two', 'blue')
+        df = DataFrame([[0, 2], [1, 3]], columns=[thing1, thing2])
+
+        msg = 'unhashable type.*'
+
+        with pytest.raises(TypeError, match=msg):
+            # use custom label directly
+            df.set_index(thing2)
+
+        with pytest.raises(TypeError, match=msg):
+            # custom label wrapped in list
+            df.set_index([thing2])
+
     def test_construction_with_categorical_index(self):
         ci = tm.makeCategoricalIndex(10)
         ci.name = 'B'