Skip to content

Commit

Permalink
Revert set_index inspection/error handling for 0.24.1 (#25085)
Browse files Browse the repository at this point in the history
* DOC: Minor what's new fix (#24933)

* Backport PR #24916: BUG-24212 fix regression in #24897 (#24951)

* Revert "Backport PR #24916: BUG-24212 fix regression in #24897 (#24951)"

This reverts commit 84056c5.
  • Loading branch information
h-vetinari authored and TomAugspurger committed Feb 3, 2019
1 parent 51e6cb1 commit 1fc38f6
Show file tree
Hide file tree
Showing 3 changed files with 130 additions and 42 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ Fixed Regressions
- Fixed regression in :func:`merge` when merging an empty ``DataFrame`` with multiple timezone-aware columns on one of the timezone-aware columns (:issue:`25014`).
- Fixed regression in :meth:`Series.rename_axis` and :meth:`DataFrame.rename_axis` where passing ``None`` failed to remove the axis name (:issue:`25034`)
- Fixed regression in :func:`to_timedelta` with `box=False` incorrectly returning a ``datetime64`` object instead of a ``timedelta64`` object (:issue:`24961`)
- Fixed regression where custom hashable types could not be used as column keys in :meth:`DataFrame.set_index` (:issue:`24969`)

.. _whatsnew_0241.bug_fixes:

Expand Down
55 changes: 18 additions & 37 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@
is_iterator,
is_sequence,
is_named_tuple)
from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass, ABCMultiIndex
from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass
from pandas.core.dtypes.missing import isna, notna

from pandas.core import algorithms
Expand Down Expand Up @@ -4154,33 +4154,8 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
4 16 10 2014 31
"""
inplace = validate_bool_kwarg(inplace, 'inplace')

err_msg = ('The parameter "keys" may be a column key, one-dimensional '
'array, or a list containing only valid column keys and '
'one-dimensional arrays.')

if (is_scalar(keys) or isinstance(keys, tuple)
or isinstance(keys, (ABCIndexClass, ABCSeries, np.ndarray))):
# make sure we have a container of keys/arrays we can iterate over
# tuples can appear as valid column keys!
if not isinstance(keys, list):
keys = [keys]
elif not isinstance(keys, list):
raise ValueError(err_msg)

missing = []
for col in keys:
if (is_scalar(col) or isinstance(col, tuple)):
# if col is a valid column key, everything is fine
# tuples are always considered keys, never as list-likes
if col not in self:
missing.append(col)
elif (not isinstance(col, (ABCIndexClass, ABCSeries,
np.ndarray, list))
or getattr(col, 'ndim', 1) > 1):
raise ValueError(err_msg)

if missing:
raise KeyError('{}'.format(missing))

if inplace:
frame = self
Expand All @@ -4191,31 +4166,37 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
names = []
if append:
names = [x for x in self.index.names]
if isinstance(self.index, ABCMultiIndex):
if isinstance(self.index, MultiIndex):
for i in range(self.index.nlevels):
arrays.append(self.index._get_level_values(i))
else:
arrays.append(self.index)

to_remove = []
for col in keys:
if isinstance(col, ABCMultiIndex):
for n in range(col.nlevels):
if isinstance(col, MultiIndex):
# append all but the last column so we don't have to modify
# the end of this loop
for n in range(col.nlevels - 1):
arrays.append(col._get_level_values(n))

level = col._get_level_values(col.nlevels - 1)
names.extend(col.names)
elif isinstance(col, (ABCIndexClass, ABCSeries)):
# if Index then not MultiIndex (treated above)
arrays.append(col)
elif isinstance(col, Series):
level = col._values
names.append(col.name)
elif isinstance(col, Index):
level = col
names.append(col.name)
elif isinstance(col, (list, np.ndarray)):
arrays.append(col)
elif isinstance(col, (list, np.ndarray, Index)):
level = col
names.append(None)
# from here, col can only be a column label
else:
arrays.append(frame[col]._values)
level = frame[col]._values
names.append(col)
if drop:
to_remove.append(col)
arrays.append(level)

index = ensure_index_from_sequences(arrays, names)

Expand Down
116 changes: 111 additions & 5 deletions pandas/tests/frame/test_alter_axes.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,23 +253,129 @@ def test_set_index_raise_keys(self, frame_of_index_cols, drop, append):
df.set_index(['A', df['A'], tuple(df['A'])],
drop=drop, append=append)

@pytest.mark.xfail(reason='broken due to revert, see GH 25085')
@pytest.mark.parametrize('append', [True, False])
@pytest.mark.parametrize('drop', [True, False])
@pytest.mark.parametrize('box', [set, iter])
@pytest.mark.parametrize('box', [set, iter, lambda x: (y for y in x)],
ids=['set', 'iter', 'generator'])
def test_set_index_raise_on_type(self, frame_of_index_cols, box,
drop, append):
df = frame_of_index_cols

msg = 'The parameter "keys" may be a column key, .*'
# forbidden type, e.g. set/tuple/iter
with pytest.raises(ValueError, match=msg):
# forbidden type, e.g. set/iter/generator
with pytest.raises(TypeError, match=msg):
df.set_index(box(df['A']), drop=drop, append=append)

# forbidden type in list, e.g. set/tuple/iter
with pytest.raises(ValueError, match=msg):
# forbidden type in list, e.g. set/iter/generator
with pytest.raises(TypeError, match=msg):
df.set_index(['A', df['A'], box(df['A'])],
drop=drop, append=append)

def test_set_index_custom_label_type(self):
# GH 24969

class Thing(object):
def __init__(self, name, color):
self.name = name
self.color = color

def __str__(self):
return "<Thing %r>" % (self.name,)

# necessary for pretty KeyError
__repr__ = __str__

thing1 = Thing('One', 'red')
thing2 = Thing('Two', 'blue')
df = DataFrame({thing1: [0, 1], thing2: [2, 3]})
expected = DataFrame({thing1: [0, 1]},
index=Index([2, 3], name=thing2))

# use custom label directly
result = df.set_index(thing2)
tm.assert_frame_equal(result, expected)

# custom label wrapped in list
result = df.set_index([thing2])
tm.assert_frame_equal(result, expected)

# missing key
thing3 = Thing('Three', 'pink')
msg = "<Thing 'Three'>"
with pytest.raises(KeyError, match=msg):
# missing label directly
df.set_index(thing3)

with pytest.raises(KeyError, match=msg):
# missing label in list
df.set_index([thing3])

def test_set_index_custom_label_hashable_iterable(self):
# GH 24969

# actual example discussed in GH 24984 was e.g. for shapely.geometry
# objects (e.g. a collection of Points) that can be both hashable and
# iterable; using frozenset as a stand-in for testing here

class Thing(frozenset):
# need to stabilize repr for KeyError (due to random order in sets)
def __repr__(self):
tmp = sorted(list(self))
# double curly brace prints one brace in format string
return "frozenset({{{}}})".format(', '.join(map(repr, tmp)))

thing1 = Thing(['One', 'red'])
thing2 = Thing(['Two', 'blue'])
df = DataFrame({thing1: [0, 1], thing2: [2, 3]})
expected = DataFrame({thing1: [0, 1]},
index=Index([2, 3], name=thing2))

# use custom label directly
result = df.set_index(thing2)
tm.assert_frame_equal(result, expected)

# custom label wrapped in list
result = df.set_index([thing2])
tm.assert_frame_equal(result, expected)

# missing key
thing3 = Thing(['Three', 'pink'])
msg = '.*' # due to revert, see GH 25085
with pytest.raises(KeyError, match=msg):
# missing label directly
df.set_index(thing3)

with pytest.raises(KeyError, match=msg):
# missing label in list
df.set_index([thing3])

def test_set_index_custom_label_type_raises(self):
# GH 24969

# purposefully inherit from something unhashable
class Thing(set):
def __init__(self, name, color):
self.name = name
self.color = color

def __str__(self):
return "<Thing %r>" % (self.name,)

thing1 = Thing('One', 'red')
thing2 = Thing('Two', 'blue')
df = DataFrame([[0, 2], [1, 3]], columns=[thing1, thing2])

msg = 'unhashable type.*'

with pytest.raises(TypeError, match=msg):
# use custom label directly
df.set_index(thing2)

with pytest.raises(TypeError, match=msg):
# custom label wrapped in list
df.set_index([thing2])

def test_construction_with_categorical_index(self):
ci = tm.makeCategoricalIndex(10)
ci.name = 'B'
Expand Down

0 comments on commit 1fc38f6

Please sign in to comment.