Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: .loc with MultiIndex with names[1] = 0 #37194

Merged
merged 11 commits into from
Oct 23, 2020
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,7 @@ Indexing
- Bug in indexing with boolean masks on datetime-like values sometimes returning a view instead of a copy (:issue:`36210`)
- Bug in :meth:`DataFrame.__getitem__` and :meth:`DataFrame.loc.__getitem__` with :class:`IntervalIndex` columns and a numeric indexer (:issue:`26490`)
- Bug in :meth:`Series.loc.__getitem__` with a non-unique :class:`MultiIndex` and an empty-list indexer (:issue:`13691`)
- Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`MultiIndex` that has a level whose name is the integer ``0`` (:issue:`37194`)

Missing
^^^^^^^
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3684,7 +3684,9 @@ class animal locomotion
index = self.index
if isinstance(index, MultiIndex):
try:
loc, new_index = self.index.get_loc_level(key, drop_level=drop_level)
loc, new_index = self.index._get_loc_level(
key, level=0, drop_level=drop_level
)
except TypeError as e:
raise TypeError(f"Expected label or tuple of labels, got {key}") from e
else:
Expand Down
15 changes: 11 additions & 4 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1552,12 +1552,19 @@ def droplevel(self, level=0):

levnums = sorted(self._get_level_number(lev) for lev in level)[::-1]

if len(level) == 0:
return self._drop_level_numbers(levnums)

def _drop_level_numbers(self, levnums: List[int]):
jreback marked this conversation as resolved.
Show resolved Hide resolved
"""
Drop MultiIndex levels by level _number_, not name.
"""

if len(levnums) == 0:
return self
if len(level) >= self.nlevels:
if len(levnums) >= self.nlevels:
raise ValueError(
f"Cannot remove {len(level)} levels from an index with {self.nlevels} "
"levels: at least one level must be left."
f"Cannot remove {len(levnums)} levels from an index with "
f"{self.nlevels} levels: at least one level must be left."
)
# The two checks above guarantee that here self is a MultiIndex
self = cast("MultiIndex", self)
Expand Down
28 changes: 20 additions & 8 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -2864,16 +2864,29 @@ def get_loc_level(self, key, level=0, drop_level: bool = True):
>>> mi.get_loc_level(['b', 'e'])
(1, None)
"""
if not isinstance(level, (list, tuple)):
level = self._get_level_number(level)
else:
level = [self._get_level_number(lev) for lev in level]
return self._get_loc_level(key, level=level, drop_level=drop_level)

def _get_loc_level(
self, key, level: Union[int, List[int]] = 0, drop_level: bool = True
):
"""
get_loc_level but with `level` known to be positional, not name-based.
"""

# different name to distinguish from maybe_droplevels
def maybe_mi_droplevels(indexer, levels, drop_level: bool):
if not drop_level:
return self[indexer]
# kludge around
orig_index = new_index = self[indexer]
levels = [self._get_level_number(i) for i in levels]

for i in sorted(levels, reverse=True):
try:
new_index = new_index.droplevel(i)
new_index = new_index._drop_level_numbers([i])
except ValueError:

# no dropping here
Expand All @@ -2887,7 +2900,7 @@ def maybe_mi_droplevels(indexer, levels, drop_level: bool):
)
result = None
for lev, k in zip(level, key):
loc, new_index = self.get_loc_level(k, level=lev)
loc, new_index = self._get_loc_level(k, level=lev)
if isinstance(loc, slice):
mask = np.zeros(len(self), dtype=bool)
mask[loc] = True
Expand All @@ -2897,8 +2910,6 @@ def maybe_mi_droplevels(indexer, levels, drop_level: bool):

return result, maybe_mi_droplevels(result, level, drop_level)

level = self._get_level_number(level)

# kludge for #1796
if isinstance(key, list):
key = tuple(key)
Expand Down Expand Up @@ -2963,7 +2974,8 @@ def partial_selection(key, indexer=None):
indexer = self._get_level_indexer(key, level=level)
return indexer, maybe_mi_droplevels(indexer, [level], drop_level)

def _get_level_indexer(self, key, level=0, indexer=None):
def _get_level_indexer(self, key, level: int = 0, indexer=None):
# `level` kwarg is _always_ positional, never name
# return an indexer, boolean array or a slice showing where the key is
# in the totality of values
# if the indexer is provided, then use this
Expand Down Expand Up @@ -3767,13 +3779,13 @@ def maybe_droplevels(index, key):
if isinstance(key, tuple):
for _ in key:
try:
index = index.droplevel(0)
index = index._drop_level_numbers([0])
except ValueError:
# we have dropped too much, so back out
return original_index
else:
try:
index = index.droplevel(0)
index = index._drop_level_numbers([0])
except ValueError:
pass

Expand Down
24 changes: 24 additions & 0 deletions pandas/tests/indexing/multiindex/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,30 @@ def test_loc_with_mi_indexer():
tm.assert_frame_equal(result, expected)


def test_loc_mi_with_level1_named_0():
    """
    Regression test for GH#37194.

    Builds a two-level MultiIndex whose second level is *named* with the
    integer ``0`` and checks that ``.loc`` with a label from the first level
    works on both a DataFrame and a Series, giving the same result as
    positional selection followed by dropping the unnamed first level.
    """
    # GH#37194
    dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific")

    ser = Series(range(3), index=dti)
    df = ser.to_frame()
    df[1] = dti

    # Moving column 0 into the index makes the level *name* 0 sit at
    # level *position* 1, which is the ambiguous layout being tested.
    df2 = df.set_index(0, append=True)
    assert df2.index.names == (None, 0)
    df2.index.get_loc(dti[0])  # smoke test

    # DataFrame case: selecting by a first-level label drops that level.
    result = df2.loc[dti[0]]
    expected = df2.iloc[[0]].droplevel(None)
    tm.assert_frame_equal(result, expected)

    # Series case: same index, same expectations.
    ser2 = df2[1]
    assert ser2.index.names == (None, 0)

    result = ser2.loc[dti[0]]
    expected = ser2.iloc[[0]].droplevel(None)
    tm.assert_series_equal(result, expected)


def test_getitem_str_slice(datapath):
# GH#15928
path = datapath("reshape", "merge", "data", "quotes2.csv")
Expand Down