From 02224a4e73d4dc175342ff20737cf5bcb777ea65 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 17 Oct 2020 08:04:59 -0700 Subject: [PATCH 1/6] BUG: .loc with MultiIndex with names[1] = 0 --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/core/generic.py | 4 ++- pandas/core/indexes/base.py | 15 ++++++++--- pandas/core/indexes/multi.py | 28 ++++++++++++++------ pandas/tests/indexing/multiindex/test_loc.py | 23 ++++++++++++++++ 5 files changed, 58 insertions(+), 13 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index dfbbb456f50b6..0c439a5311b51 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -401,6 +401,7 @@ Indexing - Bug in :meth:`DataFrame.reset_index` was incorrectly raising a ``ValueError`` for input with a :class:`MultiIndex` with missing values in a level with ``Categorical`` dtype (:issue:`24206`) - Bug in indexing with boolean masks on datetime-like values sometimes returning a view instead of a copy (:issue:`36210`) - Bug in :meth:`Series.loc.__getitem__` with a non-unique :class:`MultiIndex` and an empty-list indexer (:issue:`13691`) +- Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`MultiIndex` with a level named "0" (:issue:`??`) Missing ^^^^^^^ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 784e8877ef128..16534f68353cb 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3677,7 +3677,9 @@ class animal locomotion index = self.index if isinstance(index, MultiIndex): try: - loc, new_index = self.index.get_loc_level(key, drop_level=drop_level) + loc, new_index = self.index._get_loc_level( + key, level=0, drop_level=drop_level + ) except TypeError as e: raise TypeError(f"Expected label or tuple of labels, got {key}") from e else: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 87dd15d5b142b..4d219e4670c3e 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1552,12 
+1552,19 @@ def droplevel(self, level=0): levnums = sorted(self._get_level_number(lev) for lev in level)[::-1] - if len(level) == 0: + return self._drop_level_nums(levnums) + + def _drop_level_nums(self, levnums: List[int]) -> "MultiIndex": + """ + Drop MultiIndex levels by level _number_, not name. + """ + + if len(levnums) == 0: return self - if len(level) >= self.nlevels: + if len(levnums) >= self.nlevels: raise ValueError( - f"Cannot remove {len(level)} levels from an index with {self.nlevels} " - "levels: at least one level must be left." + f"Cannot remove {len(levnums)} levels from an index with " + f"{self.nlevels} levels: at least one level must be left." ) # The two checks above guarantee that here self is a MultiIndex self = cast("MultiIndex", self) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index d012d5704f716..6fd0b7c0dabc7 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2864,16 +2864,29 @@ def get_loc_level(self, key, level=0, drop_level: bool = True): >>> mi.get_loc_level(['b', 'e']) (1, None) """ + if not isinstance(level, (list, tuple)): + level = self._get_level_number(level) + else: + level = [self._get_level_number(lev) for lev in level] + return self._get_loc_level(key, level=level, drop_level=drop_level) + + def _get_loc_level( + self, key, level: Union[int, List[int]] = 0, drop_level: bool = True + ): + """ + get_loc_level but with `level` known to be positional, not name-based. 
+ """ + # different name to distinguish from maybe_droplevels def maybe_mi_droplevels(indexer, levels, drop_level: bool): if not drop_level: return self[indexer] # kludge around orig_index = new_index = self[indexer] - levels = [self._get_level_number(i) for i in levels] + for i in sorted(levels, reverse=True): try: - new_index = new_index.droplevel(i) + new_index = new_index._drop_level_nums([i]) except ValueError: # no dropping here @@ -2887,7 +2900,7 @@ def maybe_mi_droplevels(indexer, levels, drop_level: bool): ) result = None for lev, k in zip(level, key): - loc, new_index = self.get_loc_level(k, level=lev) + loc, new_index = self._get_loc_level(k, level=lev) if isinstance(loc, slice): mask = np.zeros(len(self), dtype=bool) mask[loc] = True @@ -2897,8 +2910,6 @@ def maybe_mi_droplevels(indexer, levels, drop_level: bool): return result, maybe_mi_droplevels(result, level, drop_level) - level = self._get_level_number(level) - # kludge for #1796 if isinstance(key, list): key = tuple(key) @@ -2963,7 +2974,8 @@ def partial_selection(key, indexer=None): indexer = self._get_level_indexer(key, level=level) return indexer, maybe_mi_droplevels(indexer, [level], drop_level) - def _get_level_indexer(self, key, level=0, indexer=None): + def _get_level_indexer(self, key, level: int = 0, indexer=None): + # `level` kwarg is _always_ positional, never name # return an indexer, boolean array or a slice showing where the key is # in the totality of values # if the indexer is provided, then use this @@ -3767,13 +3779,13 @@ def maybe_droplevels(index, key): if isinstance(key, tuple): for _ in key: try: - index = index.droplevel(0) + index = index._drop_level_nums([0]) except ValueError: # we have dropped too much, so back out return original_index else: try: - index = index.droplevel(0) + index = index._drop_level_nums([0]) except ValueError: pass diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 
1b659bec0e9e8..ef29b659e88b2 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -522,3 +522,26 @@ def test_loc_with_mi_indexer(): columns=["author", "price"], ) tm.assert_frame_equal(result, expected) + + +def test_loc_mi_with_level1_named_0(): + dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific") + + ser = pd.Series(range(3), index=dti) + df = ser.to_frame() + df[1] = dti + + df2 = df.set_index(0, append=True) + assert df2.index.names == (None, 0) + df2.index.get_loc(dti[0]) # smoke test + + result = df2.loc[dti[0]] + expected = df2.iloc[[0]].droplevel(None) + tm.assert_frame_equal(result, expected) + + ser2 = df2[1] + assert ser2.index.names == (None, 0) + + result = ser2.loc[dti[0]] + expected = ser2.iloc[[0]].droplevel(None) + tm.assert_series_equal(result, expected) From cb232f62e998c2ffe77085748957f45b14bdd23e Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 17 Oct 2020 08:06:05 -0700 Subject: [PATCH 2/6] GH refs --- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/tests/indexing/multiindex/test_loc.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 0c439a5311b51..8c3048c538e4f 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -401,7 +401,7 @@ Indexing - Bug in :meth:`DataFrame.reset_index` was incorrectly raising a ``ValueError`` for input with a :class:`MultiIndex` with missing values in a level with ``Categorical`` dtype (:issue:`24206`) - Bug in indexing with boolean masks on datetime-like values sometimes returning a view instead of a copy (:issue:`36210`) - Bug in :meth:`Series.loc.__getitem__` with a non-unique :class:`MultiIndex` and an empty-list indexer (:issue:`13691`) -- Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`MultiIndex` with a level named "0" (:issue:`??`) +- Bug in indexing on a :class:`Series` or :class:`DataFrame` with a 
:class:`MultiIndex` with a level named ``0`` (:issue:`37194`) Missing ^^^^^^^ diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index ef29b659e88b2..c659a36a3a456 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -525,6 +525,7 @@ def test_loc_with_mi_indexer(): def test_loc_mi_with_level1_named_0(): + # GH#37194 dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific") ser = pd.Series(range(3), index=dti) From 30739244d283e68c1eefacc4756dc042f1f9c87a Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 17 Oct 2020 08:45:21 -0700 Subject: [PATCH 3/6] mypy fixup --- pandas/core/indexes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 3fd6f4f5b9591..abcd11948f084 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1554,7 +1554,7 @@ def droplevel(self, level=0): return self._drop_level_nums(levnums) - def _drop_level_nums(self, levnums: List[int]) -> "MultiIndex": + def _drop_level_nums(self, levnums: List[int]): """ Drop MultiIndex levels by level _number_, not name. 
""" From 79a821873a7ddf66f9b6bcfb22c126121a846375 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 17 Oct 2020 10:29:07 -0700 Subject: [PATCH 4/6] rename drop_level_nums -> drop_level_numbers --- pandas/core/indexes/base.py | 4 ++-- pandas/core/indexes/multi.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index abcd11948f084..8831e302a83cf 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1552,9 +1552,9 @@ def droplevel(self, level=0): levnums = sorted(self._get_level_number(lev) for lev in level)[::-1] - return self._drop_level_nums(levnums) + return self._drop_level_numbers(levnums) - def _drop_level_nums(self, levnums: List[int]): + def _drop_level_numbers(self, levnums: List[int]): """ Drop MultiIndex levels by level _number_, not name. """ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 3a85c4529dff7..80087db10d791 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3779,13 +3779,13 @@ def maybe_droplevels(index, key): if isinstance(key, tuple): for _ in key: try: - index = index._drop_level_nums([0]) + index = index._drop_level_numbers([0]) except ValueError: # we have dropped too much, so back out return original_index else: try: - index = index._drop_level_nums([0]) + index = index._drop_level_numbers([0]) except ValueError: pass From ec5435754807b85effaf0ba9d249f068350f5a34 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 17 Oct 2020 16:07:20 -0700 Subject: [PATCH 5/6] typo fixup --- pandas/core/indexes/multi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 80087db10d791..380df22861218 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2886,7 +2886,7 @@ def maybe_mi_droplevels(indexer, levels, drop_level: bool): for i in sorted(levels, reverse=True): try: - new_index = 
new_index._drop_level_nums([i]) + new_index = new_index._drop_level_numbers([i]) except ValueError: # no dropping here From f81d1cd20a654eef6a4fef63cc9ed46d8fc95e51 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 21 Oct 2020 10:11:17 -0700 Subject: [PATCH 6/6] lint fixup --- pandas/tests/indexing/multiindex/test_loc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index dba28925f6d78..0e466b49f6597 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -528,7 +528,7 @@ def test_loc_mi_with_level1_named_0(): # GH#37194 dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific") - ser = pd.Series(range(3), index=dti) + ser = Series(range(3), index=dti) df = ser.to_frame() df[1] = dti