Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: Disallow missing nested label when indexing MultiIndex level #49628

Merged
merged 1 commit into from
Nov 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,7 @@ Removal of prior version deprecations/changes
- Enforced disallowing using ``usecols`` with out-of-bounds indices for ``read_csv`` with ``engine="c"`` (:issue:`25623`)
- Enforced disallowing the use of ``**kwargs`` in :class:`.ExcelWriter`; use the keyword argument ``engine_kwargs`` instead (:issue:`40430`)
- Enforced disallowing a tuple of column labels into :meth:`.DataFrameGroupBy.__getitem__` (:issue:`30546`)
- Enforced disallowing missing labels when indexing with a sequence of labels on a level of a :class:`MultiIndex`. This now raises a ``KeyError`` (:issue:`42351`)
- Enforced disallowing setting values with ``.loc`` using a positional slice. Use ``.loc`` with labels or ``.iloc`` with positions instead (:issue:`31840`)
- Enforced disallowing positional indexing with a ``float`` key even if that key is a round number; manually cast to an integer instead (:issue:`34193`)
- Enforced disallowing using a :class:`DataFrame` indexer with ``.iloc``; use ``.loc`` instead for automatic alignment (:issue:`39022`)
Expand Down
38 changes: 11 additions & 27 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -3284,34 +3284,18 @@ def _to_bool_indexer(indexer) -> npt.NDArray[np.bool_]:
if not is_hashable(x):
# e.g. slice
raise err
try:
item_indexer = self._get_level_indexer(
x, level=i, indexer=indexer
)
except KeyError:
# ignore not founds; see discussion in GH#39424
warnings.warn(
"The behavior of indexing on a MultiIndex with a "
"nested sequence of labels is deprecated and will "
"change in a future version. "
"`series.loc[label, sequence]` will raise if any "
"members of 'sequence' or not present in "
"the index's second level. To retain the old "
"behavior, use `series.index.isin(sequence, level=1)`",
# TODO: how to opt in to the future behavior?
# TODO: how to handle IntervalIndex level?
# (no test cases)
FutureWarning,
stacklevel=find_stack_level(),
)
continue
# GH 39424: labels missing from the level were previously ignored
# GH 42351: missing labels now raise KeyError (enforced in 2.0)
# TODO: how to handle IntervalIndex level? (no test cases)
item_indexer = self._get_level_indexer(
x, level=i, indexer=indexer
)
if lvl_indexer is None:
lvl_indexer = _to_bool_indexer(item_indexer)
elif isinstance(item_indexer, slice):
lvl_indexer[item_indexer] = True # type: ignore[index]
else:
if lvl_indexer is None:
lvl_indexer = _to_bool_indexer(item_indexer)
elif isinstance(item_indexer, slice):
lvl_indexer[item_indexer] = True # type: ignore[index]
else:
lvl_indexer |= item_indexer
lvl_indexer |= item_indexer

if lvl_indexer is None:
# no matches we are done
Expand Down
13 changes: 5 additions & 8 deletions pandas/tests/indexing/multiindex/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -443,15 +443,12 @@ def test_loc_getitem_duplicates_multiindex_missing_indexers(indexer, pos):
if expected.size == 0 and indexer != []:
with pytest.raises(KeyError, match=str(indexer)):
ser.loc[indexer]
elif indexer == (slice(None), ["foo", "bah"]):
# "bah" is not in idx.levels[1], raising KeyError enforced in 2.0
with pytest.raises(KeyError, match="'bah'"):
ser.loc[indexer]
else:
warn = None
msg = "MultiIndex with a nested sequence"
if indexer == (slice(None), ["foo", "bah"]):
# "bah" is not in idx.levels[1], so is ignored, will raise KeyError
warn = FutureWarning

with tm.assert_produces_warning(warn, match=msg):
result = ser.loc[indexer]
result = ser.loc[indexer]
tm.assert_series_equal(result, expected)


Expand Down
30 changes: 14 additions & 16 deletions pandas/tests/io/formats/style/test_style.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import contextlib
import copy
import re
from textwrap import dedent
Expand Down Expand Up @@ -701,26 +702,26 @@ def test_applymap_subset(self, slice_, df):
def test_applymap_subset_multiindex(self, slice_):
# GH 19861
# edited for GH 33562
warn = None
msg = "indexing on a MultiIndex with a nested sequence of labels"
if (
isinstance(slice_[-1], tuple)
and isinstance(slice_[-1][-1], list)
and "C" in slice_[-1][-1]
):
warn = FutureWarning
ctx = pytest.raises(KeyError, match="C") # noqa: PDF010
elif (
isinstance(slice_[0], tuple)
and isinstance(slice_[0][1], list)
and 3 in slice_[0][1]
):
warn = FutureWarning
ctx = pytest.raises(KeyError, match="3") # noqa: PDF010
else:
ctx = contextlib.nullcontext()

idx = MultiIndex.from_product([["a", "b"], [1, 2]])
col = MultiIndex.from_product([["x", "y"], ["A", "B"]])
df = DataFrame(np.random.rand(4, 4), columns=col, index=idx)

with tm.assert_produces_warning(warn, match=msg):
with ctx:
df.style.applymap(lambda x: "color: red;", subset=slice_).to_html()

def test_applymap_subset_multiindex_code(self):
Expand Down Expand Up @@ -1390,7 +1391,7 @@ def test_non_reducing_slice_on_multiindex(self):
IndexSlice[:, IndexSlice["a", :, "e"]],
IndexSlice[:, IndexSlice[:, "c", "e"]],
IndexSlice[:, IndexSlice["a", ["c", "d"], :]], # check list
IndexSlice[:, IndexSlice["a", ["c", "d", "-"], :]], # allow missing
IndexSlice[:, IndexSlice["a", ["c", "d", "-"], :]], # don't allow missing
IndexSlice[:, IndexSlice["a", ["c", "d", "-"], "e"]], # no slice
# check rows
IndexSlice[IndexSlice[["U"]], :], # inferred deeper need list
Expand All @@ -1399,7 +1400,7 @@ def test_non_reducing_slice_on_multiindex(self):
IndexSlice[IndexSlice["U", :, "Y"], :],
IndexSlice[IndexSlice[:, "W", "Y"], :],
IndexSlice[IndexSlice[:, "W", ["Y", "Z"]], :], # check list
IndexSlice[IndexSlice[:, "W", ["Y", "Z", "-"]], :], # allow missing
IndexSlice[IndexSlice[:, "W", ["Y", "Z", "-"]], :], # don't allow missing
IndexSlice[IndexSlice["U", "W", ["Y", "Z", "-"]], :], # no slice
# check simultaneous
IndexSlice[IndexSlice[:, "W", "Y"], IndexSlice["a", "c", :]],
Expand All @@ -1411,21 +1412,18 @@ def test_non_reducing_multi_slice_on_multiindex(self, slice_):
idxs = MultiIndex.from_product([["U", "V"], ["W", "X"], ["Y", "Z"]])
df = DataFrame(np.arange(64).reshape(8, 8), columns=cols, index=idxs)

msg = "indexing on a MultiIndex with a nested sequence of labels"
warn = None
for lvl in [0, 1]:
key = slice_[lvl]
if isinstance(key, tuple):
for subkey in key:
if isinstance(subkey, list) and "-" in subkey:
# not present in the index level, ignored, will raise in future
warn = FutureWarning

with tm.assert_produces_warning(warn, match=msg):
expected = df.loc[slice_]
# not present in the index level, raises KeyError since 2.0
with pytest.raises(KeyError, match="-"):
df.loc[slice_]
return

with tm.assert_produces_warning(warn, match=msg):
result = df.loc[non_reducing_slice(slice_)]
expected = df.loc[slice_]
result = df.loc[non_reducing_slice(slice_)]
tm.assert_frame_equal(result, expected)


Expand Down