Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1058,6 +1058,7 @@ Indexing
- Bug in :meth:`DataFrame.loc.__getitem__` and :meth:`DataFrame.iloc.__getitem__` with a :class:`CategoricalDtype` column with integer categories raising when trying to index a row containing a ``NaN`` entry (:issue:`58954`)
- Bug in :meth:`Index.__getitem__` incorrectly raising with a 0-dim ``np.ndarray`` key (:issue:`55601`)
- Bug in :meth:`Index.get_indexer` not casting missing values correctly for new string datatype (:issue:`55833`)
- Bug in :meth:`MultiIndex._get_loc_level` raising ``TypeError`` when using a ``datetime.date`` key on a level containing incompatible objects (:issue:`55969`)
- Bug in adding new rows with :meth:`DataFrame.loc.__setitem__` or :meth:`Series.loc.__setitem__` which failed to retain dtype on the object's index in some cases (:issue:`41626`)
- Bug in indexing on a :class:`DatetimeIndex` with a ``timestamp[pyarrow]`` dtype or on a :class:`TimedeltaIndex` with a ``duration[pyarrow]`` dtype (:issue:`62277`)

Expand Down
61 changes: 61 additions & 0 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
Iterable,
Sequence,
)
import datetime
from functools import wraps
from itertools import zip_longest
from sys import getsizeof
Expand Down Expand Up @@ -3460,6 +3461,23 @@ def maybe_mi_droplevels(indexer, levels):

return new_index

# type-check key against level(s), raise error if mismatch
if isinstance(key, tuple):
for i, k in enumerate(key):
if not self._is_key_type_compatible(k, i):
raise TypeError(
f"Type mismatch at index level {i}: "
f"expected {type(self.levels[i][0]).__name__}, "
f"got {type(k).__name__}"
)
else:
if not self._is_key_type_compatible(key, level):
raise TypeError(
f"Type mismatch at index level {level}: "
f"expected {type(self.levels[level][0]).__name__}, "
f"got {type(key).__name__}"
)

if isinstance(level, (tuple, list)):
if len(key) != len(level):
raise AssertionError(
Expand Down Expand Up @@ -3591,6 +3609,49 @@ def maybe_mi_droplevels(indexer, levels):

return indexer, result_index

def _is_key_type_compatible(self, key, level):
    """
    Return True if ``key`` is type-compatible with the values of ``level``.

    The level's value type is determined by sampling only the first entry
    of ``self.levels[level]``.

    Compatible combinations:
    - any key when the level is empty
    - missing-value keys (``None`` or a float NaN) on any level
    - slices (for partial indexing)
    - any NumPy scalar (``np.generic``)
    - a key that is an instance of the sampled value's type
    - real numbers: int / float / np.integer / np.floating, in any mix
    - str ↔ np.str_
    - datetime.date ↔ datetime.datetime (including subclasses)
    - str keys on date-like levels (date strings)

    Parameters
    ----------
    key : object
        A single lookup key (scalar or slice).
    level : int
        Position of the level in ``self.levels``.

    Returns
    -------
    bool
        True when the key may be looked up in the level, False otherwise.
    """
    # NOTE(review): only the first level value is inspected, so a level that
    # holds mixed object types is judged by that one sample -- confirm this
    # is acceptable for object-dtype levels.
    level_values = self.levels[level]

    # Explicit length test: presumably the level is array-like, and the
    # truth value of an array-like is ambiguous.
    if len(level_values) == 0:
        return True  # nothing to compare against

    # Keys that are acceptable whatever the level holds: missing values,
    # slices (partial selection) and NumPy scalars.
    if key is None or isinstance(key, (slice, np.generic)):
        return True
    if isinstance(key, float) and key != key:
        # NaN is the only float that is not equal to itself.
        return True

    level_val = level_values[0]

    # Same type (or a subclass of it).
    if isinstance(key, type(level_val)):
        return True

    # Real numbers compare and hash equal across int/float, so an int key
    # on a float level (or the reverse) is a legitimate lookup.
    real_types = (int, float, np.integer, np.floating)
    if isinstance(level_val, real_types) and isinstance(key, real_types):
        return True

    # np.str_ subclasses str, so this covers str <-> np.str_ both ways.
    if isinstance(level_val, str) and isinstance(key, str):
        return True

    # date / datetime are interchangeable as keys; a str key is let through
    # on date-like levels so that date strings still reach the lookup.
    if isinstance(level_val, datetime.date) and isinstance(
        key, (datetime.date, str)
    ):
        return True

    return False

def _get_level_indexer(
self, key, level: int = 0, indexer: npt.NDArray[np.bool_] | None = None
):
Expand Down
26 changes: 26 additions & 0 deletions pandas/tests/indexes/multi/test_datetime_indexing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import datetime as dt

import numpy as np
import pytest

import pandas as pd


def test_multiindex_date_npdatetime_mismatch_raises():
    """
    Index a frame by a three-level MultiIndex whose first level is built
    from ``datetime.date`` objects.

    A ``datetime.date`` key must locate its row, while a ``np.datetime64``
    key for the same calendar day must raise ``KeyError``.
    """
    columns = ["dates", "t1", "t2", "vals"]
    # One inner list per column; transposing turns them into rows.
    table = np.array(
        [
            [dt.date(2023, 11, 1), dt.date(2023, 11, 1), dt.date(2023, 11, 2)],
            ["A", "B", "C"],
            ["C", "D", "E"],
            [10, 20, 30],
        ]
    ).T
    df = pd.DataFrame(data=table, columns=columns).set_index(columns[:-1])

    # A key of the same type as the values used to build the level finds the row.
    exact_key = (dt.date(2023, 11, 1), "A", "C")
    assert df.loc[exact_key]["vals"] == 10

    # A np.datetime64 key is expected to surface as a KeyError.
    mismatched_key = (np.datetime64("2023-11-01"), "A", "C")
    with pytest.raises(KeyError):
        df.loc[mismatched_key]
Loading