diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 995e7676afbca..f64c9ffa9c7fa 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -1058,6 +1058,7 @@ Indexing
 - Bug in :meth:`DataFrame.loc.__getitem__` and :meth:`DataFrame.iloc.__getitem__` with a :class:`CategoricalDtype` column with integer categories raising when trying to index a row containing a ``NaN`` entry (:issue:`58954`)
 - Bug in :meth:`Index.__getitem__` incorrectly raising with a 0-dim ``np.ndarray`` key (:issue:`55601`)
 - Bug in :meth:`Index.get_indexer` not casting missing values correctly for new string datatype (:issue:`55833`)
+- Bug in :meth:`MultiIndex._get_loc_level` raising ``TypeError`` when using a ``datetime.date`` key on a level containing incompatible objects (:issue:`55969`)
 - Bug in adding new rows with :meth:`DataFrame.loc.__setitem__` or :class:`Series.loc.__setitem__` which failed to retain dtype on the object's index in some cases (:issue:`41626`)
 - Bug in indexing on a :class:`DatetimeIndex` with a ``timestamp[pyarrow]`` dtype or on a :class:`TimedeltaIndex` with a ``duration[pyarrow]`` dtype (:issue:`62277`)
 
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index 1cc1928136da1..d5ec88ed82695 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -8,6 +8,7 @@
     Iterable,
     Sequence,
 )
+import datetime
 from functools import wraps
 from itertools import zip_longest
 from sys import getsizeof
@@ -3460,6 +3461,10 @@ def maybe_mi_droplevels(indexer, levels):
 
             return new_index
 
+        # Reject keys whose type can never match the level(s) they target
+        # with an informative TypeError before any lookup work is done.
+        self._validate_key_types(key, level)
+
         if isinstance(level, (tuple, list)):
             if len(key) != len(level):
                 raise AssertionError(
@@ -3591,6 +3596,112 @@ def maybe_mi_droplevels(indexer, levels):
 
             return indexer, result_index
 
+    def _validate_key_types(self, key, level) -> None:
+        """
+        Raise TypeError if a key cannot match the level it targets.
+
+        Parameters
+        ----------
+        key : scalar, slice, list or tuple
+            Lookup key as received by ``_get_loc_level``.
+        level : int or list/tuple of int
+            Positional level number(s) addressed by ``key``.
+
+        Raises
+        ------
+        TypeError
+            If a key fails ``_is_key_type_compatible`` for its level.
+        """
+        if isinstance(level, (tuple, list)):
+            if not isinstance(key, (tuple, list)) or len(key) != len(level):
+                # Left for the length check in ``_get_loc_level``.
+                return
+            pairs = list(zip(key, level))
+        elif isinstance(key, tuple):
+            if level != 0:
+                # NOTE(review): assumes a tuple is only spread across levels
+                # when starting at level 0 -- confirm against the lookup code.
+                return
+            # zip() stops at the last level; over-long keys are left for the
+            # regular lookup to report.
+            pairs = list(zip(key, range(len(self.levels))))
+        else:
+            pairs = [(key, level)]
+
+        for k, lev in pairs:
+            if not self._is_key_type_compatible(k, lev):
+                raise TypeError(
+                    f"Type mismatch at index level {lev}: "
+                    f"expected {type(self.levels[lev][0]).__name__}, "
+                    f"got {type(k).__name__}"
+                )
+
+    def _is_key_type_compatible(self, key, level) -> bool:
+        """
+        Return True if ``key`` could match the values of a level.
+
+        Only the first value of the level is inspected, so this is a cheap
+        sanity check and not a guarantee that ``key`` is present.
+
+        Parameters
+        ----------
+        key : object
+            Lookup key for a single level.
+        level : int
+            Positional level number.
+
+        Returns
+        -------
+        bool
+
+        Notes
+        -----
+        Treated as compatible:
+
+        - instances of the type of the level's first value
+        - real numbers with a numeric level (``1`` can match ``1.0``)
+        - ``str`` with ``np.str_``
+        - ``datetime.date`` with ``datetime.datetime``
+        - ``str`` with a level that supports partial string indexing
+        - ``None``, NaN, slices, lists, arrays and NumPy scalars, which are
+          left to the regular lookup to resolve
+        """
+        level_values = self.levels[level]
+        if len(level_values) == 0:
+            return True  # nothing to compare against
+
+        # Missing values, slices, containers and NumPy scalars are not
+        # judged here; the lookup itself resolves them.
+        if key is None or isinstance(key, (slice, list, np.ndarray, np.generic)):
+            return True
+        if isinstance(key, float) and np.isnan(key):
+            return True
+
+        level_val = level_values[0]
+        if isinstance(key, type(level_val)):
+            return True
+
+        # Python and NumPy numbers compare equal across int and float.
+        numeric = (int, float, np.number)
+        if isinstance(level_val, numeric) and isinstance(key, numeric):
+            return True
+
+        # np.str_ subclasses str, so a plain str key fails the check above.
+        if isinstance(level_val, str) and isinstance(key, str):
+            return True
+
+        # datetime.datetime (and subclasses) are datetime.date instances.
+        if isinstance(level_val, datetime.date) and isinstance(key, datetime.date):
+            return True
+
+        # Datetime-like levels also accept string keys such as "2023-11".
+        if isinstance(key, str) and getattr(
+            level_values, "_supports_partial_string_indexing", False
+        ):
+            return True
+
+        return False
+
     def _get_level_indexer(
         self, key, level: int = 0, indexer: npt.NDArray[np.bool_] | None = None
     ):
diff --git a/pandas/tests/indexes/multi/test_datetime_indexing.py b/pandas/tests/indexes/multi/test_datetime_indexing.py
new file mode 100644
index 0000000000000..77cc9f9fb14bc
--- /dev/null
+++ b/pandas/tests/indexes/multi/test_datetime_indexing.py
@@ -0,0 +1,36 @@
+import datetime as dt
+
+import numpy as np
+import pytest
+
+import pandas as pd
+
+
+def test_multiindex_date_npdatetime_mismatch_raises():
+    dates = [dt.date(2023, 11, 1), dt.date(2023, 11, 1), dt.date(2023, 11, 2)]
+    t1 = ["A", "B", "C"]
+    t2 = ["C", "D", "E"]
+    vals = [10, 20, 30]
+
+    df = pd.DataFrame(
+        data=np.array([dates, t1, t2, vals]).T, columns=["dates", "t1", "t2", "vals"]
+    )
+    df = df.set_index(["dates", "t1", "t2"])
+
+    # Exact type match
+    result = df.loc[(dt.date(2023, 11, 1), "A", "C")]
+    assert result["vals"] == 10
+
+    # np.datetime64 passes the type check (NumPy scalar) but is not a label
+    # of the level, so the lookup itself raises KeyError
+    with pytest.raises(KeyError):
+        df.loc[(np.datetime64("2023-11-01"), "A", "C")]
+
+
+def test_multiindex_incompatible_key_type_raises():
+    mi = pd.MultiIndex.from_arrays(
+        [[dt.date(2023, 11, 1), dt.date(2023, 11, 2)], ["A", "B"]]
+    )
+
+    with pytest.raises(TypeError, match="Type mismatch at index level 0"):
+        mi.get_loc_level(1.5)