From 3195a8457d98ef252db2c4f49bdbff27a9fbd956 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 19 Oct 2025 20:12:18 +0100 Subject: [PATCH 1/2] Fix Index.get_indexer for new string dtype and missing value --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/indexes/base.py | 7 +++++++ pandas/tests/indexes/ranges/test_indexing.py | 11 +++++++++++ 3 files changed, 19 insertions(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 470129d6d860b..e06fa494e5faf 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1039,6 +1039,7 @@ Indexing - Bug in reindexing of :class:`DataFrame` with :class:`PeriodDtype` columns in case of consolidated block (:issue:`60980`, :issue:`60273`) - Bug in :meth:`DataFrame.loc.__getitem__` and :meth:`DataFrame.iloc.__getitem__` with a :class:`CategoricalDtype` column with integer categories raising when trying to index a row containing a ``NaN`` entry (:issue:`58954`) - Bug in :meth:`Index.__getitem__` incorrectly raising with a 0-dim ``np.ndarray`` key (:issue:`55601`) +- Bug in :meth:`Index.get_indexer` not casting missing values correctly for new string datatype (:issue:`55833`) - Bug in adding new rows with :meth:`DataFrame.loc.__setitem__` or :class:`Series.loc.__setitem__` which failed to retain dtype on the object's index in some cases (:issue:`41626`) - Bug in indexing on a :class:`DatetimeIndex` with a ``timestamp[pyarrow]`` dtype or on a :class:`TimedeltaIndex` with a ``duration[pyarrow]`` dtype (:issue:`62277`) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 84ab9c6ec9b09..69e937ce85e42 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -6619,6 +6619,13 @@ def _maybe_cast_listlike_indexer(self, target) -> Index: # If we started with a list-like, avoid inference to string dtype if self # is object dtype (coercing to string dtype will alter the missing values) target_index = Index(target, dtype=self.dtype) + elif ( + not hasattr(target, "dtype") + and isinstance(self.dtype, StringDtype) + and self.dtype.na_value is np.nan + ): + # Fill missing values to ensure consistent missing value representation + target_index = target_index.fillna(np.nan) return target_index @final diff --git a/pandas/tests/indexes/ranges/test_indexing.py b/pandas/tests/indexes/ranges/test_indexing.py index 6202074a11d78..695be18043da9 100644 --- a/pandas/tests/indexes/ranges/test_indexing.py +++ b/pandas/tests/indexes/ranges/test_indexing.py @@ -46,6 +46,17 @@ def test_get_indexer_decreasing(self, stop): expected = np.array([-1, 2, -1, -1, 1, -1, -1, 0, -1], dtype=np.intp) tm.assert_numpy_array_equal(result, expected) + def test_get_indexer_missing_value_casting_string_dtype(self): + # GH#55833 + idx = Index(["a", "b", None]) + result = idx.get_indexer([None]) + expected = np.array([2], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + result = idx.get_indexer([None, True]) + expected = np.array([2, -1], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + class TestTake: def test_take_preserve_name(self): From 019c7cc6b4a972281a22715fdc03ebfa0ab5c9e0 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 19 Oct 2025 21:05:52 +0100 Subject: [PATCH 2/2] Fixup --- pandas/core/indexes/base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 69e937ce85e42..c716bd4e636bd 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -6623,6 +6623,7 @@ def _maybe_cast_listlike_indexer(self, target) -> Index: not hasattr(target, "dtype") and isinstance(self.dtype, StringDtype) and self.dtype.na_value is np.nan + and using_string_dtype() ): # Fill missing values to ensure consistent missing value representation target_index = target_index.fillna(np.nan)