diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index fc49177a4736b..178eab20c01c7 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -337,6 +337,7 @@ Indexing - Bug in setting ``timedelta64`` values into numeric :class:`Series` failing to cast to object dtype (:issue:`39086`) - Bug in setting :class:`Interval` values into a :class:`Series` or :class:`DataFrame` with mismatched :class:`IntervalDtype` incorrectly casting the new values to the existing dtype (:issue:`39120`) - Bug in setting ``datetime64`` values into a :class:`Series` with integer-dtype incorrect casting the datetime64 values to integers (:issue:`39266`) +- Bug in :meth:`Index.get_loc` not raising ``KeyError`` when ``method`` is specified and the key is ``NaN`` but ``NaN`` is not in the :class:`Index` (:issue:`39382`) - Bug in incorrectly raising in :meth:`Index.insert`, when setting a new column that cannot be held in the existing ``frame.columns``, or in :meth:`Series.reset_index` or :meth:`DataFrame.reset_index` instead of casting to a compatible dtype (:issue:`39068`) - Bug in :meth:`RangeIndex.append` where a single object of length 1 was concatenated incorrectly (:issue:`39401`) diff --git a/pandas/_libs/index_class_helper.pxi.in b/pandas/_libs/index_class_helper.pxi.in index 69680e472bbc2..e1ea1fbf9bd46 100644 --- a/pandas/_libs/index_class_helper.pxi.in +++ b/pandas/_libs/index_class_helper.pxi.in @@ -57,7 +57,14 @@ cdef class {{name}}Engine(IndexEngine): with warnings.catch_warnings(): # e.g. 
if values is float64 and `val` is a str, suppress warning warnings.filterwarnings("ignore", category=FutureWarning) + {{if name in {'Float64', 'Float32'} }} + if util.is_nan(val): + indexer = np.isnan(values) + else: + indexer = values == val + {{else}} indexer = values == val + {{endif}} except TypeError: # if the equality above returns a bool, cython will raise TypeError # when trying to cast it to ndarray diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 205bbcc07fc76..f7ad1bbc3f14a 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2292,14 +2292,6 @@ def _isnan(self): values.fill(False) return values - @cache_readonly - @final - def _nan_idxs(self): - if self._can_hold_na: - return self._isnan.nonzero()[0] - else: - return np.array([], dtype=np.intp) - @cache_readonly def hasnans(self) -> bool: """ @@ -3224,6 +3216,9 @@ def get_loc(self, key, method=None, tolerance=None): except KeyError as err: raise KeyError(key) from err + if is_scalar(key) and isna(key) and not self.hasnans: + raise KeyError(key) + if tolerance is not None: tolerance = self._convert_tolerance(tolerance, np.asarray(key)) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index a432b3952666e..777fc1c7c4ad2 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -341,15 +341,6 @@ def get_loc(self, key, method=None, tolerance=None): if is_bool(key): # Catch this to avoid accidentally casting to 1.0 raise KeyError(key) - - if is_float(key) and np.isnan(key): - nan_idxs = self._nan_idxs - if not len(nan_idxs): - raise KeyError(key) - elif len(nan_idxs) == 1: - return nan_idxs[0] - return nan_idxs - return super().get_loc(key, method=method, tolerance=tolerance) # ---------------------------------------------------------------- diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index a24c8e252d234..090e21be254e3 100644 --- 
a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -316,16 +316,13 @@ def test_nat(self, tz_naive_fixture): idx = DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz) assert idx._can_hold_na - tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) assert idx.hasnans is False - tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp)) idx = DatetimeIndex(["2011-01-01", "NaT"], tz=tz) assert idx._can_hold_na tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) assert idx.hasnans is True - tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp)) @pytest.mark.parametrize("values", [["20180101", "20180103", "20180105"], []]) @pytest.mark.parametrize("freq", ["2D", Day(2), "2B", BDay(2), "48H", Hour(48)]) diff --git a/pandas/tests/indexes/numeric/test_indexing.py b/pandas/tests/indexes/numeric/test_indexing.py index 33f927bdd7c04..5f2f8f75045bb 100644 --- a/pandas/tests/indexes/numeric/test_indexing.py +++ b/pandas/tests/indexes/numeric/test_indexing.py @@ -1,7 +1,15 @@ import numpy as np import pytest -from pandas import Float64Index, Index, Int64Index, RangeIndex, Series, UInt64Index +from pandas import ( + Float64Index, + Index, + Int64Index, + RangeIndex, + Series, + Timestamp, + UInt64Index, +) import pandas._testing as tm @@ -102,13 +110,10 @@ def test_get_loc_na(self): idx = Float64Index([np.nan, 1, np.nan]) assert idx.get_loc(1) == 1 - # FIXME: dont leave commented-out # representable by slice [0:2:2] - # pytest.raises(KeyError, idx.slice_locs, np.nan) - sliced = idx.slice_locs(np.nan) - assert isinstance(sliced, tuple) - assert sliced == (0, 3) - + msg = "'Cannot get left slice bound for non-unique label: nan'" + with pytest.raises(KeyError, match=msg): + idx.slice_locs(np.nan) # not representable by slice idx = Float64Index([np.nan, 1, np.nan, np.nan]) assert idx.get_loc(1) == 1 @@ -128,6 +133,14 @@ def test_get_loc_missing_nan(self): # listlike/non-hashable raises 
TypeError idx.get_loc([np.nan]) + @pytest.mark.parametrize("vals", [[1], [1.0], [Timestamp("2019-12-31")], ["test"]]) + @pytest.mark.parametrize("method", ["nearest", "pad", "backfill"]) + def test_get_loc_float_index_nan_with_method(self, vals, method): + # GH#39382 + idx = Index(vals) + with pytest.raises(KeyError, match="nan"): + idx.get_loc(np.nan, method=method) + class TestGetIndexer: def test_get_indexer(self): diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index 645019f1ac063..fd0a77bf7930b 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -278,14 +278,12 @@ def test_nat(self): tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) assert idx.hasnans is False - tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp)) idx = PeriodIndex(["2011-01-01", "NaT"], freq="D") assert idx._can_hold_na tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) assert idx.hasnans is True - tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp)) def test_freq_setter_deprecated(self): # GH 20678 diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index 3578174e17141..83b8fcc1b15fe 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -217,14 +217,12 @@ def test_nat(self): tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) assert idx.hasnans is False - tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp)) idx = TimedeltaIndex(["1 days", "NaT"]) assert idx._can_hold_na tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) assert idx.hasnans is True - tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp)) @pytest.mark.parametrize("values", [["0 days", "2 days", "4 days"], []]) @pytest.mark.parametrize("freq", ["2D", Day(2), "48H", Hour(48)])