From a2374b6a0ffe1f6898fe86fcde3a5c9f5a0de2c4 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 12 Mar 2024 14:11:55 -0700 Subject: [PATCH 1/5] PERF: RangeIndex.argmin/argmax --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/indexes/range.py | 35 ++++++++++++++++++++++- pandas/tests/indexes/ranges/test_range.py | 24 ++++++++++++++++ 3 files changed, 59 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index e43f6fdf9c173..29653ea487c9b 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -274,6 +274,7 @@ Performance improvements - Performance improvement in :meth:`MultiIndex.equals` for equal length indexes (:issue:`56990`) - Performance improvement in :meth:`RangeIndex.__getitem__` with a boolean mask returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57588`) - Performance improvement in :meth:`RangeIndex.append` when appending the same index (:issue:`57252`) +- Performance improvement in :meth:`RangeIndex.argmin` and :meth:`RangeIndex.argmax` (:issue:`?`) - Performance improvement in :meth:`RangeIndex.join` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57651`) - Performance improvement in :meth:`RangeIndex.reindex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57647`) - Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 24f53f16e1985..e9a7c18d62cec 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -502,7 +502,7 @@ def copy(self, name: Hashable | None = None, deep: bool = False) -> Self: new_index = self._rename(name=name) return new_index - def _minmax(self, meth: str) -> int | float: + def _minmax(self, meth: Literal["min", "max"]) -> int | float: no_steps = len(self) - 1 if no_steps == -1: return np.nan @@ -523,6 +523,39 @@ def max(self, axis=None, skipna: bool = True, *args, **kwargs) -> int | float: nv.validate_max(args, kwargs) return self._minmax("max") + def _argminmax( + self, + meth: Literal["min", "max"], + axis=None, + skipna: bool = True, + *args, + **kwargs, + ) -> int: + getattr(nv, f"validate_arg{meth}")(args, kwargs) + nv.validate_minmax_axis(axis) + if len(self) == 0: + return getattr(super(), f"arg{meth}")( + *args, axis=axis, skipna=skipna, **kwargs + ) + elif meth == "min": + if self.step > 0: + return 0 + else: + return len(self) - 1 + elif meth == "max": + if self.step > 0: + return len(self) - 1 + else: + return 0 + else: + raise ValueError(f"{meth=} must be max or min") + + def argmin(self, axis=None, skipna: bool = True, *args, **kwargs) -> int: + return self._argminmax("min", *args, axis=axis, skipna=skipna, **kwargs) + + def argmax(self, axis=None, skipna: bool = True, *args, **kwargs) -> int: + return self._argminmax("max", *args, axis=axis, skipna=skipna, **kwargs) + def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]: """ Returns the indices that would sort the index and its diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py index 8c24ce5d699d5..b51cec981ffff 100644 --- a/pandas/tests/indexes/ranges/test_range.py +++ b/pandas/tests/indexes/ranges/test_range.py @@ -695,3 +695,27 @@ def test_getitem_boolmask_wrong_length(): ri = RangeIndex(4, name="foo") with pytest.raises(IndexError, match="Boolean index has wrong length"): ri[[True]] + + +@pytest.mark.parametrize( + "rng", + [ + range(0, 5, 1), + range(0, 5, 2), + range(10, 15, 1), + range(10, 5, -1), + range(10, 5, -2), + range(5, 0, -1), + ], +) +@pytest.mark.parametrize("meth", ["argmax", "argmin"]) +def test_arg_min_max(rng, meth): + ri = RangeIndex(rng) + idx = Index(list(rng)) + assert getattr(ri, meth)() == getattr(idx, meth)() + + +@pytest.mark.parametrize("meth", ["argmin", "argmax"]) +def test_empty_argmin_argmax_raises(meth): + with pytest.raises(ValueError, match=f"attempt to get {meth} of an empty sequence"): + assert getattr(RangeIndex(0), meth)() From 4ead203ff0e5581da916831d190eeb5f879ba735 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 12 Mar 2024 14:14:13 -0700 Subject: [PATCH 2/5] whatsnew number --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 29653ea487c9b..02e8783e2b6ce 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -274,7 +274,7 @@ Performance improvements - Performance improvement in :meth:`MultiIndex.equals` for equal length indexes (:issue:`56990`) - Performance improvement in :meth:`RangeIndex.__getitem__` with a boolean mask returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57588`) - Performance improvement in :meth:`RangeIndex.append` when appending the same index (:issue:`57252`) -- Performance improvement in :meth:`RangeIndex.argmin` and :meth:`RangeIndex.argmax` (:issue:`?`) +- Performance improvement in :meth:`RangeIndex.argmin` and :meth:`RangeIndex.argmax` (:issue:`57823`) - Performance improvement in :meth:`RangeIndex.join` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57651`) - Performance improvement in :meth:`RangeIndex.reindex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57647`) - Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`) From e5355ccc47be9d19634508ba5d89d36c042eda0c Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 12 Mar 2024 17:06:21 -0700 Subject: [PATCH 3/5] Fix some typing --- pandas/core/indexes/range.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 39d7c4f2ac48e..83b1f0d8cf938 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -541,14 +541,12 @@ def _argminmax( meth: Literal["min", "max"], axis=None, skipna: bool = True, - *args, - **kwargs, ) -> int: - getattr(nv, f"validate_arg{meth}")(args, kwargs) nv.validate_minmax_axis(axis) if len(self) == 0: return getattr(super(), f"arg{meth}")( - *args, axis=axis, skipna=skipna, **kwargs + axis=axis, + skipna=skipna, ) elif meth == "min": if self.step > 0: @@ -564,9 +562,11 @@ def _argminmax( raise ValueError(f"{meth=} must be max or min") def argmin(self, axis=None, skipna: bool = True, *args, **kwargs) -> int: + nv.validate_argmin(args, kwargs) return self._argminmax("min", *args, axis=axis, skipna=skipna, **kwargs) def argmax(self, axis=None, skipna: bool = True, *args, **kwargs) -> int: + nv.validate_argmax(args, kwargs) return self._argminmax("max", *args, axis=axis, skipna=skipna, **kwargs) def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]: From fc39378308581b44107d146f3232a39d4fbc70c1 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 12 Mar 2024 17:26:59 -0700 Subject: [PATCH 4/5] Remove args, kwargs from signature --- pandas/core/indexes/range.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 83b1f0d8cf938..2d94b7a4b03a7 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -563,11 +563,11 @@ def _argminmax( def argmin(self, axis=None, skipna: bool = True, *args, **kwargs) -> int: nv.validate_argmin(args, kwargs) - return self._argminmax("min", *args, axis=axis, skipna=skipna, **kwargs) + return self._argminmax("min", axis=axis, skipna=skipna) def argmax(self, axis=None, skipna: bool = True, *args, **kwargs) -> int: nv.validate_argmax(args, kwargs) - return self._argminmax("max", *args, axis=axis, skipna=skipna, **kwargs) + return self._argminmax("max", axis=axis, skipna=skipna) def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]: """ From a06713a05f735bc98d83b83d3e2a19ffaf155ac7 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 13 Mar 2024 12:35:46 -0700 Subject: [PATCH 5/5] Remove assert --- pandas/tests/indexes/ranges/test_range.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py index f75facb082e33..00655f5546df8 100644 --- a/pandas/tests/indexes/ranges/test_range.py +++ b/pandas/tests/indexes/ranges/test_range.py @@ -784,7 +784,7 @@ def test_arg_min_max(rng, meth): @pytest.mark.parametrize("meth", ["argmin", "argmax"]) def test_empty_argmin_argmax_raises(meth): with pytest.raises(ValueError, match=f"attempt to get {meth} of an empty sequence"): - assert getattr(RangeIndex(0), meth)() + getattr(RangeIndex(0), meth)() def test_getitem_integers_return_rangeindex():