diff --git a/asv_bench/benchmarks/index_object.py b/asv_bench/benchmarks/index_object.py index 9c05019c70396..2b2302a796730 100644 --- a/asv_bench/benchmarks/index_object.py +++ b/asv_bench/benchmarks/index_object.py @@ -86,6 +86,12 @@ def time_iter_dec(self): for _ in self.idx_dec: pass + def time_sort_values_asc(self): + self.idx_inc.sort_values() + + def time_sort_values_des(self): + self.idx_inc.sort_values(ascending=False) + class IndexEquals: def setup(self): diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 17b9e49bcad6a..07d1d39ec4221 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -427,6 +427,7 @@ Performance improvements - Performance improvement in :meth:`SparseArray.take` with ``allow_fill=False`` (:issue:`43654`) - Performance improvement in :meth:`.Rolling.mean` and :meth:`.Expanding.mean` with ``engine="numba"`` (:issue:`43612`) - Improved performance of :meth:`pandas.read_csv` with ``memory_map=True`` when file encoding is UTF-8 (:issue:`43787`) +- Performance improvement in :meth:`RangeIndex.sort_values` overriding :meth:`Index.sort_values` (:issue:`43666`) - Performance improvement in :meth:`RangeIndex.insert` (:issue:`43988`) - Performance improvement in :meth:`Index.insert` (:issue:`43953`) - Performance improvement in :meth:`DatetimeIndex.tolist` (:issue:`43823`) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c5cfa4c2d1fcd..05047540c6ccd 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5266,7 +5266,6 @@ def asof_locs(self, where: Index, mask: np.ndarray) -> npt.NDArray[np.intp]: return result - @final def sort_values( self, return_indexer: bool = False, diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 4def2e4b93553..c5ba928a09cfa 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -522,6 +522,38 @@ def equals(self, other: object) -> bool: return self._range == other._range return super().equals(other) + def sort_values( + self, + return_indexer: bool = False, + ascending: bool = True, + na_position: str = "last", + key: Callable | None = None, + ): + sorted_index = self + indexer = RangeIndex(range(len(self))) + if key is not None: + return super().sort_values( + return_indexer=return_indexer, + ascending=ascending, + na_position=na_position, + key=key, + ) + else: + sorted_index = self + if ascending: + if self.step < 0: + sorted_index = self[::-1] + indexer = indexer[::-1] + else: + if self.step > 0: + sorted_index = self[::-1] + indexer = indexer = indexer[::-1] + + if return_indexer: + return sorted_index, indexer + else: + return sorted_index + # -------------------------------------------------------------------- # Set Operations diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py index 7591620de168a..d58dff191cc73 100644 --- a/pandas/tests/indexes/ranges/test_range.py +++ b/pandas/tests/indexes/ranges/test_range.py @@ -607,3 +607,11 @@ def test_isin_range(self, base): result = base.isin(values) expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) + + def test_sort_values_key(self): + # GH#43666 + sort_order = {8: 2, 6: 0, 4: 8, 2: 10, 0: 12} + values = RangeIndex(0, 10, 2) + result = values.sort_values(key=lambda x: x.map(sort_order)) + expected = Index([4, 8, 6, 0, 2], dtype="int64") + tm.assert_index_equal(result, expected, check_exact=True) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 74bcb589b008a..8f0991eb98bb5 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -68,7 +68,7 @@ def test_factorize(self, index_or_series_obj, sort): expected_codes = np.asarray(expected_codes, dtype=np.intp) tm.assert_numpy_array_equal(result_codes, expected_codes) - tm.assert_index_equal(result_uniques, expected_uniques) + tm.assert_index_equal(result_uniques, expected_uniques, exact=True) def test_series_factorize_na_sentinel_none(self): # GH#35667