From 808526401b34f237b92620d9275b9070f5345340 Mon Sep 17 00:00:00 2001 From: tp Date: Fri, 7 Jun 2019 23:23:44 +0200 Subject: [PATCH 1/3] REF: Refactor signature for RangeIndex._simple_new --- pandas/core/indexes/base.py | 5 +-- pandas/core/indexes/range.py | 61 ++++++++++++++---------------- pandas/tests/indexes/test_range.py | 8 +++- 3 files changed, 36 insertions(+), 38 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 4fb9c4197109f..7a027c1c0b647 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -270,11 +270,8 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, data = data.to_numpy() # range - if isinstance(data, RangeIndex): + if isinstance(data, (RangeIndex, range)): return RangeIndex(start=data, copy=copy, dtype=dtype, name=name) - elif isinstance(data, range): - return RangeIndex.from_range(data, copy=copy, dtype=dtype, - name=name) # categorical elif is_categorical_dtype(data) or is_categorical_dtype(dtype): diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 7daeb9b644a9b..22033a4deba2d 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -40,7 +40,7 @@ class RangeIndex(Int64Index): Parameters ---------- - start : int (default: 0), or other RangeIndex instance + start : int (default: 0), range or RangeIndex instance If int and "stop" is not given, interpreted as "stop" instead. 
stop : int (default: 0) step : int (default: 1) @@ -82,16 +82,14 @@ def __new__(cls, start=None, stop=None, step=None, "removed in a future version.", FutureWarning, stacklevel=2) if fastpath: - return cls._simple_new(start, stop, step, name=name) + return cls._simple_new(range(start, stop, step), name=name) - cls._validate_dtype(dtype) - - # RangeIndex - if isinstance(start, RangeIndex): - if name is None: - name = start.name - return cls._simple_new(name=name, - **dict(start._get_data_as_items())) + # RangeIndex, range + if isinstance(start, (RangeIndex, range)): + if isinstance(start, RangeIndex): + name = start.name if name is None else name + start = start._range + return cls._simple_new(start, dtype=dtype, name=name) # validate the arguments if com._all_none(start, stop, step): @@ -108,10 +106,11 @@ def __new__(cls, start=None, stop=None, step=None, if step == 0: raise ValueError("Step must not be zero") - return cls._simple_new(start, stop, step, name) + rng = range(start, stop, step) + return cls._simple_new(rng, dtype=dtype, name=name) @classmethod - def from_range(cls, data, name=None, dtype=None, **kwargs): + def from_range(cls, data, name=None, dtype=None): """ Create RangeIndex from a range object. @@ -123,27 +122,22 @@ def from_range(cls, data, name=None, dtype=None, **kwargs): raise TypeError( '{0}(...) 
must be called with object coercible to a ' 'range, {1} was passed'.format(cls.__name__, repr(data))) - - start, stop, step = data.start, data.stop, data.step - return cls(start, stop, step, dtype=dtype, name=name, **kwargs) + return cls._simple_new(data, dtype=dtype, name=name) @classmethod - def _simple_new(cls, start, stop=None, step=None, name=None, - dtype=None, **kwargs): + def _simple_new(cls, values, name=None, dtype=None, **kwargs): result = object.__new__(cls) + cls._validate_dtype(dtype) + # handle passed None, non-integers - if start is None and stop is None: + if values is None: # empty - start, stop, step = 0, 0, 1 - - if start is None or not is_integer(start): - try: - return cls(start, stop, step, name=name, **kwargs) - except TypeError: - return Index(start, stop, step, name=name, **kwargs) + values = range(0, 0, 1) + elif not isinstance(values, range): + return Index(values, dtype=dtype, name=name, **kwargs) - result._range = range(start, stop or 0, step or 1) + result._range = values result.name = name for k, v in kwargs.items(): @@ -360,8 +354,7 @@ def tolist(self): def _shallow_copy(self, values=None, **kwargs): if values is None: name = kwargs.get("name", self.name) - return self._simple_new( - name=name, **dict(self._get_data_as_items())) + return self._simple_new(self._range, name=name) else: kwargs.setdefault('name', self.name) return self._int64index._shallow_copy(values, **kwargs) @@ -480,11 +473,13 @@ def intersection(self, other, sort=False): tmp_start = first.start + (second.start - first.start) * \ first.step // gcd * s new_step = first.step * second.step // gcd - new_index = self._simple_new(tmp_start, int_high, new_step) + new_range = range(tmp_start, int_high, new_step) + new_index = self._simple_new(new_range) # adjust index to limiting interval new_start = new_index._min_fitting_element(int_low) - new_index = self._simple_new(new_start, new_index.stop, new_index.step) + new_range = range(new_start, new_index.stop, new_index.step) 
+ new_index = self._simple_new(new_range) if (self.step < 0 and other.step < 0) is not (new_index.step < 0): new_index = new_index[::-1] @@ -640,10 +635,12 @@ def __floordiv__(self, other): start = self.start // other step = self.step // other stop = start + len(self) * step - return self._simple_new(start, stop, step, name=self.name) + new_range = range(start, stop, step or 1) + return self._simple_new(new_range, name=self.name) if len(self) == 1: start = self.start // other - return self._simple_new(start, start + 1, 1, name=self.name) + new_range = range(start, start + 1, 1) + return self._simple_new(new_range, name=self.name) return self._int64index // other def all(self) -> bool: diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 6eece0ed8efee..530a514d28fd0 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -94,8 +94,9 @@ def test_constructor_same(self): def test_constructor_range(self): - with pytest.raises(TypeError): - RangeIndex(range(1, 5, 2)) + result = RangeIndex(range(1, 5, 2)) + expected = RangeIndex(1, 5, 2) + tm.assert_index_equal(result, expected, exact=True) result = RangeIndex.from_range(range(1, 5, 2)) expected = RangeIndex(1, 5, 2) @@ -120,6 +121,9 @@ def test_constructor_range(self): with pytest.raises(TypeError): Index(range(1, 5, 2), dtype='float64') + msg = r'^from_range\(\) got an unexpected keyword argument' + with pytest.raises(TypeError, match=msg): + pd.RangeIndex.from_range(range(10), copy=True) def test_constructor_name(self): # GH12288 From 0678071d36e9d281548dd7b1634282506fb5d880 Mon Sep 17 00:00:00 2001 From: tp Date: Sat, 8 Jun 2019 20:02:50 +0200 Subject: [PATCH 2/3] better performance for slices --- doc/source/whatsnew/v0.25.0.rst | 2 +- pandas/core/indexes/range.py | 21 ++++++++++----------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 
fd47ca14dc788..64175c9d9892c 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -525,7 +525,7 @@ Performance Improvements - Improved performance of :meth:`Series.searchsorted`. The speedup is especially large when the dtype is int8/int16/int32 and the searched key is within the integer bounds for the dtype (:issue:`22034`) - Improved performance of :meth:`pandas.core.groupby.GroupBy.quantile` (:issue:`20405`) -- Improved performance of slicing and other selected operation on a :class:`RangeIndex` (:issue:`26565`, :issue:`26617`) +- Improved performance of slicing and other selected operations on a :class:`RangeIndex` (:issue:`26565`, :issue:`26617`, :issue:`26722`) - Improved performance of :meth:`read_csv` by faster tokenizing and faster parsing of small float numbers (:issue:`25784`) - Improved performance of :meth:`read_csv` by faster parsing of N/A and boolean values (:issue:`25804`) - Improved performance of :attr:`IntervalIndex.is_monotonic`, :attr:`IntervalIndex.is_monotonic_increasing` and :attr:`IntervalIndex.is_monotonic_decreasing` by removing conversion to :class:`MultiIndex` (:issue:`24813`) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 22033a4deba2d..cc44b4d4c6f6f 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -6,7 +6,7 @@ import numpy as np -from pandas._libs import index as libindex, lib +from pandas._libs import index as libindex import pandas.compat as compat from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, cache_readonly @@ -604,12 +604,10 @@ def __getitem__(self, key): """ Conserve RangeIndex type for scalar and slice keys. 
""" - if is_scalar(key): - if not lib.is_integer(key): - raise IndexError("only integers, slices (`:`), " - "ellipsis (`...`), numpy.newaxis (`None`) " - "and integer or boolean " - "arrays are valid indices") + if isinstance(key, slice): + new_range = self._range[key] + return self._simple_new(new_range, name=self.name) + elif is_integer(key): new_key = int(key) try: return self._range[new_key] @@ -617,10 +615,11 @@ def __getitem__(self, key): raise IndexError("index {key} is out of bounds for axis 0 " "with size {size}".format(key=key, size=len(self))) - if isinstance(key, slice): - new_range = self._range[key] - return self.from_range(new_range, name=self.name) - + elif is_scalar(key): + raise IndexError("only integers, slices (`:`), " + "ellipsis (`...`), numpy.newaxis (`None`) " + "and integer or boolean " + "arrays are valid indices") # fall back to Int64Index return super().__getitem__(key) From 2bde25b0d9c2fda998d1e625ad346d5b99e27541 Mon Sep 17 00:00:00 2001 From: tp Date: Fri, 14 Jun 2019 09:29:20 +0200 Subject: [PATCH 3/3] Remove option to use bare range in init --- pandas/core/indexes/base.py | 4 +++- pandas/core/indexes/range.py | 17 +++++++++-------- pandas/tests/indexes/test_range.py | 6 +++--- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 7a027c1c0b647..5bf97f44edeed 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -270,8 +270,10 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, data = data.to_numpy() # range - if isinstance(data, (RangeIndex, range)): + if isinstance(data, RangeIndex): return RangeIndex(start=data, copy=copy, dtype=dtype, name=name) + elif isinstance(data, range): + return RangeIndex.from_range(data, dtype=dtype, name=name) # categorical elif is_categorical_dtype(data) or is_categorical_dtype(dtype): diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index cc44b4d4c6f6f..ab39969af8db0 
100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -40,7 +40,7 @@ class RangeIndex(Int64Index): Parameters ---------- - start : int (default: 0), range or RangeIndex instance + start : int (default: 0), or other RangeIndex instance If int and "stop" is not given, interpreted as "stop" instead. stop : int (default: 0) step : int (default: 1) @@ -84,11 +84,12 @@ def __new__(cls, start=None, stop=None, step=None, if fastpath: return cls._simple_new(range(start, stop, step), name=name) - # RangeIndex, range - if isinstance(start, (RangeIndex, range)): - if isinstance(start, RangeIndex): - name = start.name if name is None else name - start = start._range + cls._validate_dtype(dtype) + + # RangeIndex + if isinstance(start, RangeIndex): + name = start.name if name is None else name + start = start._range return cls._simple_new(start, dtype=dtype, name=name) # validate the arguments @@ -122,14 +123,14 @@ def from_range(cls, data, name=None, dtype=None): raise TypeError( '{0}(...) 
must be called with object coercible to a ' 'range, {1} was passed'.format(cls.__name__, repr(data))) + + cls._validate_dtype(dtype) return cls._simple_new(data, dtype=dtype, name=name) @classmethod def _simple_new(cls, values, name=None, dtype=None, **kwargs): result = object.__new__(cls) - cls._validate_dtype(dtype) - # handle passed None, non-integers if values is None: # empty diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 530a514d28fd0..3f474b0166b15 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -94,9 +94,9 @@ def test_constructor_same(self): def test_constructor_range(self): - result = RangeIndex(range(1, 5, 2)) - expected = RangeIndex(1, 5, 2) - tm.assert_index_equal(result, expected, exact=True) + msg = "Value needs to be a scalar value, was type " + with pytest.raises(TypeError, match=msg): + result = RangeIndex(range(1, 5, 2)) result = RangeIndex.from_range(range(1, 5, 2)) expected = RangeIndex(1, 5, 2)