Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

Already on GitHub? Sign in to your account

API: allow the iloc indexer to run off the end and not raise IndexError (GH6296) #6299

Merged
merged 1 commit into from Feb 8, 2014
Jump to file or symbol
Failed to load files and symbols.
+156 −56
Split
View
@@ -273,25 +273,6 @@ For getting fast access to a scalar (equiv to the prior method)
df.iat[1,1]
-There is one significant departure from standard python/numpy slicing semantics.
-python/numpy allow slicing past the end of an array without an associated
-error.
-
-.. ipython:: python
-
- # these are allowed in python/numpy.
- x = list('abcdef')
- x[4:10]
- x[8:10]
-
-Pandas will detect this and raise ``IndexError``, rather than return an empty
-structure.
-
-::
-
- >>> df.iloc[:,8:10]
- IndexError: out-of-bounds on slice (end)
-
Boolean Indexing
~~~~~~~~~~~~~~~~
View
@@ -77,8 +77,9 @@ of multi-axis indexing.
See more at :ref:`Selection by Label <indexing.label>`
- ``.iloc`` is strictly integer position based (from ``0`` to ``length-1`` of
- the axis), will raise ``IndexError`` when the requested indices are out of
- bounds. Allowed inputs are:
+ the axis), will raise ``IndexError`` if a single index is requested and it
+ is out-of-bounds, otherwise it will conform the bounds to the size of the object.
+ Allowed inputs are:
- An integer e.g. ``5``
- A list or array of integers ``[4, 3, 0]``
@@ -420,12 +421,19 @@ python/numpy allow slicing past the end of an array without an associated error.
x[4:10]
x[8:10]
-Pandas will detect this and raise ``IndexError``, rather than return an empty structure.
+- as of v0.14.0, ``iloc`` will now accept out-of-bounds indexers, e.g. a value that exceeds the length of the object being
+ indexed. These will be excluded. This will make pandas conform more with python/numpy indexing of out-of-bounds
+ values. A single indexer that is out-of-bounds and drops the dimensions of the object will still raise
+ ``IndexError`` (:issue:`6296`). Excluding out-of-bounds values could result in an empty axis (e.g. an empty DataFrame being returned).
-::
+ .. ipython:: python
- >>> df.iloc[:,3:6]
- IndexError: out-of-bounds on slice (end)
+ df = DataFrame(np.random.randn(5,2),columns=list('AB'))
+ df
+ df.iloc[[4,5,6]]
+ df.iloc[4:6]
+ df.iloc[:,2:3]
+ df.iloc[:,1:3]
.. _indexing.basics.partial_setting:
View
@@ -56,6 +56,10 @@ New features
API Changes
~~~~~~~~~~~
+- ``iloc`` will now accept out-of-bounds indexers, e.g. a value that exceeds the length of the object being
+ indexed. These will be excluded. This will make pandas conform more with python/numpy indexing of out-of-bounds
+ values. A single indexer that is out-of-bounds and drops the dimensions of the object will still raise
+ ``IndexError`` (:issue:`6296`)
Experimental Features
~~~~~~~~~~~~~~~~~~~~~
View
@@ -15,6 +15,20 @@ Highlights include:
API changes
~~~~~~~~~~~
+- ``iloc`` will now accept out-of-bounds indexers, e.g. a value that exceeds the length of the object being
+ indexed. These will be excluded. This will make pandas conform more with python/numpy indexing of out-of-bounds
+ values. A single indexer that is out-of-bounds and drops the dimensions of the object will still raise
+ ``IndexError`` (:issue:`6296`). Excluding out-of-bounds values could result in an empty axis (e.g. an empty DataFrame being returned).
+
+ .. ipython:: python
+
+ df = DataFrame(np.random.randn(5,2),columns=list('AB'))
+ df
+ df.iloc[[4,5,6]]
+ df.iloc[4:6]
+ df.iloc[:,2:3]
+ df.iloc[:,1:3]
+
Prior Version Deprecations/Changes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
View
@@ -1756,10 +1756,6 @@ def head(self, n=5):
l = len(self)
if l == 0 or n==0:
return self
- if n > l:
- n = l
- elif n < -l:
- n = -l
return self.iloc[:n]
def tail(self, n=5):
@@ -1769,10 +1765,6 @@ def tail(self, n=5):
l = len(self)
if l == 0 or n == 0:
return self
- if n > l:
- n = l
- elif n < -l:
- n = -l
return self.iloc[-n:]
#----------------------------------------------------------------------
View
@@ -621,9 +621,15 @@ def __getitem__(self, key):
if com._is_bool_indexer(key):
key = np.asarray(key)
- result = arr_idx[key]
- if result.ndim > 1:
- return result
+ try:
+ result = arr_idx[key]
+ if result.ndim > 1:
+ return result
+ except (IndexError):
+ if not len(key):
+ result = []
+ else:
+ raise
return Index(result, name=self.name)
View
@@ -73,6 +73,29 @@ def _get_loc(self, key, axis=0):
return self.obj._ixs(key, axis=axis)
def _slice(self, obj, axis=0, raise_on_error=False, typ=None):
+
+ # make out-of-bounds into bounds of the object
+ if typ == 'iloc':
+ ax = self.obj._get_axis(axis)
+ l = len(ax)
+ start = obj.start
+ stop = obj.stop
+ step = obj.step
+ if start is not None:
+ # degenerate to return nothing
+ if start >= l:
+ return self._getitem_axis(tuple(),axis=axis)
+
+ # equiv to a null slice
+ elif start <= -l:
+ start = None
+ if stop is not None:
+ if stop > l:
+ stop = None
+ elif stop <= -l:
+ stop = None
+ obj = slice(start,stop,step)
+
return self.obj._slice(obj, axis=axis, raise_on_error=raise_on_error,
typ=typ)
@@ -1188,14 +1211,23 @@ def _getitem_tuple(self, tup):
pass
retval = self.obj
+ axis=0
for i, key in enumerate(tup):
if i >= self.obj.ndim:
raise IndexingError('Too many indexers')
if _is_null_slice(key):
+ axis += 1
continue
- retval = getattr(retval, self.name)._getitem_axis(key, axis=i)
+ retval = getattr(retval, self.name)._getitem_axis(key, axis=axis)
+
+ # if the dim was reduced, then pass a lower-dim the next time
+ if retval.ndim<self.ndim:
+ axis -= 1
+
+ # try to get for the next axis
+ axis += 1
return retval
@@ -1224,17 +1256,28 @@ def _getitem_axis(self, key, axis=0):
# a single integer or a list of integers
else:
+ ax = self.obj._get_axis(axis)
if _is_list_like(key):
+ # coerce the key to not exceed the maximum size of the index
+ arr = np.array(key)
+ l = len(ax)
+ if len(arr) and (arr.max() >= l or arr.min() <= -l):
+ key = arr[(arr>-l) & (arr<l)]
+
# force an actual list
key = list(key)
+
else:
key = self._convert_scalar_indexer(key, axis)
if not com.is_integer(key):
raise TypeError("Cannot index by location index with a "
"non-integer key")
+ if key > len(ax):
+ raise IndexError("single indexer is out-of-bounds")
+
return self._get_loc(key, axis=axis)
def _convert_to_indexer(self, obj, axis=0, is_setter=False):
View
@@ -3246,7 +3246,7 @@ def reindex_indexer(self, new_axis, indexer, axis=1, fill_value=None,
pandas-indexer with -1's only.
"""
# trying to reindex on an axis with duplicates
- if not allow_dups and not self.axes[axis].is_unique:
+ if not allow_dups and not self.axes[axis].is_unique and len(indexer):
raise ValueError("cannot reindex from a duplicate axis")
if not self.is_consolidated():
@@ -873,7 +873,7 @@ def test_equals(self):
s2[0] = 9.9
self.assert_(not s1.equals(s2))
-
+
idx = MultiIndex.from_tuples([(0, 'a'), (1, 'b'), (2, 'c')])
s1 = Series([1, 2, np.nan], index=idx)
s2 = s1.copy()
@@ -900,17 +900,17 @@ def test_equals(self):
# different dtype
different = df1.copy()
different['floats'] = different['floats'].astype('float32')
- self.assert_(not df1.equals(different))
+ self.assert_(not df1.equals(different))
# different index
different_index = -index
different = df2.set_index(different_index)
- self.assert_(not df1.equals(different))
+ self.assert_(not df1.equals(different))
# different columns
different = df2.copy()
different.columns = df2.columns[::-1]
- self.assert_(not df1.equals(different))
+ self.assert_(not df1.equals(different))
# DatetimeIndex
index = pd.date_range('2000-1-1', periods=10, freq='T')
@@ -339,6 +339,72 @@ def test_repeated_getitem_dups(self):
result = df.loc[:,0].loc['A']
assert_series_equal(result,expected)
+ def test_iloc_exceeds_bounds(self):
+
+ # GH6296
+ # iloc should allow indexers that exceed the bounds
+ df = DataFrame(np.random.random_sample((20,5)), columns=list('ABCDE'))
+ expected = df
+ result = df.iloc[:,[0,1,2,3,4,5]]
+ assert_frame_equal(result,expected)
+
+ result = df.iloc[[1,30]]
+ expected = df.iloc[[1]]
+ assert_frame_equal(result,expected)
+
+ result = df.iloc[[1,-30]]
+ expected = df.iloc[[1]]
+ assert_frame_equal(result,expected)
+
+ result = df.iloc[:,4:10]
+ expected = df.iloc[:,4:]
+ assert_frame_equal(result,expected)
+
+ result = df.iloc[:,-4:-10]
+ expected = df.iloc[:,-4:]
+ assert_frame_equal(result,expected)
+
+ result = df.iloc[[100]]
+ expected = DataFrame(columns=df.columns)
+ assert_frame_equal(result,expected)
+
+ # still raise on a single indexer
+ def f():
+ df.iloc[30]
+ self.assertRaises(IndexError, f)
+
+ s = df['A']
+ result = s.iloc[[100]]
+ expected = Series()
+ assert_series_equal(result,expected)
+
+ result = s.iloc[[-100]]
+ expected = Series()
+ assert_series_equal(result,expected)
+
+ # slice
+ result = s.iloc[18:30]
+ expected = s.iloc[18:]
+ assert_series_equal(result,expected)
+
+ # doc example
+ df = DataFrame(np.random.randn(5,2),columns=list('AB'))
+ result = df.iloc[[4,5,6]]
+ expected = df.iloc[[4]]
+ assert_frame_equal(result,expected)
+
+ result = df.iloc[4:6]
+ expected = df.iloc[[4]]
+ assert_frame_equal(result,expected)
+
+ result = df.iloc[:,2:3]
+ expected = DataFrame(index=df.index)
+ assert_frame_equal(result,expected)
+
+ result = df.iloc[:,1:3]
+ expected = df.iloc[:,[1]]
+ assert_frame_equal(result,expected)
+
def test_iloc_getitem_int(self):
# integer
@@ -442,14 +508,6 @@ def test_iloc_getitem_multiindex(self):
xp = df.xs('b',drop_level=False)
assert_frame_equal(rs,xp)
- def test_iloc_getitem_out_of_bounds(self):
-
- # out-of-bounds slice
- self.assertRaises(IndexError, self.frame_ints.iloc.__getitem__, tuple([slice(None),slice(1,5,None)]))
- self.assertRaises(IndexError, self.frame_ints.iloc.__getitem__, tuple([slice(None),slice(-5,3,None)]))
- self.assertRaises(IndexError, self.frame_ints.iloc.__getitem__, tuple([slice(1,5,None)]))
- self.assertRaises(IndexError, self.frame_ints.iloc.__getitem__, tuple([slice(-5,3,None)]))
-
def test_iloc_setitem(self):
df = self.frame_ints
@@ -738,12 +796,6 @@ def test_iloc_getitem_frame(self):
expected = df.ix[[2,4,6,8]]
assert_frame_equal(result, expected)
- # out-of-bounds slice
- self.assertRaises(IndexError, df.iloc.__getitem__, tuple([slice(None),slice(1,5,None)]))
- self.assertRaises(IndexError, df.iloc.__getitem__, tuple([slice(None),slice(-5,3,None)]))
- self.assertRaises(IndexError, df.iloc.__getitem__, tuple([slice(1,11,None)]))
- self.assertRaises(IndexError, df.iloc.__getitem__, tuple([slice(-11,3,None)]))
-
# try with labelled frame
df = DataFrame(np.random.randn(10, 4), index=list('abcdefghij'), columns=list('ABCD'))