Skip to content

Commit

Permalink
ENH: start tightening up API around integer slicing per #592
Browse files Browse the repository at this point in the history
  • Loading branch information
wesm committed Jan 13, 2012
1 parent 79cc4e0 commit 42ce8ff
Show file tree
Hide file tree
Showing 6 changed files with 107 additions and 35 deletions.
2 changes: 1 addition & 1 deletion RELEASE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ pandas 0.7.0
- Improve the performance of ``DataFrame.sort_index`` by up to 5x or more
when sorting by multiple columns
- Substantially improve performance of DataFrame and Series constructors when
passed a nested dict or dict, respectively (GH #540)
passed a nested dict or dict, respectively (GH #540, GH #621)
- Modified setup.py so that pip / setuptools will install dependencies (GH
#507, various pull requests)
- Unstack called on DataFrame with non-MultiIndex will return Series (GH
Expand Down
20 changes: 14 additions & 6 deletions pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -750,17 +750,25 @@ def slice_locs(self, start=None, end=None):
"""
if start is None:
beg_slice = 0
elif start in self:
beg_slice = self.get_loc(start)
else:
beg_slice = self.searchsorted(start, side='left')
try:
beg_slice = self.get_loc(start)
except KeyError:
if self.is_monotonic:
beg_slice = self.searchsorted(start, side='left')
else:
raise

if end is None:
end_slice = len(self)
elif end in self:
end_slice = self.get_loc(end) + 1
else:
end_slice = self.searchsorted(end, side='right')
try:
end_slice = self.get_loc(end) + 1
except KeyError:
if self.is_monotonic:
end_slice = self.searchsorted(end, side='right')
else:
raise

return beg_slice, end_slice

Expand Down
74 changes: 54 additions & 20 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,29 +207,44 @@ def _convert_to_indexer(self, obj, axis=0):
raise AmbiguousIndexError with integer labels?
- No, prefer label-based indexing
"""
index = self.obj._get_axis(axis)
labels = self.obj._get_axis(axis)

try:
return index.get_loc(obj)
return labels.get_loc(obj)
except (KeyError, TypeError):
pass

is_int_index = _is_integer_index(index)
is_int_index = _is_integer_index(labels)
if isinstance(obj, slice):
if _is_label_slice(index, obj):
i, j = index.slice_locs(obj.start, obj.stop)

if obj.step is not None:
raise Exception('Non-zero step not supported with '
'label-based slicing')
return slice(i, j)
int_slice = _is_integer_slice(obj)
null_slice = obj.start is None and obj.stop is None
# could have integers in the first level of the MultiIndex
position_slice = (int_slice
and not labels.inferred_type == 'integer'
and not isinstance(labels, MultiIndex))

if null_slice or position_slice:
slicer = obj
else:
return obj
try:
i, j = labels.slice_locs(obj.start, obj.stop)
slicer = slice(i, j, obj.step)
except Exception:
if _is_integer_slice(obj):
if labels.inferred_type == 'integer':
raise
slicer = obj
else:
raise

return slicer

elif _is_list_like(obj):
objarr = _asarray_tuplesafe(obj)

if objarr.dtype == np.bool_:
if not obj.index.equals(index):
if not obj.index.equals(labels):
raise IndexingError('Cannot use boolean index with '
'misaligned or unequal labels')
return objarr
Expand All @@ -238,7 +253,7 @@ def _convert_to_indexer(self, obj, axis=0):
if _is_integer_dtype(objarr) and not is_int_index:
return objarr

indexer = index.get_indexer(objarr)
indexer = labels.get_indexer(objarr)
mask = indexer == -1
if mask.any():
raise KeyError('%s not in index' % objarr[mask])
Expand All @@ -247,7 +262,7 @@ def _convert_to_indexer(self, obj, axis=0):
else:
if com.is_integer(obj) and not is_int_index:
return obj
return index.get_loc(obj)
return labels.get_loc(obj)

def _tuplify(self, loc):
tup = [slice(None, None) for _ in range(self.ndim)]
Expand All @@ -259,21 +274,40 @@ def _get_slice_axis(self, slice_obj, axis=0):

axis_name = obj._get_axis_name(axis)
labels = getattr(obj, axis_name)
if _is_label_slice(labels, slice_obj):
i, j = labels.slice_locs(slice_obj.start, slice_obj.stop)
slicer = slice(i, j)

if slice_obj.step is not None:
raise Exception('Non-zero step not supported with label-based '
'slicing')
else:
int_slice = _is_integer_slice(slice_obj)

null_slice = slice_obj.start is None and slice_obj.stop is None
# could have integers in the first level of the MultiIndex
position_slice = (int_slice and not labels.inferred_type == 'integer'
and not isinstance(labels, MultiIndex))
if null_slice or position_slice:
slicer = slice_obj
else:
try:
i, j = labels.slice_locs(slice_obj.start, slice_obj.stop)
slicer = slice(i, j, slice_obj.step)
except Exception:
if _is_integer_slice(slice_obj):
if labels.inferred_type == 'integer':
raise
slicer = slice_obj
else:
raise

if not _need_slice(slice_obj):
return obj

return obj._slice(slicer, axis=axis)

def _is_integer_slice(obj):
def _crit(v):
return v is None or com.is_integer(v)

both_none = obj.start is None and obj.stop is None

return not both_none and (_crit(obj.start) and _crit(obj.stop))

class _SeriesIndexer(_NDFrameIndexer):
"""
Class to support fancy indexing, potentially using labels
Expand Down
4 changes: 0 additions & 4 deletions pandas/core/panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,10 +269,6 @@ def _init_dict(self, data, axes, dtype=None):
if dtype is not None:
v = v.astype(dtype)
values = v.values

# if values.ndim == 2:
# values = values[None, :, :]

reshaped_data[item] = values

# segregates dtypes and forms blocks matching to columns
Expand Down
34 changes: 31 additions & 3 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,8 +328,36 @@ def test_getitem_fancy_2d(self):

def test_getitem_fancy_slice_integers_step(self):
df = DataFrame(np.random.randn(10, 5))
self.assertRaises(Exception, df.ix.__getitem__, slice(0, 8, 2))
self.assertRaises(Exception, df.ix.__setitem__, slice(0, 8, 2), np.nan)

# this is OK
result = df.ix[:8:2]
df.ix[:8:2] = np.nan
self.assert_(isnull(df.ix[:8:2]).values.all())

def test_getitem_setitem_integer_slice_keyerrors(self):
# Exercises .ix get/set with integer slices on an integer-labelled index:
# slicing succeeds on the increasing index whether or not the bounds are
# labels, and raises KeyError on the reversed index when they are not.
# The index labels are the even integers 0, 2, ..., 18.
df = DataFrame(np.random.randn(10, 5), index=range(0, 20, 2))

# both bounds (4 and 10) are labels present in the index: this is OK
cp = df.copy()
cp.ix[4:10] = 0
self.assert_((cp.ix[4:10] == 0).values.all())

# neither bound (3, 11) is a label, but the index is increasing: so is this
cp = df.copy()
cp.ix[3:11] = 0
self.assert_((cp.ix[3:11] == 0).values.all())

# both slices are expected to select the same rows, labels 4 through 10
# with the end label included
result = df.ix[4:10]
result2 = df.ix[3:11]
expected = df.reindex([4, 6, 8, 10])

assert_frame_equal(result, expected)
assert_frame_equal(result2, expected)

# non-monotonic (reversed) index with bounds that are not labels:
# both getitem and setitem must raise KeyError
df2 = df[::-1]
self.assertRaises(KeyError, df2.ix.__getitem__, slice(3, 11))
self.assertRaises(KeyError, df2.ix.__setitem__, slice(3, 11), 0)

def test_setitem_fancy_2d(self):
f = self.frame
Expand Down Expand Up @@ -1659,7 +1687,7 @@ def test_eng_float_formatter(self):

repr(self.frame)

com.set_printoptions(precision=4)
com.reset_printoptions()

def test_repr_tuples(self):
buf = StringIO()
Expand Down
8 changes: 7 additions & 1 deletion pandas/tests/test_multilevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ def test_getitem_toplevel(self):
assert_frame_equal(result, expected)
assert_frame_equal(result, result2)

def test_getitem_slice_integers(self):
def test_getitem_setitem_slice_integers(self):
index = MultiIndex(levels=[[0, 1, 2], [0, 2]],
labels=[[0, 0, 1, 1, 2, 2],
[0, 1, 0, 1, 0, 1]])
Expand All @@ -246,12 +246,18 @@ def test_getitem_slice_integers(self):
exp = frame[2:]
assert_frame_equal(res, exp)

frame.ix[1:2] = 7
self.assert_((frame.ix[1:2] == 7).values.all())

series = Series(np.random.randn(len(index)), index=index)

res = series.ix[1:2]
exp = series[2:]
assert_series_equal(res, exp)

series.ix[1:2] = 7
self.assert_((series.ix[1:2] == 7).values.all())

def test_getitem_int(self):
levels = [[0, 1], [0, 1, 2]]
labels = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]
Expand Down

0 comments on commit 42ce8ff

Please sign in to comment.