ENH: start tightening up API around integer slicing per #592

wesm · wesm · commit 42ce8ff5e7be · 2012-01-12T22:35:49.000-05:00
diff --git a/RELEASE.rst b/RELEASE.rst
@@ -123,7 +123,7 @@ pandas 0.7.0
   - Improve the performance of ``DataFrame.sort_index`` by up to 5x or more
     when sorting by multiple columns
   - Substantially improve performance of DataFrame and Series constructors when
-    passed a nested dict or dict, respectively (GH #540)
+    passed a nested dict or dict, respectively (GH #540, GH #621)
   - Modified setup.py so that pip / setuptools will install dependencies (GH
     #507, various pull requests)
   - Unstack called on DataFrame with non-MultiIndex will return Series (GH
diff --git a/pandas/core/index.py b/pandas/core/index.py
@@ -750,17 +750,25 @@ def slice_locs(self, start=None, end=None):
         """
         if start is None:
             beg_slice = 0
-        elif start in self:
-            beg_slice = self.get_loc(start)
         else:
-            beg_slice = self.searchsorted(start, side='left')
+            try:
+                beg_slice = self.get_loc(start)
+            except KeyError:
+                if self.is_monotonic:
+                    beg_slice = self.searchsorted(start, side='left')
+                else:
+                    raise
 
         if end is None:
             end_slice = len(self)
-        elif end in self:
-            end_slice = self.get_loc(end) + 1
         else:
-            end_slice = self.searchsorted(end, side='right')
+            try:
+                end_slice = self.get_loc(end) + 1
+            except KeyError:
+                if self.is_monotonic:
+                    end_slice = self.searchsorted(end, side='right')
+                else:
+                    raise
 
         return beg_slice, end_slice
 
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -207,29 +207,44 @@ def _convert_to_indexer(self, obj, axis=0):
         raise AmbiguousIndexError with integer labels?
         - No, prefer label-based indexing
         """
-        index = self.obj._get_axis(axis)
+        labels = self.obj._get_axis(axis)
 
         try:
-            return index.get_loc(obj)
+            return labels.get_loc(obj)
         except (KeyError, TypeError):
             pass
 
-        is_int_index = _is_integer_index(index)
+        is_int_index = _is_integer_index(labels)
         if isinstance(obj, slice):
-            if _is_label_slice(index, obj):
-                i, j = index.slice_locs(obj.start, obj.stop)
 
-                if obj.step is not None:
-                    raise Exception('Non-zero step not supported with '
-                                    'label-based slicing')
-                return slice(i, j)
+            int_slice = _is_integer_slice(obj)
+            null_slice = obj.start is None and obj.stop is None
+            # could have integers in the first level of the MultiIndex
+            position_slice = (int_slice
+                              and not labels.inferred_type == 'integer'
+                              and not isinstance(labels, MultiIndex))
+
+            if null_slice or position_slice:
+                slicer = obj
             else:
-                return obj
+                try:
+                    i, j = labels.slice_locs(obj.start, obj.stop)
+                    slicer = slice(i, j, obj.step)
+                except Exception:
+                    if _is_integer_slice(obj):
+                        if labels.inferred_type == 'integer':
+                            raise
+                        slicer = obj
+                    else:
+                        raise
+
+            return slicer
+
         elif _is_list_like(obj):
             objarr = _asarray_tuplesafe(obj)
 
             if objarr.dtype == np.bool_:
-                if not obj.index.equals(index):
+                if not obj.index.equals(labels):
                     raise IndexingError('Cannot use boolean index with '
                                         'misaligned or unequal labels')
                 return objarr
@@ -238,7 +253,7 @@ def _convert_to_indexer(self, obj, axis=0):
                 if _is_integer_dtype(objarr) and not is_int_index:
                     return objarr
 
-                indexer = index.get_indexer(objarr)
+                indexer = labels.get_indexer(objarr)
                 mask = indexer == -1
                 if mask.any():
                     raise KeyError('%s not in index' % objarr[mask])
@@ -247,7 +262,7 @@ def _convert_to_indexer(self, obj, axis=0):
         else:
             if com.is_integer(obj) and not is_int_index:
                 return obj
-            return index.get_loc(obj)
+            return labels.get_loc(obj)
 
     def _tuplify(self, loc):
         tup = [slice(None, None) for _ in range(self.ndim)]
@@ -259,21 +274,40 @@ def _get_slice_axis(self, slice_obj, axis=0):
 
         axis_name = obj._get_axis_name(axis)
         labels = getattr(obj, axis_name)
-        if _is_label_slice(labels, slice_obj):
-            i, j = labels.slice_locs(slice_obj.start, slice_obj.stop)
-            slicer = slice(i, j)
 
-            if slice_obj.step is not None:
-                raise Exception('Non-zero step not supported with label-based '
-                                'slicing')
-        else:
+        int_slice = _is_integer_slice(slice_obj)
+
+        null_slice = slice_obj.start is None and slice_obj.stop is None
+        # could have integers in the first level of the MultiIndex
+        position_slice = (int_slice and not labels.inferred_type == 'integer'
+                          and not isinstance(labels, MultiIndex))
+        if null_slice or position_slice:
             slicer = slice_obj
+        else:
+            try:
+                i, j = labels.slice_locs(slice_obj.start, slice_obj.stop)
+                slicer = slice(i, j, slice_obj.step)
+            except Exception:
+                if _is_integer_slice(slice_obj):
+                    if labels.inferred_type == 'integer':
+                        raise
+                    slicer = slice_obj
+                else:
+                    raise
 
         if not _need_slice(slice_obj):
             return obj
 
         return obj._slice(slicer, axis=axis)
 
+def _is_integer_slice(obj):
+    """True if `obj` has a bound and every bound given is an integer."""
+    lo, hi = obj.start, obj.stop
+    if lo is None and hi is None:
+        return False  # a fully open slice is not an integer slice
+    lo_ok = lo is None or com.is_integer(lo)
+    return lo_ok and (hi is None or com.is_integer(hi))
+
 class _SeriesIndexer(_NDFrameIndexer):
     """
     Class to support fancy indexing, potentially using labels
diff --git a/pandas/core/panel.py b/pandas/core/panel.py
@@ -269,10 +269,6 @@ def _init_dict(self, data, axes, dtype=None):
                 if dtype is not None:
                     v = v.astype(dtype)
                 values = v.values
-
-            # if values.ndim == 2:
-            #     values = values[None, :, :]
-
             reshaped_data[item] = values
 
         # segregates dtypes and forms blocks matching to columns
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
@@ -328,8 +328,36 @@ def test_getitem_fancy_2d(self):
 
     def test_getitem_fancy_slice_integers_step(self):
         df = DataFrame(np.random.randn(10, 5))
-        self.assertRaises(Exception, df.ix.__getitem__, slice(0, 8, 2))
-        self.assertRaises(Exception, df.ix.__setitem__, slice(0, 8, 2), np.nan)
+
+        # .ix slices with a step are supported for both reads and writes
+        result = df.ix[:8:2]  # value unused: only checks the read succeeds
+        df.ix[:8:2] = np.nan
+        self.assert_(isnull(df.ix[:8:2]).values.all())
+
+    def test_getitem_setitem_integer_slice_keyerrors(self):
+        df = DataFrame(np.random.randn(10, 5), index=range(0, 20, 2))
+        # the index holds the even labels 0, 2, ..., 18
+        # both endpoints (4 and 10) are labels present in the index
+        cp = df.copy()
+        cp.ix[4:10] = 0
+        self.assert_((cp.ix[4:10] == 0).values.all())
+
+        # endpoints missing from the index (3 and 11) are accepted as well
+        cp = df.copy()
+        cp.ix[3:11] = 0
+        self.assert_((cp.ix[3:11] == 0).values.all())
+
+        result = df.ix[4:10]
+        result2 = df.ix[3:11]
+        expected = df.reindex([4, 6, 8, 10])
+
+        assert_frame_equal(result, expected)
+        assert_frame_equal(result2, expected)
+
+        # reversed index is not monotonic: missing endpoints raise KeyError
+        df2 = df[::-1]
+        self.assertRaises(KeyError, df2.ix.__getitem__, slice(3, 11))
+        self.assertRaises(KeyError, df2.ix.__setitem__, slice(3, 11), 0)
 
     def test_setitem_fancy_2d(self):
         f = self.frame
@@ -1659,7 +1687,7 @@ def test_eng_float_formatter(self):
 
         repr(self.frame)
 
-        com.set_printoptions(precision=4)
+        com.reset_printoptions()
 
     def test_repr_tuples(self):
         buf = StringIO()
diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py
@@ -235,7 +235,7 @@ def test_getitem_toplevel(self):
         assert_frame_equal(result, expected)
         assert_frame_equal(result, result2)
 
-    def test_getitem_slice_integers(self):
+    def test_getitem_setitem_slice_integers(self):
         index = MultiIndex(levels=[[0, 1, 2], [0, 2]],
                            labels=[[0, 0, 1, 1, 2, 2],
                                    [0, 1, 0, 1, 0, 1]])
@@ -246,12 +246,18 @@ def test_getitem_slice_integers(self):
         exp = frame[2:]
         assert_frame_equal(res, exp)
 
+        frame.ix[1:2] = 7
+        self.assert_((frame.ix[1:2] == 7).values.all())
+
         series =  Series(np.random.randn(len(index)), index=index)
 
         res = series.ix[1:2]
         exp = series[2:]
         assert_series_equal(res, exp)
 
+        series.ix[1:2] = 7
+        self.assert_((series.ix[1:2] == 7).values.all())
+
     def test_getitem_int(self):
         levels = [[0, 1], [0, 1, 2]]
         labels = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]