ENH: add Panel.take, implement set ops between MultiIndex and Index, plus test coverage

wesm committed Oct 24, 2011
1 parent eddd5c9 commit 394bb0d
Showing 12 changed files with 161 additions and 89 deletions.
2 changes: 2 additions & 0 deletions RELEASE.rst
@@ -128,6 +128,7 @@ feedback on the library.
- Added `pivot_table` convenience function to pandas namespace (GH #234)
- Implemented `Panel.rename_axis` function (GH #243)
- DataFrame will show index level names in console output
- Implemented `Panel.take`

**Improvements to existing features**

@@ -189,6 +190,7 @@ feedback on the library.
  issue GH #262
- Can pass list of tuples to `Series` (GH #270)
- Can pass level name to `DataFrame.stack`
- Support set operations between MultiIndex and Index

Thanks
------
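
As a quick usage sketch of the `Panel.take` entry above (the set-operation entry is illustrated further down, next to the `pandas/core/index.py` changes): this is written against the pandas API of this era, and the `Panel(values, items, major_axis, minor_axis)` construction plus the sample data are assumptions for illustration, not part of the commit.

```python
import numpy as np
from pandas import Panel

# Panel.take: positional selection along an axis, analogous to ndarray.take
panel = Panel(np.random.randn(2, 4, 3),
              items=['eggs', 'spam'],
              major_axis=range(4),
              minor_axis=['a', 'b', 'c'])

reordered = panel.take([1, 0], axis=0)   # reorder the items axis
first_two = panel.take([0, 1], axis=1)   # first two major_axis positions
```
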
2 changes: 1 addition & 1 deletion pandas/__init__.py
@@ -8,7 +8,7 @@

try:
    import pandas._tseries as lib
except Exception, e:
except Exception, e:  # pragma: no cover
    if 'No module named' in e.message:
        raise ImportError('C extensions not built: if you installed already '
                          'verify that you are not importing from the source '
2 changes: 1 addition & 1 deletion pandas/core/frame.py
@@ -2308,7 +2308,7 @@ def count(self, axis=0, level=None, numeric_only=False):
        else:
            frame = self

        result = frame.apply(Series.count, axis=axis)
        result = DataFrame.apply(frame, Series.count, axis=axis)

        # what happens with empty DataFrame
        if isinstance(result, DataFrame):
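
The one-line change above routes the call through `DataFrame.apply` explicitly rather than `frame.apply`. One plausible reading (not stated in the commit) is that this pins the base-class implementation even if `frame` is an instance of a subclass that overrides `apply`. A generic, pandas-free sketch of that Python pattern:

```python
class Base(object):
    def apply(self, func):
        return 'Base.apply: %s' % func()

class Child(Base):
    def apply(self, func):
        return 'Child.apply: %s' % func()

obj = Child()
print(obj.apply(lambda: 1))         # dispatches to the override -> Child.apply
print(Base.apply(obj, lambda: 1))   # forces the base implementation, like DataFrame.apply(frame, ...)
```
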
24 changes: 23 additions & 1 deletion pandas/core/generic.py
@@ -204,7 +204,7 @@ def sort_index(self, axis=0, ascending=True):
    def ix(self):
        raise NotImplementedError

    def reindex(self, **kwds):
    def reindex(self, *args, **kwds):
        raise NotImplementedError

class NDFrame(PandasObject):
@@ -486,3 +486,25 @@ def rename_axis(self, mapper, axis=0, copy=True):
            new_data = new_data.copy()

        return self._constructor(new_data)

    def take(self, indices, axis=0):
        """
        Analogous to ndarray.take

        Parameters
        ----------
        indices : list / array of ints
        axis : int, default 0

        Returns
        -------
        taken : type of caller
        """
        if axis == 0:
            labels = self._get_axis(axis)
            new_items = labels.take(indices)
            new_data = self._data.reindex_items(new_items)
        else:
            new_data = self._data.take(indices, axis=axis)
        return self._constructor(new_data)
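
The new `NDFrame.take` follows the usual take pattern: pick label positions on one axis, then realign the data the same way. A numpy-only sketch of that pattern (the arrays here are illustrative, not pandas internals):

```python
import numpy as np

labels = np.array(['a', 'b', 'c', 'd'], dtype=object)
values = np.arange(8).reshape(4, 2)

indices = [2, 0]
taken_labels = labels.take(indices)           # array(['c', 'a'], dtype=object)
taken_values = values.take(indices, axis=0)   # rows 2 and 0, in that order
```
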

80 changes: 35 additions & 45 deletions pandas/core/index.py
@@ -39,6 +39,11 @@ class Index(np.ndarray):
    ----
    An Index instance can **only** contain hashable objects
    """
    _map_indices = lib.map_indices_object
    _is_monotonic = lib.is_monotonic_object
    _groupby = lib.groupby_object
    _arrmap = lib.arrmap_object

    name = None
    def __new__(cls, data, dtype=None, copy=False, name=None):
        if isinstance(data, np.ndarray):
@@ -67,6 +72,10 @@ def dtype(self):
    def nlevels(self):
        return 1

    @property
    def _constructor(self):
        return Index

    def summary(self):
        if len(self) > 0:
            index_summary = ', %s to %s' % (str(self[0]), str(self[-1]))
@@ -82,15 +91,16 @@ def values(self):

    @cache_readonly
    def is_monotonic(self):
        return lib.is_monotonic_object(self)
        return self._is_monotonic(self)

    _indexMap = None
    _integrity = False

    @property
    def indexMap(self):
        "{label -> location}"
        if self._indexMap is None:
            self._indexMap = lib.map_indices_object(self)
            self._indexMap = self._map_indices(self)
            self._integrity = len(self._indexMap) == len(self)

        if not self._integrity:
@@ -185,7 +195,7 @@ def take(self, *args, **kwargs):
        Analogous to ndarray.take
        """
        taken = self.view(np.ndarray).take(*args, **kwargs)
        return Index(taken, name=self.name)
        return self._constructor(taken, name=self.name)

    def format(self, name=False):
        """
@@ -305,7 +315,7 @@ def union(self, other):
            return _ensure_index(other)

        if self.is_monotonic and other.is_monotonic:
            result = lib.outer_join_indexer_object(self, other)[0]
            result = lib.outer_join_indexer_object(self, other.values)[0]
        else:
            indexer = self.get_indexer(other)
            indexer = (indexer == -1).nonzero()[0]
@@ -356,9 +366,10 @@ def intersection(self, other):
            other = other.astype(object)

        if self.is_monotonic and other.is_monotonic:
            return Index(lib.inner_join_indexer_object(self, other)[0])
            return Index(lib.inner_join_indexer_object(self,
                                                       other.values)[0])
        else:
            indexer = self.get_indexer(other)
            indexer = self.get_indexer(other.values)
            indexer = indexer.take((indexer != -1).nonzero()[0])
            return self.take(indexer)

@@ -446,10 +457,10 @@ def get_indexer(self, target, method=None):
        return indexer

    def groupby(self, to_groupby):
        return lib.groupby_object(self.values, to_groupby)
        return self._groupby(self.values, to_groupby)

    def map(self, mapper):
        return lib.arrmap_object(self.values, mapper)
        return self._arrmap(self.values, mapper)

    def _get_method(self, method):
        if method:
@@ -621,6 +632,11 @@ def copy(self, order='C'):

class Int64Index(Index):

    _map_indices = lib.map_indices_int64
    _is_monotonic = lib.is_monotonic_int64
    _groupby = lib.groupby_int64
    _arrmap = lib.arrmap_int64

    def __new__(cls, data, dtype=None, copy=False, name=None):
        if not isinstance(data, np.ndarray):
            if np.isscalar(data):
@@ -648,29 +664,17 @@ def __new__(cls, data, dtype=None, copy=False, name=None):
        subarr.name = name
        return subarr

    @property
    def _constructor(self):
        return Int64Index

    def astype(self, dtype):
        return Index(self.values.astype(dtype))

    @property
    def dtype(self):
        return np.dtype('int64')

    @cache_readonly
    def is_monotonic(self):
        return lib.is_monotonic_int64(self)

    @property
    def indexMap(self):
        "{label -> location}"
        if self._indexMap is None:
            self._indexMap = lib.map_indices_int64(self)
            self._integrity = len(self._indexMap) == len(self)

        if not self._integrity:
            raise Exception('Index cannot contain duplicate values!')

        return self._indexMap

    def is_all_dates(self):
        """
        Checks that all the labels are datetime objects
@@ -771,19 +775,6 @@ def union(self, other):
        return Int64Index(result)
    union.__doc__ = Index.union.__doc__

    def groupby(self, to_groupby):
        return lib.groupby_int64(self, to_groupby)

    def map(self, mapper):
        return lib.arrmap_int64(self, mapper)

    def take(self, *args, **kwargs):
        """
        Analogous to ndarray.take
        """
        taken = self.values.take(*args, **kwargs)
        return Int64Index(taken, name=self.name)

class DateIndex(Index):
    pass
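
The `Index`/`Int64Index` hunks above replace duplicated method overrides with class-level hooks (`_map_indices`, `_is_monotonic`, `_groupby`, `_arrmap`, `_constructor`) that the shared base-class methods look up on `self`, so the int64 subclass only swaps in dtype-specific routines. A small, generic sketch of that dispatch pattern (names are illustrative, not pandas code):

```python
class Container(object):
    # hook points that subclasses override with type-specific callables
    _transform = staticmethod(str.upper)

    def __init__(self, values):
        self.values = list(values)

    @property
    def _constructor(self):
        return Container

    def transformed(self):
        # shared algorithm; behaviour and return type come from class attributes
        return self._constructor([self._transform(v) for v in self.values])

class LowerContainer(Container):
    _transform = staticmethod(str.lower)

    @property
    def _constructor(self):
        return LowerContainer

result = LowerContainer(['A', 'B']).transformed()
print('%s %s' % (type(result).__name__, result.values))   # LowerContainer ['a', 'b']
```
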

@@ -1267,16 +1258,9 @@ def get_indexer(self, target, method=None):
        """
        method = self._get_method(method)

        target_index = target
        if isinstance(target, MultiIndex):
            target_index = target.get_tuple_index()
        else:
            if len(target) > 0:
                val = target[0]
                if not isinstance(val, tuple) or len(val) != self.nlevels:
                    raise ValueError('can only pass MultiIndex or '
                                     'array of tuples')

            target_index = target

        self_index = self.get_tuple_index()

@@ -1509,6 +1493,9 @@ def union(self, other):
        -------
        Index
        """
        if not isinstance(other, MultiIndex):
            return other.union(self)

        self._assert_can_do_setop(other)

        if len(other) == 0 or self.equals(other):
@@ -1533,6 +1520,9 @@ def intersection(self, other):
        -------
        Index
        """
        if not isinstance(other, MultiIndex):
            return other.intersection(self)

        self._assert_can_do_setop(other)

        if self.equals(other):
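
With the two early-return branches above, `MultiIndex.union` and `MultiIndex.intersection` now hand a non-MultiIndex operand over to the flat `Index` implementation, which treats the MultiIndex as an index of tuples. A usage sketch; the `MultiIndex(levels, labels)` constructor is the era-specific spelling and an assumption here:

```python
from pandas import Index, MultiIndex

midx = MultiIndex(levels=[['x', 'y'], [1, 2]],
                  labels=[[0, 0, 1], [0, 1, 1]])
flat = Index([('x', 1), ('z', 9)])

# these no longer require both operands to be MultiIndex
combined = midx.union(flat)          # delegates to Index.union
shared = midx.intersection(flat)     # delegates to Index.intersection
```
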
29 changes: 10 additions & 19 deletions pandas/core/internals.py
@@ -176,31 +176,19 @@ def should_store(self, value):
        # unnecessarily
        return issubclass(value.dtype.type, np.floating)

    def can_store(self, value):
        return issubclass(value.dtype.type, (np.integer, np.floating))

class IntBlock(Block):

    def should_store(self, value):
        return self.can_store(value)

    def can_store(self, value):
        return issubclass(value.dtype.type, np.integer)

class BoolBlock(Block):

    def should_store(self, value):
        return self.can_store(value)

    def can_store(self, value):
        return issubclass(value.dtype.type, np.bool_)

class ObjectBlock(Block):

    def should_store(self, value):
        return self.can_store(value)

    def can_store(self, value):
        return not issubclass(value.dtype.type,
                              (np.integer, np.floating, np.bool_))

@@ -676,21 +664,24 @@ def reindex_items(self, new_items):

        return BlockManager(new_blocks, new_axes)

    def take(self, indexer, axis=1, pandas_indexer=False):
    def take(self, indexer, axis=1):
        if axis == 0:
            raise NotImplementedError

        if pandas_indexer:
            take_f = lambda arr: common.take_fast(arr, indexer,
                                                  None, False, axis=axis)
        else:
            take_f = lambda arr: arr.take(indexer, axis=axis)
        indexer = np.asarray(indexer, dtype='i4')

        n = len(self.axes[axis])
        if ((indexer == -1) | (indexer >= n)).any():
            raise Exception('Indices must be nonzero and less than '
                            'the axis length')

        new_axes = list(self.axes)
        new_axes[axis] = self.axes[axis].take(indexer)
        new_blocks = []
        for blk in self.blocks:
            newb = make_block(take_f(blk.values), blk.items, self.items)
            new_values = common.take_fast(blk.values, indexer,
                                          None, False, axis=axis)
            newb = make_block(new_values, blk.items, self.items)
            new_blocks.append(newb)

        return BlockManager(new_blocks, new_axes)
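
The rewritten `BlockManager.take` normalizes the indexer to int32 and validates it once before calling `common.take_fast` on every block. A standalone sketch of just that validation step (mirroring the guard above, not the pandas internals):

```python
import numpy as np

def check_take_indexer(indexer, axis_length):
    # reject -1 sentinels and positions past the end of the axis
    indexer = np.asarray(indexer, dtype='i4')
    if ((indexer == -1) | (indexer >= axis_length)).any():
        raise Exception('Indices must be nonzero and less than '
                        'the axis length')
    return indexer

check_take_indexer([0, 2, 3], axis_length=5)    # returns the int32 indexer
# check_take_indexer([0, -1], axis_length=5)    # would raise
```
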
6 changes: 4 additions & 2 deletions pandas/core/panel.py
@@ -665,7 +665,8 @@ def fillna(self, value=None, method='pad'):

    try:
        divide = div = _panel_arith_method(operator.div, 'divide')
    except AttributeError:   # Python 3
    except AttributeError:   # pragma: no cover
        # Python 3
        divide = div = _panel_arith_method(operator.truediv, 'divide')

    def major_xs(self, key, copy=True):
@@ -1235,7 +1236,8 @@ def _combine_panel_frame(self, other, func, axis='items'):

    try:
        divide = div = _panel_arith_method(operator.div, 'divide')
    except AttributeError:   # Python 3
    except AttributeError:   # pragma: no cover
        # Python 3
        divide = div = _panel_arith_method(operator.truediv, 'divide')

    def to_wide(self):
5 changes: 3 additions & 2 deletions pandas/core/reshape.py
@@ -287,6 +287,9 @@ def stack(frame, level=-1, dropna=True):
    stacked : Series
    """
    N, K = frame.shape
    if isinstance(level, int) and level < 0:
        level += frame.columns.nlevels

    level = frame.columns._get_level_number(level)

    if isinstance(frame.columns, MultiIndex):
@@ -318,8 +321,6 @@

def _stack_multi_columns(frame, level=-1, dropna=True):
    this = frame.copy()
    if level < 0:
        level += frame.columns.nlevels

    # this makes life much simpler
    if level != frame.columns.nlevels - 1:
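
The `stack` change above resolves a negative `level` once, up front, using ordinary negative-index arithmetic against the number of column levels; a tiny sketch of that normalization:

```python
# resolving a negative level against the number of column levels
nlevels = 3
for level in (-1, -2, 0, 2):
    resolved = level + nlevels if isinstance(level, int) and level < 0 else level
    print('%s -> %s' % (level, resolved))   # -1 -> 2, -2 -> 1, 0 -> 0, 2 -> 2
```
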
