ENH: add Panel.take, implement set ops between MultiIndex and Index, plus test coverage

wesm committed Oct 24, 2011
1 parent eddd5c9 commit 394bb0d
Showing 12 changed files with 161 additions and 89 deletions.
2 changes: 2 additions & 0 deletions RELEASE.rst
@@ -128,6 +128,7 @@ feedback on the library.
- Added `pivot_table` convenience function to pandas namespace (GH #234)
- Implemented `Panel.rename_axis` function (GH #243)
- DataFrame will show index level names in console output
- Implemented `Panel.take`

**Improvements to existing features**

@@ -189,6 +190,7 @@ feedback on the library.
  issue GH #262
- Can pass list of tuples to `Series` (GH #270)
- Can pass level name to `DataFrame.stack`
- Support set operations between MultiIndex and Index

Thanks
------
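
As a quick usage sketch of the `Panel.take` entry above (the set-operation entry is illustrated further down, next to the `pandas/core/index.py` changes): this is written against the pandas API of this era, and the `Panel(values, items, major_axis, minor_axis)` construction plus the sample data are assumptions for illustration, not part of the commit.

```python
import numpy as np
from pandas import Panel

# Panel.take: positional selection along an axis, analogous to ndarray.take
panel = Panel(np.random.randn(2, 4, 3),
              items=['eggs', 'spam'],
              major_axis=range(4),
              minor_axis=['a', 'b', 'c'])

reordered = panel.take([1, 0], axis=0)   # reorder the items axis
first_two = panel.take([0, 1], axis=1)   # first two major_axis positions
```
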
2 changes: 1 addition & 1 deletion pandas/__init__.py
@@ -8,7 +8,7 @@

try:
    import pandas._tseries as lib
except Exception, e:
except Exception, e:  # pragma: no cover
    if 'No module named' in e.message:
        raise ImportError('C extensions not built: if you installed already '
                          'verify that you are not importing from the source '
2 changes: 1 addition & 1 deletion pandas/core/frame.py
@@ -2308,7 +2308,7 @@ def count(self, axis=0, level=None, numeric_only=False):
        else:
            frame = self

        result = frame.apply(Series.count, axis=axis)
        result = DataFrame.apply(frame, Series.count, axis=axis)

        # what happens with empty DataFrame
        if isinstance(result, DataFrame):
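
The one-line change above routes the call through `DataFrame.apply` explicitly rather than `frame.apply`. One plausible reading (not stated in the commit) is that this pins the base-class implementation even if `frame` is an instance of a subclass that overrides `apply`. A generic, pandas-free sketch of that Python pattern:

```python
class Base(object):
    def apply(self, func):
        return 'Base.apply: %s' % func()

class Child(Base):
    def apply(self, func):
        return 'Child.apply: %s' % func()

obj = Child()
print(obj.apply(lambda: 1))         # dispatches to the override -> Child.apply
print(Base.apply(obj, lambda: 1))   # forces the base implementation, like DataFrame.apply(frame, ...)
```
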
24 changes: 23 additions & 1 deletion pandas/core/generic.py
@@ -204,7 +204,7 @@ def sort_index(self, axis=0, ascending=True):
    def ix(self):
        raise NotImplementedError

    def reindex(self, **kwds):
    def reindex(self, *args, **kwds):
        raise NotImplementedError

class NDFrame(PandasObject):
@@ -486,3 +486,25 @@ def rename_axis(self, mapper, axis=0, copy=True):
            new_data = new_data.copy()

        return self._constructor(new_data)

    def take(self, indices, axis=0):
        """
        Analogous to ndarray.take

        Parameters
        ----------
        indices : list / array of ints
        axis : int, default 0

        Returns
        -------
        taken : type of caller
        """
        if axis == 0:
            labels = self._get_axis(axis)
            new_items = labels.take(indices)
            new_data = self._data.reindex_items(new_items)
        else:
            new_data = self._data.take(indices, axis=axis)
        return self._constructor(new_data)
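
The new `NDFrame.take` follows the usual take pattern: pick label positions on one axis, then realign the data the same way. A numpy-only sketch of that pattern (the arrays here are illustrative, not pandas internals):

```python
import numpy as np

labels = np.array(['a', 'b', 'c', 'd'], dtype=object)
values = np.arange(8).reshape(4, 2)

indices = [2, 0]
taken_labels = labels.take(indices)           # array(['c', 'a'], dtype=object)
taken_values = values.take(indices, axis=0)   # rows 2 and 0, in that order
```
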

80 changes: 35 additions & 45 deletions pandas/core/index.py
@@ -39,6 +39,11 @@ class Index(np.ndarray):
    ----
    An Index instance can **only** contain hashable objects
    """
    _map_indices = lib.map_indices_object
    _is_monotonic = lib.is_monotonic_object
    _groupby = lib.groupby_object
    _arrmap = lib.arrmap_object

    name = None
    def __new__(cls, data, dtype=None, copy=False, name=None):
        if isinstance(data, np.ndarray):
@@ -67,6 +72,10 @@ def dtype(self):
    def nlevels(self):
        return 1

    @property
    def _constructor(self):
        return Index

    def summary(self):
        if len(self) > 0:
            index_summary = ', %s to %s' % (str(self[0]), str(self[-1]))
@@ -82,15 +91,16 @@ def values(self):

    @cache_readonly
    def is_monotonic(self):
        return lib.is_monotonic_object(self)
        return self._is_monotonic(self)

    _indexMap = None
    _integrity = False

    @property
    def indexMap(self):
        "{label -> location}"
        if self._indexMap is None:
            self._indexMap = lib.map_indices_object(self)
            self._indexMap = self._map_indices(self)
            self._integrity = len(self._indexMap) == len(self)

        if not self._integrity:
@@ -185,7 +195,7 @@ def take(self, *args, **kwargs):
        Analogous to ndarray.take
        """
        taken = self.view(np.ndarray).take(*args, **kwargs)
        return Index(taken, name=self.name)
        return self._constructor(taken, name=self.name)

    def format(self, name=False):
        """
@@ -305,7 +315,7 @@ def union(self, other):
            return _ensure_index(other)

        if self.is_monotonic and other.is_monotonic:
            result = lib.outer_join_indexer_object(self, other)[0]
            result = lib.outer_join_indexer_object(self, other.values)[0]
        else:
            indexer = self.get_indexer(other)
            indexer = (indexer == -1).nonzero()[0]
@@ -356,9 +366,10 @@ def intersection(self, other):
            other = other.astype(object)

        if self.is_monotonic and other.is_monotonic:
            return Index(lib.inner_join_indexer_object(self, other)[0])
            return Index(lib.inner_join_indexer_object(self,
                                                       other.values)[0])
        else:
            indexer = self.get_indexer(other)
            indexer = self.get_indexer(other.values)
            indexer = indexer.take((indexer != -1).nonzero()[0])
            return self.take(indexer)

@@ -446,10 +457,10 @@ def get_indexer(self, target, method=None):
        return indexer

    def groupby(self, to_groupby):
        return lib.groupby_object(self.values, to_groupby)
        return self._groupby(self.values, to_groupby)

    def map(self, mapper):
        return lib.arrmap_object(self.values, mapper)
        return self._arrmap(self.values, mapper)

    def _get_method(self, method):
        if method:
@@ -621,6 +632,11 @@ def copy(self, order='C'):

class Int64Index(Index):

    _map_indices = lib.map_indices_int64
    _is_monotonic = lib.is_monotonic_int64
    _groupby = lib.groupby_int64
    _arrmap = lib.arrmap_int64

    def __new__(cls, data, dtype=None, copy=False, name=None):
        if not isinstance(data, np.ndarray):
            if np.isscalar(data):
@@ -648,29 +664,17 @@ def __new__(cls, data, dtype=None, copy=False, name=None):
        subarr.name = name
        return subarr

    @property
    def _constructor(self):
        return Int64Index

    def astype(self, dtype):
        return Index(self.values.astype(dtype))

    @property
    def dtype(self):
        return np.dtype('int64')

    @cache_readonly
    def is_monotonic(self):
        return lib.is_monotonic_int64(self)

    @property
    def indexMap(self):
        "{label -> location}"
        if self._indexMap is None:
            self._indexMap = lib.map_indices_int64(self)
            self._integrity = len(self._indexMap) == len(self)

        if not self._integrity:
            raise Exception('Index cannot contain duplicate values!')

        return self._indexMap

    def is_all_dates(self):
        """
        Checks that all the labels are datetime objects
@@ -771,19 +775,6 @@ def union(self, other):
        return Int64Index(result)
    union.__doc__ = Index.union.__doc__

    def groupby(self, to_groupby):
        return lib.groupby_int64(self, to_groupby)

    def map(self, mapper):
        return lib.arrmap_int64(self, mapper)

    def take(self, *args, **kwargs):
        """
        Analogous to ndarray.take
        """
        taken = self.values.take(*args, **kwargs)
        return Int64Index(taken, name=self.name)

class DateIndex(Index):
    pass
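
The `Index`/`Int64Index` hunks above replace duplicated method overrides with class-level hooks (`_map_indices`, `_is_monotonic`, `_groupby`, `_arrmap`, `_constructor`) that the shared base-class methods look up on `self`, so the int64 subclass only swaps in dtype-specific routines. A small, generic sketch of that dispatch pattern (names are illustrative, not pandas code):

```python
class Container(object):
    # hook points that subclasses override with type-specific callables
    _transform = staticmethod(str.upper)

    def __init__(self, values):
        self.values = list(values)

    @property
    def _constructor(self):
        return Container

    def transformed(self):
        # shared algorithm; behaviour and return type come from class attributes
        return self._constructor([self._transform(v) for v in self.values])

class LowerContainer(Container):
    _transform = staticmethod(str.lower)

    @property
    def _constructor(self):
        return LowerContainer

result = LowerContainer(['A', 'B']).transformed()
print('%s %s' % (type(result).__name__, result.values))   # LowerContainer ['a', 'b']
```
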

@@ -1267,16 +1258,9 @@ def get_indexer(self, target, method=None):
        """
        method = self._get_method(method)

        target_index = target
        if isinstance(target, MultiIndex):
            target_index = target.get_tuple_index()
        else:
            if len(target) > 0:
                val = target[0]
                if not isinstance(val, tuple) or len(val) != self.nlevels:
                    raise ValueError('can only pass MultiIndex or '
                                     'array of tuples')

            target_index = target

        self_index = self.get_tuple_index()

@@ -1509,6 +1493,9 @@ def union(self, other):
        -------
        Index
        """
        if not isinstance(other, MultiIndex):
            return other.union(self)

        self._assert_can_do_setop(other)

        if len(other) == 0 or self.equals(other):
@@ -1533,6 +1520,9 @@ def intersection(self, other):
        -------
        Index
        """
        if not isinstance(other, MultiIndex):
            return other.intersection(self)

        self._assert_can_do_setop(other)

        if self.equals(other):
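
With the two early-return branches above, `MultiIndex.union` and `MultiIndex.intersection` now hand a non-MultiIndex operand over to the flat `Index` implementation, which treats the MultiIndex as an index of tuples. A usage sketch; the `MultiIndex(levels, labels)` constructor is the era-specific spelling and an assumption here:

```python
from pandas import Index, MultiIndex

midx = MultiIndex(levels=[['x', 'y'], [1, 2]],
                  labels=[[0, 0, 1], [0, 1, 1]])
flat = Index([('x', 1), ('z', 9)])

# these no longer require both operands to be MultiIndex
combined = midx.union(flat)          # delegates to Index.union
shared = midx.intersection(flat)     # delegates to Index.intersection
```
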
29 changes: 10 additions & 19 deletions pandas/core/internals.py
@@ -176,31 +176,19 @@ def should_store(self, value):
        # unnecessarily
        return issubclass(value.dtype.type, np.floating)

    def can_store(self, value):
        return issubclass(value.dtype.type, (np.integer, np.floating))

class IntBlock(Block):

    def should_store(self, value):
        return self.can_store(value)

    def can_store(self, value):
        return issubclass(value.dtype.type, np.integer)

class BoolBlock(Block):

    def should_store(self, value):
        return self.can_store(value)

    def can_store(self, value):
        return issubclass(value.dtype.type, np.bool_)

class ObjectBlock(Block):

    def should_store(self, value):
        return self.can_store(value)

    def can_store(self, value):
        return not issubclass(value.dtype.type,
                              (np.integer, np.floating, np.bool_))

@@ -676,21 +664,24 @@ def reindex_items(self, new_items):

        return BlockManager(new_blocks, new_axes)

    def take(self, indexer, axis=1, pandas_indexer=False):
    def take(self, indexer, axis=1):
        if axis == 0:
            raise NotImplementedError

        if pandas_indexer:
            take_f = lambda arr: common.take_fast(arr, indexer,
                                                  None, False, axis=axis)
        else:
            take_f = lambda arr: arr.take(indexer, axis=axis)
        indexer = np.asarray(indexer, dtype='i4')

        n = len(self.axes[axis])
        if ((indexer == -1) | (indexer >= n)).any():
            raise Exception('Indices must be nonzero and less than '
                            'the axis length')

        new_axes = list(self.axes)
        new_axes[axis] = self.axes[axis].take(indexer)
        new_blocks = []
        for blk in self.blocks:
            newb = make_block(take_f(blk.values), blk.items, self.items)
            new_values = common.take_fast(blk.values, indexer,
                                          None, False, axis=axis)
            newb = make_block(new_values, blk.items, self.items)
            new_blocks.append(newb)

        return BlockManager(new_blocks, new_axes)
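
The rewritten `BlockManager.take` normalizes the indexer to int32 and validates it once before calling `common.take_fast` on every block. A standalone sketch of just that validation step (mirroring the guard above, not the pandas internals):

```python
import numpy as np

def check_take_indexer(indexer, axis_length):
    # reject -1 sentinels and positions past the end of the axis
    indexer = np.asarray(indexer, dtype='i4')
    if ((indexer == -1) | (indexer >= axis_length)).any():
        raise Exception('Indices must be nonzero and less than '
                        'the axis length')
    return indexer

check_take_indexer([0, 2, 3], axis_length=5)    # returns the int32 indexer
# check_take_indexer([0, -1], axis_length=5)    # would raise
```
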
6 changes: 4 additions & 2 deletions pandas/core/panel.py
@@ -665,7 +665,8 @@ def fillna(self, value=None, method='pad'):

    try:
        divide = div = _panel_arith_method(operator.div, 'divide')
    except AttributeError:   # Python 3
    except AttributeError:   # pragma: no cover
        # Python 3
        divide = div = _panel_arith_method(operator.truediv, 'divide')

    def major_xs(self, key, copy=True):
@@ -1235,7 +1236,8 @@ def _combine_panel_frame(self, other, func, axis='items'):

    try:
        divide = div = _panel_arith_method(operator.div, 'divide')
    except AttributeError:   # Python 3
    except AttributeError:   # pragma: no cover
        # Python 3
        divide = div = _panel_arith_method(operator.truediv, 'divide')

    def to_wide(self):
5 changes: 3 additions & 2 deletions pandas/core/reshape.py
@@ -287,6 +287,9 @@ def stack(frame, level=-1, dropna=True):
    stacked : Series
    """
    N, K = frame.shape
    if isinstance(level, int) and level < 0:
        level += frame.columns.nlevels

    level = frame.columns._get_level_number(level)

    if isinstance(frame.columns, MultiIndex):
@@ -318,8 +321,6 @@

def _stack_multi_columns(frame, level=-1, dropna=True):
    this = frame.copy()
    if level < 0:
        level += frame.columns.nlevels

    # this makes life much simpler
    if level != frame.columns.nlevels - 1:
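
The `stack` change above resolves a negative `level` once, up front, using ordinary negative-index arithmetic against the number of column levels; a tiny sketch of that normalization:

```python
# resolving a negative level against the number of column levels
nlevels = 3
for level in (-1, -2, 0, 2):
    resolved = level + nlevels if isinstance(level, int) and level < 0 else level
    print('%s -> %s' % (level, resolved))   # -1 -> 2, -2 -> 1, 0 -> 0, 2 -> 2
```
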
