Skip to content

Commit

Permalink
ENH: refactor Concatenator to work for ndim > 2, add join-multiple to…
Browse files Browse the repository at this point in the history
… Panel, #115
  • Loading branch information
wesm committed Jan 5, 2012
1 parent 66d9bad commit 8afa1b5
Show file tree
Hide file tree
Showing 7 changed files with 258 additions and 166 deletions.
18 changes: 12 additions & 6 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,15 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,

NDFrame.__init__(self, mgr)

@classmethod
def _from_axes(cls, data, axes):
    """
    Alternate constructor taking the data together with its axes

    Parameters
    ----------
    data : BlockManager or other constructor input
    axes : sequence
        Unpacked as (columns, index); not used when data is a BlockManager

    Returns
    -------
    obj : instance of cls
    """
    if not isinstance(data, BlockManager):
        # axes arrive ordered as (columns, index)
        columns, index = axes
        return cls(data, index=index, columns=columns, copy=False)

    # a BlockManager is handed to the constructor as-is
    return cls(data)

def _init_mgr(self, mgr, index, columns, dtype=None, copy=False):
if columns is not None:
mgr = mgr.reindex_axis(columns, axis=0, copy=False)
Expand Down Expand Up @@ -2751,9 +2760,6 @@ def append(self, other, ignore_index=False, verify_integrity=True):
return concat(to_concat, ignore_index=ignore_index,
verify_integrity=verify_integrity)

def _get_raw_column(self, col):
    """Return whatever ``self._data.get`` yields for the column ``col``"""
    manager = self._data
    return manager.get(col)

def join(self, other, on=None, how='left', lsuffix='', rsuffix=''):
"""
Join columns with other DataFrame either on index or on a key
Expand Down Expand Up @@ -2815,12 +2821,12 @@ def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix=''):
# join indexes only using concat
if how == 'left':
how = 'outer'
join_index = self.index
join_axes = [self.index]
else:
join_index = None
join_axes = None

return concat([self] + list(other), axis=1, join=how,
join_index=join_index, verify_integrity=True)
join_axes=join_axes, verify_integrity=True)

def merge(self, right, how='inner', on=None, left_on=None, right_on=None,
left_index=False, right_index=False, sort=True,
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1144,7 +1144,7 @@ def transform(self, func, *args, **kwargs):
applied.append(res)

concat_index = obj.columns if self.axis == 0 else obj.index
concatenated = concat(applied, join_index=concat_index,
concatenated = concat(applied, join_axes=[concat_index],
axis=self.axis, verify_integrity=False)
return concatenated.reindex_like(obj)

Expand Down
48 changes: 36 additions & 12 deletions pandas/core/panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,16 @@ def __init__(self, data=None, items=None, major_axis=None, minor_axis=None,

NDFrame.__init__(self, mgr, axes=axes, copy=copy, dtype=dtype)

@classmethod
def _from_axes(cls, data, axes):
    """
    Alternate constructor taking the data together with its axes

    Parameters
    ----------
    data : BlockManager or other constructor input
    axes : sequence
        Unpacked as (items, major, minor); not used when data is a
        BlockManager

    Returns
    -------
    obj : instance of cls
    """
    if not isinstance(data, BlockManager):
        # axes arrive ordered as (items, major, minor)
        items, major, minor = axes
        return cls(data, items=items, major_axis=major,
                   minor_axis=minor, copy=False)

    # a BlockManager is handed to the constructor as-is
    return cls(data)

def _init_dict(self, data, axes, dtype=None):
items, major, minor = axes

Expand Down Expand Up @@ -1067,13 +1077,13 @@ def truncate(self, before=None, after=None, axis='major'):

return self.reindex(**{axis : new_index})

def join(self, other, how=None, lsuffix='', rsuffix=''):
def join(self, other, how='left', lsuffix='', rsuffix=''):
"""
Join items with other Panel either on major and minor axes column
Parameters
----------
other : Panel
other : Panel or list of Panels
Index should be similar to one of the columns in this one
how : {'left', 'right', 'outer', 'inner'}
How to handle indexes of the two objects. Default: 'left'
Expand All @@ -1091,16 +1101,30 @@ def join(self, other, how=None, lsuffix='', rsuffix=''):
-------
joined : Panel
"""
if how is None:
how = 'left'
return self._join_index(other, how, lsuffix, rsuffix)

def _join_index(self, other, how, lsuffix, rsuffix):
join_major, join_minor = self._get_join_index(other, how)
this = self.reindex(major=join_major, minor=join_minor)
other = other.reindex(major=join_major, minor=join_minor)
merged_data = this._data.merge(other._data, lsuffix, rsuffix)
return self._constructor(merged_data)
from pandas.tools.merge import concat

if isinstance(other, Panel):
join_major, join_minor = self._get_join_index(other, how)
this = self.reindex(major=join_major, minor=join_minor)
other = other.reindex(major=join_major, minor=join_minor)
merged_data = this._data.merge(other._data, lsuffix, rsuffix)
return self._constructor(merged_data)
else:
if lsuffix or rsuffix:
raise ValueError('Suffixes not supported when passing multiple '
'panels')

if how == 'left':
how = 'outer'
join_axes = [self.major_axis, self.minor_axis]
elif how == 'right':
raise ValueError('Right join not supported with multiple '
'panels')
else:
join_axes = None

return concat([self] + list(other), axis=0, join=how,
join_axes=join_axes, verify_integrity=True)

def _get_join_index(self, other, how):
if how == 'left':
Expand Down
27 changes: 23 additions & 4 deletions pandas/sparse/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
float64 data
"""

# pylint: disable=E1101,E1103,W0231
# pylint: disable=E1101,E1103,W0231,E0202

from numpy import nan
import numpy as np
Expand All @@ -13,10 +13,24 @@
from pandas.core.series import Series
from pandas.core.frame import (DataFrame, extract_index, _prep_ndarray,
_default_index)
from pandas.util.decorators import cache_readonly
import pandas.core.datetools as datetools

from pandas.sparse.series import SparseSeries


class _SparseMockBlockManager(object):
    """
    Thin adapter exposing ``get`` and ``axes`` on top of a wrapped frame

    Parameters
    ----------
    sp_frame : object
        Wrapped frame; must support item lookup and expose ``columns``
        and ``index`` attributes
    """

    def __init__(self, sp_frame):
        self.sp_frame = sp_frame

    @property
    def axes(self):
        """Return ``[columns, index]`` of the wrapped frame"""
        frame = self.sp_frame
        return [frame.columns, frame.index]

    def get(self, item):
        """Return the ``values`` attribute of ``sp_frame[item]``"""
        column = self.sp_frame[item]
        return column.values

class SparseDataFrame(DataFrame):
"""
DataFrame containing sparse floating point data in the form of SparseSeries
Expand Down Expand Up @@ -71,6 +85,14 @@ def __init__(self, data=None, index=None, columns=None,
self.columns = columns
self.index = index

def _from_axes(self, data, axes):
    """
    Build a new object from data and its axes via ``self._constructor``

    Parameters
    ----------
    data : constructor input
    axes : sequence
        Unpacked as (columns, index)
    """
    # axes arrive ordered as (columns, index)
    columns, index = axes
    build = self._constructor
    return build(data, index=index, columns=columns)

@cache_readonly
def _data(self):
    """Stand-in block manager wrapping this frame"""
    # exposes the get/axes interface through _SparseMockBlockManager
    manager = _SparseMockBlockManager(self)
    return manager

def _get_numeric_columns(self):
    """Return ``self.columns`` without any filtering"""
    # all data is necessarily float64, so every column counts as numeric
    numeric = self.columns
    return numeric
Expand Down Expand Up @@ -512,9 +534,6 @@ def _rename_columns_inplace(self, mapper):
self.columns = new_columns
self._series = new_series

def _get_raw_column(self, col):
    """Return the ``values`` attribute of the entry for ``col`` in ``self._series``"""
    series = self._series[col]
    return series.values

def add_prefix(self, prefix):
    """
    Rename the columns by prepending ``prefix`` to each label

    Parameters
    ----------
    prefix : object
        Converted with ``'%s' % prefix`` before being prepended

    Returns
    -------
    Result of ``self.rename(columns=...)`` with the prefixing function
    """
    # The prefix becomes part of a %-format template, so any literal '%'
    # in it must be doubled; otherwise a prefix such as '50%_' is parsed
    # as a format directive and raises (or silently corrupts the label)
    template = ('%s' % prefix).replace('%', '%%') + '%s'
    return self.rename(columns=template.__mod__)
Expand Down
42 changes: 0 additions & 42 deletions pandas/tests/test_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -902,48 +902,6 @@ def test_shift(self):

self.assertRaises(Exception, self.panel.shift, 1, axis='items')

def test_join(self):
    """Check Panel.join on two overlapping sub-panels for every ``how``"""
    left = self.panel.ix[:2, :10, :3]
    right = self.panel.ix[2:, 5:, 2:]

    # default join (no ``how`` given): keeps the axes of the left panel
    joined = left.join(right)
    wanted = left.copy()
    wanted['ItemC'] = right['ItemC']
    assert_panel_equal(joined, wanted)

    # how='right': keeps the axes of the right panel
    joined = left.join(right, how='right')
    wanted = right.copy()
    for item in ('ItemA', 'ItemB'):
        wanted[item] = left[item]
    wanted = wanted.reindex(items=['ItemA', 'ItemB', 'ItemC'])
    assert_panel_equal(joined, wanted)

    # how='inner': only the overlapping major/minor labels survive
    joined = left.join(right, how='inner')
    wanted = self.panel.ix[:, 5:10, 2:3]
    assert_panel_equal(joined, wanted)

    # how='outer': same as joining after aligning both to the full axes
    joined = left.join(right, how='outer')
    full_major = self.panel.major_axis
    full_minor = self.panel.minor_axis
    wanted = left.reindex(major=full_major, minor=full_minor)
    wanted = wanted.join(right.reindex(major=full_major,
                                       minor=full_minor))
    assert_panel_equal(joined, wanted)

def test_join_overlap(self):
    """Check that items present in both panels pick up the suffixes"""
    shared = ('ItemB', 'ItemC')
    p1 = self.panel.ix[['ItemA', 'ItemB', 'ItemC']]
    p2 = self.panel.ix[list(shared)]

    joined = p1.join(p2, lsuffix='_p1', rsuffix='_p2')

    # build the expectation: suffixed copies of the shared items from
    # each side, followed by the item that only exists on the left
    left_suffixed = p1.ix[list(shared)].add_suffix('_p1')
    right_suffixed = p2.ix[list(shared)].add_suffix('_p2')
    left_only = self.panel.ix[['ItemA']]
    expected = left_suffixed.join(right_suffixed).join(left_only)
    assert_panel_equal(joined, expected)

def test_repr_empty(self):
empty = Panel()
repr(empty)
Expand Down
Loading

0 comments on commit 8afa1b5

Please sign in to comment.