Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

Already on GitHub? Sign in to your account

Issue #806 DataFrame/Series.align can now specify fill_value or fill_method #807

Merged
merged 8 commits on Feb 23, 2012
View
@@ -1669,7 +1669,8 @@ def lookup(self, row_labels, col_labels):
#----------------------------------------------------------------------
# Reindexing and alignment
- def align(self, other, join='outer', axis=None, level=None, copy=True):
+ def align(self, other, join='outer', axis=None, level=None, copy=True,
+ fill_value=None, fill_method=None):
"""
Align two DataFrame objects on their index and columns with the
specified join method for each axis Index
@@ -1683,6 +1684,11 @@ def align(self, other, join='outer', axis=None, level=None, copy=True):
level : int or name
Broadcast across a level, matching Index values on the
passed MultiIndex level
+ copy : boolean, default True
+ Always returns new objects. If copy=False and no reindexing is
+ required then original objects are returned.
+ fill_value : object, default None
+ fill_method : str, default None
Returns
-------
@@ -1691,15 +1697,19 @@ def align(self, other, join='outer', axis=None, level=None, copy=True):
"""
if isinstance(other, DataFrame):
return self._align_frame(other, join=join, axis=axis, level=level,
- copy=copy)
+ copy=copy,
+ fill_value=fill_value,
+ fill_method=fill_method)
elif isinstance(other, Series):
return self._align_series(other, join=join, axis=axis, level=level,
- copy=copy)
+ copy=copy,
+ fill_value=fill_value,
+ fill_method=fill_method)
else: # pragma: no cover
raise TypeError('unsupported type: %s' % type(other))
def _align_frame(self, other, join='outer', axis=None, level=None,
- copy=True):
+ copy=True, fill_value=None, fill_method=None):
# defaults
join_index, join_columns = None, None
ilidx, iridx = None, None
@@ -1721,10 +1731,15 @@ def _align_frame(self, other, join='outer', axis=None, level=None,
join_columns, clidx, copy)
right = other._reindex_with_indexers(join_index, iridx,
join_columns, cridx, copy)
- return left, right
+ fill_na = (fill_value is not None) or (fill_method is not None)
+ if fill_na:
+ return (left.fillna(fill_value, method=fill_method),
+ right.fillna(fill_value, method=fill_method))
+ else:
+ return left, right
def _align_series(self, other, join='outer', axis=None, level=None,
- copy=True):
+ copy=True, fill_value=None, fill_method=None):
fdata = self._data
if axis == 0:
join_index = self.index
@@ -1753,7 +1768,13 @@ def _align_series(self, other, join='outer', axis=None, level=None,
left_result = DataFrame(fdata)
right_result = other if ridx is None else other.reindex(join_index)
- return left_result, right_result
+
+ fill_na = (fill_value is not None) or (fill_method is not None)
+ if fill_na:
+ return (left_result.fillna(fill_value, fill_method=fill_method),
+ right_result.fillna(fill_value, fill_method=fill_method))
+ else:
+ return left_result, right_result
def reindex(self, index=None, columns=None, method=None, level=None,
copy=True):
@@ -4080,18 +4101,6 @@ def _to_sdict(data, columns):
else: # pragma: no cover
raise TypeError('No logic to handle %s type' % type(data[0]))
-def _list_to_sdict(data, columns):
- if len(data) > 0 and isinstance(data[0], tuple):
- content = list(lib.to_object_array_tuples(data).T)
- elif len(data) > 0:
- # list of lists
- content = list(lib.to_object_array(data).T)
- else:
- if columns is None:
- columns = []
- return {}, columns
- return _convert_object_array(content, columns)
-
def _list_of_series_to_sdict(data, columns):
from pandas.core.index import _get_combined_index
@@ -4107,6 +4116,7 @@ def _list_of_series_to_sdict(data, columns):
else:
return values, columns
+
def _list_of_dict_to_sdict(data, columns):
if columns is None:
gen = (x.keys() for x in data)
View
@@ -1786,7 +1786,8 @@ def apply(self, func):
mapped = lib.map_infer(self.values, func)
return Series(mapped, index=self.index, name=self.name)
- def align(self, other, join='outer', level=None, copy=True):
+ def align(self, other, join='outer', level=None, copy=True,
+ fill_value=None, fill_method=None):
"""
Align two Series objects with the specified join method
@@ -1800,6 +1801,8 @@ def align(self, other, join='outer', level=None, copy=True):
copy : boolean, default True
Always return new objects. If copy=False and no reindexing is
required, the same object will be returned (for better performance)
+ fill_value : object, default None
+ fill_method : str, default None
Returns
-------
@@ -1812,7 +1815,12 @@ def align(self, other, join='outer', level=None, copy=True):
left = self._reindex_indexer(join_index, lidx, copy)
right = other._reindex_indexer(join_index, ridx, copy)
- return left, right
+ fill_na = (fill_value is not None) or (fill_method is not None)
+ if fill_na:
+ return (left.fillna(fill_value, method=fill_method),
+ right.fillna(fill_value, method=fill_method))
+ else:
+ return left, right
def _reindex_indexer(self, new_index, indexer, copy):
if indexer is not None:
View
@@ -3180,8 +3180,15 @@ def test_align(self):
# axis = 0
other = self.frame.ix[:-5, :3]
- af, bf = self.frame.align(other, axis=0)
- self.assert_(bf.columns.equals(other.columns))
+ af, bf = self.frame.align(other, axis=0, fill_value=-1)
+ self.assert_(bf.columns.equals(other.columns))
+ #test fill value
+ join_idx = self.frame.index.join(other.index)
+ diff_a = self.frame.index.diff(join_idx)
+ diff_b = other.index.diff(join_idx)
+ diff_a_vals = af.reindex(diff_a).values
+ diff_b_vals = bf.reindex(diff_b).values
+ self.assert_((diff_a_vals == -1).all())
af, bf = self.frame.align(other, join='right', axis=0)
self.assert_(bf.columns.equals(other.columns))
@@ -3194,6 +3201,14 @@ def test_align(self):
self.assert_(bf.columns.equals(self.frame.columns))
self.assert_(bf.index.equals(other.index))
+ #test fill value
+ join_idx = self.frame.index.join(other.index)
+ diff_a = self.frame.index.diff(join_idx)
+ diff_b = other.index.diff(join_idx)
+ diff_a_vals = af.reindex(diff_a).values
+ diff_b_vals = bf.reindex(diff_b).values
+ self.assert_((diff_a_vals == -1).all())
+
af, bf = self.frame.align(other, join='inner', axis=1)
self.assert_(bf.columns.equals(other.columns))
@@ -1783,10 +1783,18 @@ def test_apply(self):
assert_series_equal(result, self.ts * 2)
def test_align(self):
- def _check_align(a, b, how='left'):
- aa, ab = a.align(b, join=how)
-
+ def _check_align(a, b, how='left', fill=None):
+ aa, ab = a.align(b, join=how, fill_value=fill)
+
join_index = a.index.join(b.index, how=how)
+ if fill is not None:
+ diff_a = aa.index.diff(join_index)
+ diff_b = ab.index.diff(join_index)
+ if len(diff_a) > 0:
+ self.assert_((aa.reindex(diff_a) == fill).all())
+ if len(diff_b) > 0:
+ self.assert_((ab.reindex(diff_b) == fill).all())
+
ea = a.reindex(join_index)
eb = b.reindex(join_index)
@@ -1795,6 +1803,7 @@ def _check_align(a, b, how='left'):
for kind in JOIN_TYPES:
_check_align(self.ts[2:], self.ts[:-5])
+ _check_align(self.ts[2:], self.ts[:-5], -1)
# empty left
_check_align(self.ts[:0], self.ts[:-5])