Skip to content

Commit

Permalink
ENH: add Series/DataFrame.update methods and rst docs, close #961
Browse files Browse the repository at this point in the history
  • Loading branch information
wesm committed May 7, 2012
1 parent e8fc1c0 commit 41b3f9c
Show file tree
Hide file tree
Showing 6 changed files with 89 additions and 0 deletions.
1 change: 1 addition & 0 deletions RELEASE.rst
Expand Up @@ -65,6 +65,7 @@ pandas 0.8.0
- Fix label slicing issues with float index values (#1167)
- Fix segfault caused by empty groups passed to groupby (#1048)
- Fix occasionally misbehaved reindexing in the presence of NaN labels (#522)
- Fix imprecise logic causing weird Series results from .apply (#1183)

pandas 0.7.3
============
Expand Down
31 changes: 31 additions & 0 deletions doc/source/merging.rst
Expand Up @@ -6,6 +6,7 @@
import numpy as np
np.random.seed(123456)
from numpy import nan
from pandas import *
randn = np.random.randn
np.set_printoptions(precision=4, suppress=True)
Expand Down Expand Up @@ -552,3 +553,33 @@ them together on their indexes. The same is true for ``Panel.join``.
df1
df1.join([df2, df3])
.. _merging.multiple_join:

Merging together values within Series or DataFrame columns
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Another fairly common situation is to have two like-indexed (or similarly
indexed) Series or DataFrame objects and to want to "patch" values in one
object with values from matching indices in the other. Here is an example:

.. ipython:: python

   df1 = DataFrame([[nan, 3., 5.], [-4.6, np.nan, nan],
                    [nan, 7., nan]])
   df2 = DataFrame([[-42.6, np.nan, -8.2], [-5., 1.6, 4]],
                   index=[1, 2])

For this, use the ``combine_first`` method:

.. ipython:: python

   df1.combine_first(df2)

Note that this method only takes values from the right DataFrame if they are
missing in the left DataFrame. A related method, ``update``, overwrites values
in the left DataFrame in place with the non-NA values from the right DataFrame:

.. ipython:: python

   df1.update(df2)
   df1
19 changes: 19 additions & 0 deletions pandas/core/frame.py
Expand Up @@ -2781,6 +2781,25 @@ def combine_first(self, other):
combiner = lambda x, y: np.where(isnull(x), y, x)
return self.combine(other, combiner)

def update(self, other, join='left'):
    """
    Modify DataFrame in place using non-NA values from passed
    DataFrame. Aligns on indices.

    Parameters
    ----------
    other : DataFrame
        Source of replacement values. It is conformed to this frame
        with ``reindex_like`` before any values are copied.
    join : {'left', 'right', 'outer', 'inner'}, default 'left'
        Only 'left' is currently implemented.

    Raises
    ------
    NotImplementedError
        If ``join`` is anything other than 'left'.
    """
    if join != 'left':
        # Say what was rejected and what is supported instead of raising
        # a bare, message-less exception.
        raise NotImplementedError("Only join='left' is supported, got "
                                  "join=%r" % (join,))

    # Conform ``other`` to this frame so the per-column arrays below line
    # up element for element.
    other = other.reindex_like(self)
    for col in self.columns:
        this = self[col].values
        that = other[col].values
        # Keep our value wherever ``other`` is NA, otherwise take theirs.
        self[col] = np.where(isnull(that), this, that)

#----------------------------------------------------------------------
# Misc methods

Expand Down
13 changes: 13 additions & 0 deletions pandas/core/series.py
Expand Up @@ -1559,6 +1559,19 @@ def combine_first(self, other):
return Series(np.where(isnull(this), other, this), index=new_index,
name=name)

def update(self, other):
    """
    Overwrite values of this Series, in place, with the non-NA values
    of the passed Series. Aligns on index.

    Parameters
    ----------
    other : Series
    """
    aligned = other.reindex_like(self)
    # Write into the array behind ``self.values`` at every position where
    # the aligned ``other`` holds a non-NA value.
    np.putmask(self.values, notnull(aligned), aligned.values)

#----------------------------------------------------------------------
# Reindexing, sorting

Expand Down
17 changes: 17 additions & 0 deletions pandas/tests/test_frame.py
Expand Up @@ -4316,6 +4316,23 @@ def test_combine_first_mixed_bug(self):
combined = frame1.combine_first(frame2)
self.assertEqual(len(combined.columns), 5)

def test_update(self):
df = DataFrame([[1.5, nan, 3.],
[1.5, nan, 3.],
[1.5, nan, 3],
[1.5, nan, 3]])

other = DataFrame([[3.6, 2., np.nan],
[np.nan, np.nan, 7]], index=[1, 3])

df.update(other)

expected = DataFrame([[1.5, nan, 3],
[3.6, 2, 3],
[1.5, nan, 3],
[1.5, nan, 7.]])
assert_frame_equal(df, expected)

def test_combineAdd(self):
# trivial
comb = self.frame.combineAdd(self.frame)
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/test_series.py
Expand Up @@ -1527,6 +1527,14 @@ def test_combine_first(self):
result = s.combine_first(Series([], index=[]))
assert_series_equal(s, result)

def test_update(self):
s = Series([1.5, nan, 3., 4., nan])
s2 = Series([nan, 3.5, nan, 5.])
s.update(s2)

expected = Series([1.5, 3.5, 3., 5., np.nan])
assert_series_equal(s, expected)

def test_corr(self):
import scipy.stats as stats

Expand Down

0 comments on commit 41b3f9c

Please sign in to comment.