Skip to content

Commit

Permalink
BUG: make inplace semantics of DataFrame.where consistent. #2230
Browse files: browse the repository at this point in the history
  • Loading branch information
wesm committed Nov 13, 2012
1 parent 564175e commit ca8a6ba
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 16 deletions.
2 changes: 2 additions & 0 deletions RELEASE.rst
Original file line number | Original file line | Diff line number | Diff line change
Expand Up @@ -57,6 +57,7 @@ pandas 0.9.1
- Improve performance of Series/DataFrame.diff (re: #2087) - Improve performance of Series/DataFrame.diff (re: #2087)
- Support unary ~ (__invert__) in DataFrame (#2110) - Support unary ~ (__invert__) in DataFrame (#2110)
- Turn off pandas-style tick locators and formatters (#2205) - Turn off pandas-style tick locators and formatters (#2205)
- DataFrame[DataFrame] uses DataFrame.where to compute masked frame (#2230)


**Bug fixes** **Bug fixes**


Expand Down Expand Up @@ -100,6 +101,7 @@ pandas 0.9.1
- Fix improper MultiIndex conversion issue when assigning - Fix improper MultiIndex conversion issue when assigning
e.g. DataFrame.index (#2200) e.g. DataFrame.index (#2200)
- Fix conversion of mixed-type DataFrame to ndarray with dup columns (#2236) - Fix conversion of mixed-type DataFrame to ndarray with dup columns (#2236)
- Fix duplicate columns issue (#2218, #2219)
pandas 0.9.0 pandas 0.9.0
============ ============
Expand Down
19 changes: 13 additions & 6 deletions pandas/core/frame.py
100755 → 100644
Original file line number | Original file line | Diff line number | Diff line change
Expand Up @@ -1884,7 +1884,7 @@ def _boolean_set(self, key, value):
if self._is_mixed_type: if self._is_mixed_type:
raise ValueError('Cannot do boolean setting on mixed-type frame') raise ValueError('Cannot do boolean setting on mixed-type frame')


self.where(key, value, inplace=True) self.where(-key, value, inplace=True)


def _set_item_multiple(self, keys, value): def _set_item_multiple(self, keys, value):
if isinstance(value, DataFrame): if isinstance(value, DataFrame):
Expand Down Expand Up @@ -4872,7 +4872,6 @@ def where(self, cond, other=NA, inplace=False):
Return a DataFrame with the same shape as self and whose corresponding Return a DataFrame with the same shape as self and whose corresponding
entries are from self where cond is True and otherwise are from other. entries are from self where cond is True and otherwise are from other.
Parameters Parameters
---------- ----------
cond: boolean DataFrame or array cond: boolean DataFrame or array
Expand All @@ -4882,17 +4881,25 @@ def where(self, cond, other=NA, inplace=False):
------- -------
wh: DataFrame wh: DataFrame
""" """
if not hasattr(cond,'shape'): if not hasattr(cond, 'shape'):
raise ValueError('where requires an ndarray like object for its condition') raise ValueError('where requires an ndarray like object for its '
'condition')


if isinstance(cond, np.ndarray): if isinstance(cond, np.ndarray):
if cond.shape != self.shape: if cond.shape != self.shape:
raise ValueError('Array onditional must be same shape as self') raise ValueError('Array onditional must be same shape as self')
cond = self._constructor(cond, index=self.index, cond = self._constructor(cond, index=self.index,
columns=self.columns) columns=self.columns)

if cond.shape != self.shape: if cond.shape != self.shape:
cond = cond.reindex(self.index, columns=self.columns) cond = cond.reindex(self.index, columns=self.columns)
cond = cond.fillna(False)
if inplace:
cond = -(cond.fillna(True).astype(bool))
else:
cond = cond.fillna(False).astype(bool)
elif inplace:
cond = -cond


if isinstance(other, DataFrame): if isinstance(other, DataFrame):
_, other = self.align(other, join='left', fill_value=NA) _, other = self.align(other, join='left', fill_value=NA)
Expand All @@ -4903,7 +4910,7 @@ def where(self, cond, other=NA, inplace=False):


rs = np.where(cond, self, other) rs = np.where(cond, self, other)
return self._constructor(rs, self.index, self.columns) return self._constructor(rs, self.index, self.columns)

def mask(self, cond): def mask(self, cond):
""" """
Returns copy of self whose values are replaced with nan if the Returns copy of self whose values are replaced with nan if the
Expand Down
30 changes: 20 additions & 10 deletions pandas/tests/test_frame.py
100755 → 100644
Original file line number | Original file line | Diff line number | Diff line change
Expand Up @@ -143,7 +143,8 @@ def test_getitem_boolean(self):


# test df[df >0] works # test df[df >0] works
bif = self.tsframe[self.tsframe > 0] bif = self.tsframe[self.tsframe > 0]
bifw = DataFrame(np.where(self.tsframe>0,self.tsframe,np.nan),index=self.tsframe.index,columns=self.tsframe.columns) bifw = DataFrame(np.where(self.tsframe > 0, self.tsframe, np.nan),
index=self.tsframe.index,columns=self.tsframe.columns)
self.assert_(isinstance(bif,DataFrame)) self.assert_(isinstance(bif,DataFrame))
self.assert_(bif.shape == self.tsframe.shape) self.assert_(bif.shape == self.tsframe.shape)
assert_frame_equal(bif,bifw) assert_frame_equal(bif,bifw)
Expand Down Expand Up @@ -285,8 +286,8 @@ def test_setitem_boolean(self):
assert_almost_equal(df.values, values) assert_almost_equal(df.values, values)


# a df that needs alignment first # a df that needs alignment first
df[df[:-1]<0] = 2 df[df[:-1] < 0] = 2
np.putmask(values[:-1],values[:-1]<0,2) np.putmask(values[:-1], values[:-1] < 0, 2)
assert_almost_equal(df.values, values) assert_almost_equal(df.values, values)


self.assertRaises(Exception, df.__setitem__, df * 0, 2) self.assertRaises(Exception, df.__setitem__, df * 0, 2)
Expand Down Expand Up @@ -5268,6 +5269,13 @@ def test_where(self):
self.assertRaises(ValueError, df.mask, True) self.assertRaises(ValueError, df.mask, True)
self.assertRaises(ValueError, df.mask, 0) self.assertRaises(ValueError, df.mask, 0)


# where inplace
df = DataFrame(np.random.randn(5, 3))

expected = df.mask(df < 0)
df.where(df >= 0, np.nan, inplace=True)
assert_frame_equal(df, expected)

def test_mask(self): def test_mask(self):
df = DataFrame(np.random.randn(5, 3)) df = DataFrame(np.random.randn(5, 3))
cond = df > 0 cond = df > 0
Expand Down Expand Up @@ -7232,13 +7240,15 @@ def test_xs_view(self):
def test_boolean_indexing(self): def test_boolean_indexing(self):
idx = range(3) idx = range(3)
cols = range(3) cols = range(3)
df1 = DataFrame(index=idx, columns=cols, \ df1 = DataFrame(index=idx, columns=cols,
data=np.array([[0.0, 0.5, 1.0], data=np.array([[0.0, 0.5, 1.0],
[1.5, 2.0, 2.5], [1.5, 2.0, 2.5],
[3.0, 3.5, 4.0]], dtype=float)) [3.0, 3.5, 4.0]],
df2 = DataFrame(index=idx, columns=cols, data=np.ones((len(idx), len(cols)))) dtype=float))

df2 = DataFrame(index=idx, columns=cols,
expected = DataFrame(index=idx, columns=cols, \ data=np.ones((len(idx), len(cols))))

expected = DataFrame(index=idx, columns=cols,
data=np.array([[0.0, 0.5, 1.0], data=np.array([[0.0, 0.5, 1.0],
[1.5, 2.0, -1], [1.5, 2.0, -1],
[-1, -1, -1]], dtype=float)) [-1, -1, -1]], dtype=float))
Expand Down

0 comments on commit ca8a6ba

Please sign in to comment.