Skip to content

Commit

Permalink
Merge pull request #7950 from jreback/setting_with_copy
Browse files Browse the repository at this point in the history
COMPAT: raise SettingWithCopy in even more situations when a view is at hand
  • Loading branch information
jreback committed Aug 11, 2014
2 parents 123a555 + 70a17da commit f400014
Show file tree
Hide file tree
Showing 14 changed files with 237 additions and 151 deletions.
3 changes: 2 additions & 1 deletion doc/source/indexing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1481,7 +1481,8 @@ which can take the values ``['raise','warn',None]``, where showing a warning is
'three', 'two', 'one', 'six'],
'c' : np.arange(7)})
# passed via reference (will stay)
# This will show the SettingWithCopyWarning
# but the frame values will be set
dfb['c'][dfb.a.str.startswith('o')] = 42
This however is operating on a copy and will not work.
Expand Down
2 changes: 1 addition & 1 deletion doc/source/v0.15.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ API changes
df
df.dtypes

- ``SettingWithCopy`` raise/warnings (according to the option ``mode.chained_assignment``) will now be issued when setting a value on a sliced mixed-dtype DataFrame using chained-assignment. (:issue:`7845`)
- ``SettingWithCopy`` raise/warnings (according to the option ``mode.chained_assignment``) will now be issued when setting a value on a sliced mixed-dtype DataFrame using chained-assignment. (:issue:`7845`, :issue:`7950`)

.. code-block:: python

Expand Down
39 changes: 36 additions & 3 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1088,8 +1088,14 @@ def _maybe_cache_changed(self, item, value):
@property
def _is_cached(self):
""" boolean : return if I am cached """
return getattr(self, '_cacher', None) is not None

def _get_cacher(self):
""" return my cacher or None """
cacher = getattr(self, '_cacher', None)
return cacher is not None
if cacher is not None:
cacher = cacher[1]()
return cacher

@property
def _is_view(self):
Expand Down Expand Up @@ -1154,8 +1160,35 @@ def _set_is_copy(self, ref=None, copy=True):
else:
self.is_copy = None

def _check_setitem_copy(self, stacklevel=4, t='setting'):
def _check_is_chained_assignment_possible(self):
"""
check if we are a view, have a cacher, and are of mixed type
if so, then force a setitem_copy check
should be called just near setting a value
will return a boolean if it we are a view and are cached, but a single-dtype
meaning that the cacher should be updated following setting
"""
if self._is_view and self._is_cached:
ref = self._get_cacher()
if ref is not None and ref._is_mixed_type:
self._check_setitem_copy(stacklevel=4, t='referant', force=True)
return True
elif self.is_copy:
self._check_setitem_copy(stacklevel=4, t='referant')
return False

def _check_setitem_copy(self, stacklevel=4, t='setting', force=False):
"""
Parameters
----------
stacklevel : integer, default 4
the level to show of the stack when the error is output
t : string, the type of setting error
force : boolean, default False
if True, then force showing an error
validate if we are doing a settitem on a chained copy.
Expand All @@ -1177,7 +1210,7 @@ def _check_setitem_copy(self, stacklevel=4, t='setting'):
"""

if self.is_copy:
if force or self.is_copy:

value = config.get_option('mode.chained_assignment')
if value is None:
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,9 @@ def can_do_equal_len():
if isinstance(value, ABCPanel):
value = self._align_panel(indexer, value)

# check for chained assignment
self.obj._check_is_chained_assignment_possible()

# actually do the set
self.obj._data = self.obj._data.setitem(indexer=indexer, value=value)
self.obj._maybe_update_cacher(clear=True)
Expand Down
93 changes: 50 additions & 43 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -587,61 +587,68 @@ def _get_values(self, indexer):
return self.values[indexer]

def __setitem__(self, key, value):
try:
self._set_with_engine(key, value)
return
except (SettingWithCopyError):
raise
except (KeyError, ValueError):
values = self.values
if (com.is_integer(key)
and not self.index.inferred_type == 'integer'):

values[key] = value
def setitem(key, value):
try:
self._set_with_engine(key, value)
return
elif key is Ellipsis:
self[:] = value
except (SettingWithCopyError):
raise
except (KeyError, ValueError):
values = self.values
if (com.is_integer(key)
and not self.index.inferred_type == 'integer'):

values[key] = value
return
elif key is Ellipsis:
self[:] = value
return
elif _is_bool_indexer(key):
pass
elif com.is_timedelta64_dtype(self.dtype):
# reassign a null value to iNaT
if isnull(value):
value = tslib.iNaT

try:
self.index._engine.set_value(self.values, key, value)
return
except (TypeError):
pass

self.loc[key] = value
return
elif _is_bool_indexer(key):
pass
elif com.is_timedelta64_dtype(self.dtype):
# reassign a null value to iNaT
if isnull(value):
value = tslib.iNaT

try:
self.index._engine.set_value(self.values, key, value)
return
except (TypeError):
pass

self.loc[key] = value
return

except TypeError as e:
if isinstance(key, tuple) and not isinstance(self.index,
MultiIndex):
raise ValueError("Can only tuple-index with a MultiIndex")
except TypeError as e:
if isinstance(key, tuple) and not isinstance(self.index,
MultiIndex):
raise ValueError("Can only tuple-index with a MultiIndex")

# python 3 type errors should be raised
if 'unorderable' in str(e): # pragma: no cover
raise IndexError(key)
# python 3 type errors should be raised
if 'unorderable' in str(e): # pragma: no cover
raise IndexError(key)

if _is_bool_indexer(key):
key = _check_bool_indexer(self.index, key)
try:
self.where(~key, value, inplace=True)
return
except (InvalidIndexError):
pass
if _is_bool_indexer(key):
key = _check_bool_indexer(self.index, key)
try:
self.where(~key, value, inplace=True)
return
except (InvalidIndexError):
pass

self._set_with(key, value)

self._set_with(key, value)
# do the setitem
cacher_needs_updating = self._check_is_chained_assignment_possible()
setitem(key, value)
if cacher_needs_updating:
self._maybe_update_cacher()

def _set_with_engine(self, key, value):
values = self.values
try:
self.index._engine.set_value(values, key, value)
self._check_setitem_copy()
return
except KeyError:
values[self.index.get_loc(key)] = value
Expand Down
4 changes: 2 additions & 2 deletions pandas/io/tests/test_json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,13 +305,13 @@ def test_frame_from_json_nones(self):
# infinities get mapped to nulls which get mapped to NaNs during
# deserialisation
df = DataFrame([[1, 2], [4, 5, 6]])
df[2][0] = np.inf
df.loc[0,2] = np.inf
unser = read_json(df.to_json())
self.assertTrue(np.isnan(unser[2][0]))
unser = read_json(df.to_json(), dtype=False)
self.assertTrue(np.isnan(unser[2][0]))

df[2][0] = np.NINF
df.loc[0,2] = np.NINF
unser = read_json(df.to_json())
self.assertTrue(np.isnan(unser[2][0]))
unser = read_json(df.to_json(),dtype=False)
Expand Down
16 changes: 8 additions & 8 deletions pandas/io/tests/test_pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -1278,8 +1278,8 @@ def test_append_with_data_columns(self):
# data column selection with a string data_column
df_new = df.copy()
df_new['string'] = 'foo'
df_new['string'][1:4] = np.nan
df_new['string'][5:6] = 'bar'
df_new.loc[1:4,'string'] = np.nan
df_new.loc[5:6,'string'] = 'bar'
_maybe_remove(store, 'df')
store.append('df', df_new, data_columns=['string'])
result = store.select('df', [Term('string=foo')])
Expand Down Expand Up @@ -1317,14 +1317,14 @@ def check_col(key,name,size):
with ensure_clean_store(self.path) as store:
# multiple data columns
df_new = df.copy()
df_new.loc[:,'A'].iloc[0] = 1.
df_new.loc[:,'B'].iloc[0] = -1.
df_new.ix[0,'A'] = 1.
df_new.ix[0,'B'] = -1.
df_new['string'] = 'foo'
df_new['string'][1:4] = np.nan
df_new['string'][5:6] = 'bar'
df_new.loc[1:4,'string'] = np.nan
df_new.loc[5:6,'string'] = 'bar'
df_new['string2'] = 'foo'
df_new['string2'][2:5] = np.nan
df_new['string2'][7:8] = 'bar'
df_new.loc[2:5,'string2'] = np.nan
df_new.loc[7:8,'string2'] = 'bar'
_maybe_remove(store, 'df')
store.append(
'df', df_new, data_columns=['A', 'B', 'string', 'string2'])
Expand Down
12 changes: 6 additions & 6 deletions pandas/tests/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1348,8 +1348,8 @@ def test_to_string(self):
'B': tm.makeStringIndex(200)},
index=lrange(200))

biggie['A'][:20] = nan
biggie['B'][:20] = nan
biggie.loc[:20,'A'] = nan
biggie.loc[:20,'B'] = nan
s = biggie.to_string()

buf = StringIO()
Expand Down Expand Up @@ -1597,8 +1597,8 @@ def test_to_html(self):
'B': tm.makeStringIndex(200)},
index=lrange(200))

biggie['A'][:20] = nan
biggie['B'][:20] = nan
biggie.loc[:20,'A'] = nan
biggie.loc[:20,'B'] = nan
s = biggie.to_html()

buf = StringIO()
Expand All @@ -1624,8 +1624,8 @@ def test_to_html_filename(self):
'B': tm.makeStringIndex(200)},
index=lrange(200))

biggie['A'][:20] = nan
biggie['B'][:20] = nan
biggie.loc[:20,'A'] = nan
biggie.loc[:20,'B'] = nan
with tm.ensure_clean('test.html') as path:
biggie.to_html(path)
with open(path, 'r') as f:
Expand Down
Loading

0 comments on commit f400014

Please sign in to comment.