Merge pull request #7950 from jreback/setting_with_copy

COMPAT: raise SettingWithCopy in even more situations when a view is at hand
pandas-dev · Aug 11, 2014 · f400014 · f400014
2 parents 123a555 + 70a17da
commit f400014
Show file tree

Hide file tree

Showing 14 changed files with 237 additions and 151 deletions.
diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst
@@ -1481,7 +1481,8 @@ which can take the values ``['raise','warn',None]``, where showing a warning is
                            'three', 'two', 'one', 'six'],
                     'c' : np.arange(7)})
 
-   # passed via reference (will stay)
+   # This will show the SettingWithCopyWarning
+   # but the frame values will be set
    dfb['c'][dfb.a.str.startswith('o')] = 42
 
 This however is operating on a copy and will not work.

diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt
@@ -114,7 +114,7 @@ API changes
         df
         df.dtypes
 
-- ``SettingWithCopy`` raise/warnings (according to the option ``mode.chained_assignment``) will now be issued when setting a value on a sliced mixed-dtype DataFrame using chained-assignment. (:issue:`7845`)
+- ``SettingWithCopy`` raise/warnings (according to the option ``mode.chained_assignment``) will now be issued when setting a value on a sliced mixed-dtype DataFrame using chained-assignment. (:issue:`7845`, :issue:`7950`)
 
   .. code-block:: python
 

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -1088,8 +1088,14 @@ def _maybe_cache_changed(self, item, value):
     @property
     def _is_cached(self):
         """ boolean : return if I am cached """
+        return getattr(self, '_cacher', None) is not None
+
+    def _get_cacher(self):
+        """ return my cacher or None """
         cacher = getattr(self, '_cacher', None)
-        return cacher is not None
+        if cacher is not None:
+            cacher = cacher[1]()
+        return cacher
 
     @property
     def _is_view(self):
@@ -1154,8 +1160,35 @@ def _set_is_copy(self, ref=None, copy=True):
             else:
                 self.is_copy = None
 
-    def _check_setitem_copy(self, stacklevel=4, t='setting'):
+    def _check_is_chained_assignment_possible(self):
+        """
+        check if we are a view, have a cacher, and are of mixed type
+        if so, then force a setitem_copy check
+
+        should be called just near setting a value
+
+        will return a boolean if it we are a view and are cached, but a single-dtype
+        meaning that the cacher should be updated following setting
         """
+        if self._is_view and self._is_cached:
+            ref = self._get_cacher()
+            if ref is not None and ref._is_mixed_type:
+                self._check_setitem_copy(stacklevel=4, t='referant', force=True)
+            return True
+        elif self.is_copy:
+            self._check_setitem_copy(stacklevel=4, t='referant')
+        return False
+
+    def _check_setitem_copy(self, stacklevel=4, t='setting', force=False):
+        """
+
+        Parameters
+        ----------
+        stacklevel : integer, default 4
+           the level to show of the stack when the error is output
+        t : string, the type of setting error
+        force : boolean, default False
+           if True, then force showing an error
 
         validate if we are doing a settitem on a chained copy.
 
@@ -1177,7 +1210,7 @@ def _check_setitem_copy(self, stacklevel=4, t='setting'):
 
         """
 
-        if self.is_copy:
+        if force or self.is_copy:
 
             value = config.get_option('mode.chained_assignment')
             if value is None:

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -472,6 +472,9 @@ def can_do_equal_len():
             if isinstance(value, ABCPanel):
                 value = self._align_panel(indexer, value)
 
+            # check for chained assignment
+            self.obj._check_is_chained_assignment_possible()
+
             # actually do the set
             self.obj._data = self.obj._data.setitem(indexer=indexer, value=value)
             self.obj._maybe_update_cacher(clear=True)

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -587,61 +587,68 @@ def _get_values(self, indexer):
             return self.values[indexer]
 
     def __setitem__(self, key, value):
-        try:
-            self._set_with_engine(key, value)
-            return
-        except (SettingWithCopyError):
-            raise
-        except (KeyError, ValueError):
-            values = self.values
-            if (com.is_integer(key)
-                    and not self.index.inferred_type == 'integer'):
 
-                values[key] = value
+        def setitem(key, value):
+            try:
+                self._set_with_engine(key, value)
                 return
-            elif key is Ellipsis:
-                self[:] = value
+            except (SettingWithCopyError):
+                raise
+            except (KeyError, ValueError):
+                values = self.values
+                if (com.is_integer(key)
+                                and not self.index.inferred_type == 'integer'):
+
+                    values[key] = value
+                    return
+                elif key is Ellipsis:
+                    self[:] = value
+                    return
+                elif _is_bool_indexer(key):
+                    pass
+                elif com.is_timedelta64_dtype(self.dtype):
+                    # reassign a null value to iNaT
+                    if isnull(value):
+                        value = tslib.iNaT
+
+                        try:
+                            self.index._engine.set_value(self.values, key, value)
+                            return
+                        except (TypeError):
+                            pass
+
+                self.loc[key] = value
                 return
-            elif _is_bool_indexer(key):
-                pass
-            elif com.is_timedelta64_dtype(self.dtype):
-                # reassign a null value to iNaT
-                if isnull(value):
-                    value = tslib.iNaT
-
-                    try:
-                        self.index._engine.set_value(self.values, key, value)
-                        return
-                    except (TypeError):
-                        pass
-
-            self.loc[key] = value
-            return
 
-        except TypeError as e:
-            if isinstance(key, tuple) and not isinstance(self.index,
-                                                         MultiIndex):
-                raise ValueError("Can only tuple-index with a MultiIndex")
+            except TypeError as e:
+                if isinstance(key, tuple) and not isinstance(self.index,
+                                                             MultiIndex):
+                    raise ValueError("Can only tuple-index with a MultiIndex")
 
-            # python 3 type errors should be raised
-            if 'unorderable' in str(e):  # pragma: no cover
-                raise IndexError(key)
+                # python 3 type errors should be raised
+                if 'unorderable' in str(e):  # pragma: no cover
+                    raise IndexError(key)
 
-        if _is_bool_indexer(key):
-            key = _check_bool_indexer(self.index, key)
-            try:
-                self.where(~key, value, inplace=True)
-                return
-            except (InvalidIndexError):
-                pass
+            if _is_bool_indexer(key):
+                key = _check_bool_indexer(self.index, key)
+                try:
+                    self.where(~key, value, inplace=True)
+                    return
+                except (InvalidIndexError):
+                    pass
+
+            self._set_with(key, value)
 
-        self._set_with(key, value)
+        # do the setitem
+        cacher_needs_updating = self._check_is_chained_assignment_possible()
+        setitem(key, value)
+        if cacher_needs_updating:
+            self._maybe_update_cacher()
 
     def _set_with_engine(self, key, value):
         values = self.values
         try:
             self.index._engine.set_value(values, key, value)
-            self._check_setitem_copy()
             return
         except KeyError:
             values[self.index.get_loc(key)] = value

diff --git a/pandas/io/tests/test_json/test_pandas.py b/pandas/io/tests/test_json/test_pandas.py
@@ -305,13 +305,13 @@ def test_frame_from_json_nones(self):
         # infinities get mapped to nulls which get mapped to NaNs during
         # deserialisation
         df = DataFrame([[1, 2], [4, 5, 6]])
-        df[2][0] = np.inf
+        df.loc[0,2] = np.inf
         unser = read_json(df.to_json())
         self.assertTrue(np.isnan(unser[2][0]))
         unser = read_json(df.to_json(), dtype=False)
         self.assertTrue(np.isnan(unser[2][0]))
 
-        df[2][0] = np.NINF
+        df.loc[0,2] = np.NINF
         unser = read_json(df.to_json())
         self.assertTrue(np.isnan(unser[2][0]))
         unser = read_json(df.to_json(),dtype=False)

diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
@@ -1278,8 +1278,8 @@ def test_append_with_data_columns(self):
             # data column selection with a string data_column
             df_new = df.copy()
             df_new['string'] = 'foo'
-            df_new['string'][1:4] = np.nan
-            df_new['string'][5:6] = 'bar'
+            df_new.loc[1:4,'string'] = np.nan
+            df_new.loc[5:6,'string'] = 'bar'
             _maybe_remove(store, 'df')
             store.append('df', df_new, data_columns=['string'])
             result = store.select('df', [Term('string=foo')])
@@ -1317,14 +1317,14 @@ def check_col(key,name,size):
         with ensure_clean_store(self.path) as store:
             # multiple data columns
             df_new = df.copy()
-            df_new.loc[:,'A'].iloc[0] = 1.
-            df_new.loc[:,'B'].iloc[0] = -1.
+            df_new.ix[0,'A'] = 1.
+            df_new.ix[0,'B'] = -1.
             df_new['string'] = 'foo'
-            df_new['string'][1:4] = np.nan
-            df_new['string'][5:6] = 'bar'
+            df_new.loc[1:4,'string'] = np.nan
+            df_new.loc[5:6,'string'] = 'bar'
             df_new['string2'] = 'foo'
-            df_new['string2'][2:5] = np.nan
-            df_new['string2'][7:8] = 'bar'
+            df_new.loc[2:5,'string2'] = np.nan
+            df_new.loc[7:8,'string2'] = 'bar'
             _maybe_remove(store, 'df')
             store.append(
                 'df', df_new, data_columns=['A', 'B', 'string', 'string2'])

diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py
@@ -1348,8 +1348,8 @@ def test_to_string(self):
                             'B': tm.makeStringIndex(200)},
                            index=lrange(200))
 
-        biggie['A'][:20] = nan
-        biggie['B'][:20] = nan
+        biggie.loc[:20,'A'] = nan
+        biggie.loc[:20,'B'] = nan
         s = biggie.to_string()
 
         buf = StringIO()
@@ -1597,8 +1597,8 @@ def test_to_html(self):
                             'B': tm.makeStringIndex(200)},
                            index=lrange(200))
 
-        biggie['A'][:20] = nan
-        biggie['B'][:20] = nan
+        biggie.loc[:20,'A'] = nan
+        biggie.loc[:20,'B'] = nan
         s = biggie.to_html()
 
         buf = StringIO()
@@ -1624,8 +1624,8 @@ def test_to_html_filename(self):
                             'B': tm.makeStringIndex(200)},
                            index=lrange(200))
 
-        biggie['A'][:20] = nan
-        biggie['B'][:20] = nan
+        biggie.loc[:20,'A'] = nan
+        biggie.loc[:20,'B'] = nan
         with tm.ensure_clean('test.html') as path:
             biggie.to_html(path)
             with open(path, 'r') as f: