pandas-dev · jreback · Nov 6, 2013 · Oct 30, 2013 · jtratner · Nov 2, 2013
diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst
@@ -1330,24 +1330,34 @@ indexing operation, the result will be a copy. With single label / scalar
 indexing and slicing, e.g. ``df.ix[3:6]`` or ``df.ix[:, 'A']``, a view will be
 returned.
 
-In chained expressions, the order may determine whether a copy is returned or not:
+In chained expressions, the order may determine whether a copy is returned or not.
+If an expression will set values on a copy of a slice, then a ``SettingWithCopy``
+exception will be raised (this raise/warn behavior is new starting in 0.13.0)
 
-.. ipython:: python
+You can control the action of a chained assignment via the option ``mode.chained_assignment``,
+which can take the values ``['raise','warn',None]``, where showing a warning is the default.
 
+.. ipython:: python
 
    dfb = DataFrame({'a' : ['one', 'one', 'two',
                            'three', 'two', 'one', 'six'],
-                    'b' : ['x', 'y', 'y',
-                           'x', 'y', 'x', 'x'],
-                    'c' : randn(7)})
-
-
-   # goes to copy (will be lost)
-   dfb[dfb.a.str.startswith('o')]['c'] = 42
+                    'c' : np.arange(7)})
 
    # passed via reference (will stay)
    dfb['c'][dfb.a.str.startswith('o')] = 42
 
+This however is operating on a copy and will not work.
+
+::
+
+   >>> pd.set_option('mode.chained_assignment','warn')
+   >>> dfb[dfb.a.str.startswith('o')]['c'] = 42
+   Traceback (most recent call last)
+        ...
+   SettingWithCopyWarning:
+        A value is trying to be set on a copy of a slice from a DataFrame.
+        Try using .loc[row_index,col_indexer] = value instead
+
 A chained assignment can also crop up in setting in a mixed dtype frame.
 
 .. note::
@@ -1359,28 +1369,35 @@ This is the correct access method
 .. ipython:: python
 
    dfc = DataFrame({'A':['aaa','bbb','ccc'],'B':[1,2,3]})
-   dfc_copy = dfc.copy()
-   dfc_copy.loc[0,'A'] = 11
-   dfc_copy
+   dfc.loc[0,'A'] = 11
+   dfc
 
 This *can* work at times, but is not guaranteed, and so should be avoided
 
 .. ipython:: python
 
-   dfc_copy = dfc.copy()
-   dfc_copy['A'][0] = 111
-   dfc_copy
+   dfc = dfc.copy()
+   dfc['A'][0] = 111
+   dfc
 
 This will **not** work at all, and so should be avoided
 
-.. ipython:: python
+::
+
+   >>> pd.set_option('mode.chained_assignment','raise')
+   >>> dfc.loc[0]['A'] = 1111
+   Traceback (most recent call last)
+        ...
+   SettingWithCopyException:
+        A value is trying to be set on a copy of a slice from a DataFrame.
+        Try using .loc[row_index,col_indexer] = value instead
+
+.. warning::
 
-   dfc_copy = dfc.copy()
-   dfc_copy.loc[0]['A'] = 1111
-   dfc_copy
+   The chained assignment warnings / exceptions are aiming to inform the user of a possibly invalid
+   assignment. There may be false positives; situations where a chained assignment is inadvertantly
+   reported.
 
-When assigning values to subsets of your data, thus, make sure to either use the
-pandas access methods or explicitly handle the assignment creating a copy.
 
 Fallback indexing
 -----------------

diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -396,6 +396,9 @@ API Changes
     3    4.000000
     dtype: float64
 
+  - raise/warn ``SettingWithCopyError/Warning`` exception/warning when setting of a
+    copy thru chained assignment is detected, settable via option ``mode.chained_assignment``
+
 Internal Refactoring
 ~~~~~~~~~~~~~~~~~~~~
 

diff --git a/doc/source/v0.13.0.txt b/doc/source/v0.13.0.txt
@@ -104,6 +104,34 @@ API changes
 - ``Series`` and ``DataFrame`` now have a ``mode()`` method to calculate the
   statistical mode(s) by axis/Series. (:issue:`5367`)
 
+- Chained assignment will now by default warn if the user is assigning to a copy. This can be changed
+  with he option ``mode.chained_assignment``, allowed options are ``raise/warn/None``. See :ref:`the docs<indexing.view_versus_copy>`.
+
+  .. ipython:: python
+
+     dfc = DataFrame({'A':['aaa','bbb','ccc'],'B':[1,2,3]})
+     pd.set_option('chained_assignment','warn')
+
+  The following warning / exception will show if this is attempted.
+
+  .. ipython:: python
+
+     dfc.loc[0]['A'] = 1111
+
+  ::
+
+     Traceback (most recent call last)
+        ...
+     SettingWithCopyWarning:
+        A value is trying to be set on a copy of a slice from a DataFrame.
+        Try using .loc[row_index,col_indexer] = value instead
+
+  Here is the correct method of assignment.
+
+  .. ipython:: python
+
+     dfc.loc[0,'A'] = 11
+     dfc
 
 Prior Version Deprecations/Changes
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

diff --git a/pandas/core/common.py b/pandas/core/common.py
@@ -26,6 +26,11 @@
 class PandasError(Exception):
     pass
 
+class SettingWithCopyError(ValueError):
+    pass
+
+class SettingWithCopyWarning(Warning):
+    pass
 
 class AmbiguousIndexError(PandasError, KeyError):
     pass

diff --git a/pandas/core/config.py b/pandas/core/config.py
@@ -512,6 +512,13 @@ def _get_root(key):
         cursor = cursor[p]
     return cursor, path[-1]
 
+def _get_option_fast(key):
+    """ internal quick access routine, no error checking """
+    path = key.split('.')
+    cursor = _global_config
+    for p in path:
+        cursor = cursor[p]
+    return cursor
 
 def _is_deprecated(key):
     """ Returns True if the given option has been deprecated """

diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
@@ -271,7 +271,6 @@ def mpl_style_cb(key):
 # We don't want to start importing everything at the global context level
 # or we'll hit circular deps.
 
-
 def use_inf_as_null_cb(key):
     from pandas.core.common import _use_inf_as_null
     _use_inf_as_null(key)
@@ -281,6 +280,17 @@ def use_inf_as_null_cb(key):
                        cb=use_inf_as_null_cb)
 
 
+# user warnings
+chained_assignment = """
+: string
+    Raise an exception, warn, or no action if trying to use chained assignment, The default is warn
+"""
+
+with cf.config_prefix('mode'):
+    cf.register_option('chained_assignment', 'warn', chained_assignment,
+                       validator=is_one_of_factory([None, 'warn', 'raise']))
+
+
 # Set up the io.excel specific configuration.
 writer_engine_doc = """
 : string

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -1547,12 +1547,9 @@ def _ixs(self, i, axis=0, copy=False):
                     i = _maybe_convert_indices(i, len(self._get_axis(axis)))
                     return self.reindex(i, takeable=True)
                 else:
-                    try:
-                        new_values = self._data.fast_2d_xs(i, copy=copy)
-                    except:
-                        new_values = self._data.fast_2d_xs(i, copy=True)
+                    new_values, copy = self._data.fast_2d_xs(i, copy=copy)
                     return Series(new_values, index=self.columns,
-                                  name=self.index[i])
+                                  name=self.index[i])._setitem_copy(copy)
 
         # icol
         else:
@@ -1892,10 +1889,18 @@ def _set_item(self, key, value):
         Series/TimeSeries will be conformed to the DataFrame's index to
         ensure homogeneity.
         """
+
+        is_existing = key in self.columns
         self._ensure_valid_index(value)
         value = self._sanitize_column(key, value)
         NDFrame._set_item(self, key, value)
 
+        # check if we are modifying a copy
+        # try to set first as we want an invalid
+        # value exeption to occur first
+        if is_existing:
+            self._check_setitem_copy()
+
     def insert(self, loc, column, value, allow_duplicates=False):
         """
         Insert column into DataFrame at specified location.
@@ -2093,13 +2098,16 @@ def xs(self, key, axis=0, level=None, copy=True, drop_level=True):
                 new_index = self.index[loc]
 
         if np.isscalar(loc):
-            new_values = self._data.fast_2d_xs(loc, copy=copy)
-            return Series(new_values, index=self.columns,
-                          name=self.index[loc])
+
+            new_values, copy = self._data.fast_2d_xs(loc, copy=copy)
+            result = Series(new_values, index=self.columns,
+                            name=self.index[loc])._setitem_copy(copy)
+
         else:
             result = self[loc]
             result.index = new_index
-            return result
+
+        return result
 
     _xs = xs
 

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -19,9 +19,11 @@
 from pandas import compat, _np_version_under1p7
 from pandas.compat import map, zip, lrange, string_types, isidentifier
 from pandas.core.common import (isnull, notnull, is_list_like,
-                                _values_from_object, _maybe_promote, ABCSeries)
+                                _values_from_object, _maybe_promote, ABCSeries,
+                                SettingWithCopyError, SettingWithCopyWarning)
 import pandas.core.nanops as nanops
 from pandas.util.decorators import Appender, Substitution
+from pandas.core import config
 
 # goal is to be able to define the docs close to function, while still being
 # able to share
@@ -69,7 +71,7 @@ class NDFrame(PandasObject):
     copy : boolean, default False
     """
     _internal_names = [
-        '_data', 'name', '_cacher', '_subtyp', '_index', '_default_kind', '_default_fill_value']
+        '_data', 'name', '_cacher', '_is_copy', '_subtyp', '_index', '_default_kind', '_default_fill_value']
     _internal_names_set = set(_internal_names)
     _metadata = []
 
@@ -85,6 +87,7 @@ def __init__(self, data, axes=None, copy=False, dtype=None, fastpath=False):
                 for i, ax in enumerate(axes):
                     data = data.reindex_axis(ax, axis=i)
 
+        object.__setattr__(self, '_is_copy', False)
         object.__setattr__(self, '_data', data)
         object.__setattr__(self, '_item_cache', {})
 
@@ -988,6 +991,22 @@ def _set_item(self, key, value):
         self._data.set(key, value)
         self._clear_item_cache()
 
+    def _setitem_copy(self, copy):
+        """ set the _is_copy of the iiem """
+        self._is_copy = copy
+        return self
+
+    def _check_setitem_copy(self):
+        """ validate if we are doing a settitem on a chained copy """
+        if self._is_copy:
+            value = config._get_option_fast('mode.chained_assignment')
+
+            t = "A value is trying to be set on a copy of a slice from a DataFrame.\nTry using .loc[row_index,col_indexer] = value instead"
+            if value == 'raise':
+                raise SettingWithCopyError(t)
+            elif value == 'warn':
+                warnings.warn(t,SettingWithCopyWarning)
+
     def __delitem__(self, key):
         """
         Delete item
@@ -1049,7 +1068,7 @@ def take(self, indices, axis=0, convert=True):
             new_data = self._data.reindex_axis(new_items, indexer=indices, axis=0)
         else:
             new_data = self._data.take(indices, axis=baxis)
-        return self._constructor(new_data).__finalize__(self)
+        return self._constructor(new_data)._setitem_copy(True).__finalize__(self)
 
     # TODO: Check if this was clearer in 0.12
     def select(self, crit, axis=0):

diff --git a/pandas/core/internals.py b/pandas/core/internals.py
@@ -2567,22 +2567,20 @@ def fast_2d_xs(self, loc, copy=False):
         """
         get a cross sectional for a given location in the
         items ; handle dups
+
+        return the result and a flag if a copy was actually made
         """
         if len(self.blocks) == 1:
             result = self.blocks[0].values[:, loc]
             if copy:
                 result = result.copy()
-            return result
-
-        if not copy:
-            raise TypeError('cannot get view of mixed-type or '
-                            'non-consolidated DataFrame')
+            return result, copy
 
         items = self.items
 
         # non-unique (GH4726)
         if not items.is_unique:
-            return self._interleave(items).ravel()
+            return self._interleave(items).ravel(), True
 
         # unique
         dtype = _interleaved_dtype(self.blocks)
@@ -2593,7 +2591,7 @@ def fast_2d_xs(self, loc, copy=False):
                 i = items.get_loc(item)
                 result[i] = blk._try_coerce_result(blk.iget((j, loc)))
 
-        return result
+        return result, True
 
     def consolidate(self):
         """

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -22,7 +22,8 @@
                                 _values_from_object,
                                 _possibly_cast_to_datetime, _possibly_castable,
                                 _possibly_convert_platform,
-                                ABCSparseArray, _maybe_match_name, _ensure_object)
+                                ABCSparseArray, _maybe_match_name, _ensure_object,
+                                SettingWithCopyError)
 
 from pandas.core.index import (Index, MultiIndex, InvalidIndexError,
                                _ensure_index, _handle_legacy_indexes)
@@ -575,6 +576,8 @@ def __setitem__(self, key, value):
         try:
             self._set_with_engine(key, value)
             return
+        except (SettingWithCopyError):
+            raise
         except (KeyError, ValueError):
             values = self.values
             if (com.is_integer(key)
@@ -623,6 +626,7 @@ def _set_with_engine(self, key, value):
         values = self.values
         try:
             self.index._engine.set_value(values, key, value)
+            self._check_setitem_copy()
             return
         except KeyError:
             values[self.index.get_loc(key)] = value

diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
@@ -11059,9 +11059,11 @@ def test_xs_view(self):
         dm.xs(2)[:] = 10
         self.assert_((dm.xs(2) == 5).all())
 
+        # prior to chained assignment (GH5390)
+        # this would raise, but now just rrens a copy (and sets _is_copy)
         # TODO (?): deal with mixed-type fiasco?
-        with assertRaisesRegexp(TypeError, 'cannot get view of mixed-type'):
-            self.mixed_frame.xs(self.mixed_frame.index[2], copy=False)
+        # with assertRaisesRegexp(TypeError, 'cannot get view of mixed-type'):
+        #    self.mixed_frame.xs(self.mixed_frame.index[2], copy=False)
 
         # unconsolidated
         dm['foo'] = 6.