diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index 4c932cf3600e8..4e82fa5592529 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -5,7 +5,7 @@ from pandas import (Series, DataFrame, MultiIndex, Int64Index, UInt64Index, Float64Index, IntervalIndex, CategoricalIndex, - IndexSlice, concat, date_range) + IndexSlice, concat, date_range, option_context) class NumericSeriesIndexing: @@ -335,4 +335,20 @@ def time_assign_with_setitem(self): self.df[i] = np.random.randn(self.N) +class ChainIndexing: + + params = [None, 'warn'] + param_names = ['mode'] + + def setup(self, mode): + self.N = 1000000 + + def time_chained_indexing(self, mode): + with warnings.catch_warnings(record=True): + with option_context('mode.chained_assignment', mode): + df = DataFrame({'A': np.arange(self.N), 'B': 'foo'}) + df2 = df[df.A > self.N // 2] + df2['C'] = 1.0 + + from .pandas_vb_common import setup # noqa: F401 diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 901e4f6942897..ffd5ba19cd074 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -579,6 +579,7 @@ Performance Improvements - Improved performance of :attr:`IntervalIndex.is_unique` by removing conversion to ``MultiIndex`` (:issue:`24813`) - Restored performance of :meth:`DatetimeIndex.__iter__` by re-enabling specialized code path (:issue:`26702`) - Improved performance when building :class:`MultiIndex` with at least one :class:`CategoricalIndex` level (:issue:`22044`) +- Improved performance of the ``SettingWithCopyWarning`` check by removing a forced garbage collection (:issue:`27031`) .. 
_whatsnew_0250.bug_fixes: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 992c83e66090e..1af3e9449f3da 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3263,58 +3263,50 @@ def _check_setitem_copy(self, stacklevel=4, t='setting', force=False): """ - if force or self._is_copy: + # return early if the check is not needed + if not (force or self._is_copy): + return - value = config.get_option('mode.chained_assignment') - if value is None: - return - - # see if the copy is not actually referred; if so, then dissolve - # the copy weakref - try: - gc.collect(2) - if not gc.get_referents(self._is_copy()): - self._is_copy = None - return - except Exception: - pass + value = config.get_option('mode.chained_assignment') + if value is None: + return - # we might be a false positive - try: - if self._is_copy().shape == self.shape: - self._is_copy = None - return - except Exception: - pass + # see if the copy is not actually referred; if so, then dissolve + # the copy weakref + if self._is_copy is not None and not isinstance(self._is_copy, str): + r = self._is_copy() + if not gc.get_referents(r) or r.shape == self.shape: + self._is_copy = None + return - # a custom message - if isinstance(self._is_copy, str): - t = self._is_copy + # a custom message + if isinstance(self._is_copy, str): + t = self._is_copy - elif t == 'referant': - t = ("\n" - "A value is trying to be set on a copy of a slice from a " - "DataFrame\n\n" - "See the caveats in the documentation: " - "http://pandas.pydata.org/pandas-docs/stable/user_guide/" - "indexing.html#returning-a-view-versus-a-copy" - ) + elif t == 'referant': + t = ("\n" + "A value is trying to be set on a copy of a slice from a " + "DataFrame\n\n" + "See the caveats in the documentation: " + "http://pandas.pydata.org/pandas-docs/stable/user_guide/" + "indexing.html#returning-a-view-versus-a-copy" + ) - else: - t = ("\n" - "A value is trying to be set on a copy of a slice from a " - "DataFrame.\n" - "Try 
using .loc[row_indexer,col_indexer] = value " - "instead\n\nSee the caveats in the documentation: " - "http://pandas.pydata.org/pandas-docs/stable/user_guide/" - "indexing.html#returning-a-view-versus-a-copy" - ) - - if value == 'raise': - raise com.SettingWithCopyError(t) - elif value == 'warn': - warnings.warn(t, com.SettingWithCopyWarning, - stacklevel=stacklevel) + else: + t = ("\n" + "A value is trying to be set on a copy of a slice from a " + "DataFrame.\n" + "Try using .loc[row_indexer,col_indexer] = value " + "instead\n\nSee the caveats in the documentation: " + "http://pandas.pydata.org/pandas-docs/stable/user_guide/" + "indexing.html#returning-a-view-versus-a-copy" + ) + + if value == 'raise': + raise com.SettingWithCopyError(t) + elif value == 'warn': + warnings.warn(t, com.SettingWithCopyWarning, + stacklevel=stacklevel) def __delitem__(self, key): """