From 5a96841325bb4fb44f8a1dd312036d0e8614103a Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Mon, 12 Nov 2018 23:54:56 +0100 Subject: [PATCH 1/4] API/DEPR: replace "raise_conflict" with "errors" for df.update --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/core/frame.py | 18 ++++++--- pandas/core/panel.py | 45 +++++++++++++++-------- pandas/tests/frame/test_combine_concat.py | 4 +- pandas/tests/test_panel.py | 7 +++- 5 files changed, 50 insertions(+), 25 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 3938fba4648b1..647e3482aa53c 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -973,6 +973,7 @@ Deprecations - The ``fastpath`` keyword of the different Index constructors is deprecated (:issue:`23110`). - :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have deprecated the ``errors`` argument in favor of the ``nonexistent`` argument (:issue:`8917`) - The class ``FrozenNDArray`` has been deprecated. When unpickling, ``FrozenNDArray`` will be unpickled to ``np.ndarray`` once this class is removed (:issue:`9031`) +- The methods :meth:`DataFrame.update` and :meth:`Panel.update` have deprecated the ``raise_conflict=False|True`` keyword in favor of ``errors='ignore'|'raise'`` (:issue:`23585`) - Deprecated the `nthreads` keyword of :func:`pandas.read_feather` in favor of `use_threads` to reflect the changes in pyarrow 0.11.0. (:issue:`23053`) - :func:`pandas.read_excel` has deprecated accepting ``usecols`` as an integer. Please pass in a list of ints from 0 to ``usecols`` inclusive instead (:issue:`23527`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f6f91ff5081f1..7c525ecc83432 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5230,8 +5230,10 @@ def combiner(x, y): return self.combine(other, combiner, overwrite=False) + @deprecate_kwarg(old_arg_name='raise_conflict', new_arg_name='errors', + mapping={False: 'ignore', True: 'raise'}) def update(self, other, join='left', overwrite=True, filter_func=None, - raise_conflict=False): + errors='ignore'): """ Modify in place using non-NA values from another DataFrame. @@ -5255,17 +5257,21 @@ def update(self, other, join='left', overwrite=True, filter_func=None, * False: only update values that are NA in the original DataFrame. - filter_func : callable(1d-array) -> boolean 1d-array, optional + filter_func : callable(1d-array) -> bool 1d-array, optional Can choose to replace values other than NA. Return True for values that should be updated. - raise_conflict : bool, default False - If True, will raise a ValueError if the DataFrame and `other` + errors : {'raise', 'ignore'}, default 'ignore' + If 'raise', will raise a ValueError if the DataFrame and `other` both contain non-NA data in the same place. + Returns + ------- + None : method directly changes calling object + Raises ------ ValueError - When `raise_conflict` is True and there's overlapping non-NA data. + When `errors='raise'` and there's overlapping non-NA data. See Also -------- @@ -5349,7 +5355,7 @@ def update(self, other, join='left', overwrite=True, filter_func=None, with np.errstate(all='ignore'): mask = ~filter_func(this) | isna(that) else: - if raise_conflict: + if errors == 'raise': mask_this = notna(that) mask_that = notna(this) if any(mask_this & mask_that): diff --git a/pandas/core/panel.py b/pandas/core/panel.py index c878d16fac2e9..1d58b1e641c88 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -32,7 +32,7 @@ create_block_manager_from_blocks) from pandas.core.series import Series from pandas.core.reshape.util import cartesian_product -from pandas.util._decorators import Appender, Substitution +from pandas.util._decorators import Appender, Substitution, deprecate_kwarg from pandas.util._validators import validate_axis_style_args _shared_doc_kwargs = dict( @@ -1235,7 +1235,12 @@ def reindex(self, *args, **kwargs): kwargs.update(axes) kwargs.pop('axis', None) kwargs.pop('labels', None) - return super(Panel, self).reindex(**kwargs) + + with warnings.catch_warnings(): + warnings.simplefilter("ignore", FutureWarning) + # do not warn about constructing Panel when reindexing + result = super(Panel, self).reindex(**kwargs) + return result @Substitution(**_shared_doc_kwargs) @Appender(NDFrame.rename.__doc__) @@ -1377,25 +1382,33 @@ def join(self, other, how='left', lsuffix='', rsuffix=''): return concat([self] + list(other), axis=0, join=how, join_axes=join_axes, verify_integrity=True) + @deprecate_kwarg(old_arg_name='raise_conflict', new_arg_name='errors', + mapping={False: 'ignore', True: 'raise'}) def update(self, other, join='left', overwrite=True, filter_func=None, - raise_conflict=False): + errors='ignore'): """ - Modify Panel in place using non-NA values from passed - Panel, or object coercible to Panel. Aligns on items + Modify Panel in place using non-NA values from other Panel. + + May also use object coercible to Panel. Will align on items. Parameters ---------- other : Panel, or object coercible to Panel - join : How to join individual DataFrames - {'left', 'right', 'outer', 'inner'}, default 'left' - overwrite : boolean, default True - If True then overwrite values for common keys in the calling panel - filter_func : callable(1d-array) -> 1d-array, default None + The object from which the caller will be udpated. + join : {'left', 'right', 'outer', 'inner'}, default 'left' + How individual DataFrames are joined. + overwrite : bool, default True + If True then overwrite values for common keys in the calling Panel. + filter_func : callable(1d-array) -> 1d-array, default None Can choose to replace values other than NA. Return True for values - that should be updated - raise_conflict : bool - If True, will raise an error if a DataFrame and other both - contain data in the same place. + that should be updated. + errors : {'raise', 'ignore'}, default 'ignore' + If 'raise', will raise an error if a DataFrame and other both. + + See Also + -------- + DataFrame.update : Similar method for DataFrames. + dict.update : Similar method for dictionaries. """ if not isinstance(other, self._constructor): @@ -1406,8 +1419,8 @@ def update(self, other, join='left', overwrite=True, filter_func=None, other = other.reindex(**{axis_name: axis_values}) for frame in axis_values: - self[frame].update(other[frame], join, overwrite, filter_func, - raise_conflict) + self[frame].update(other[frame], join=join, overwrite=overwrite, + filter_func=filter_func, errors=errors) def _get_join_index(self, other, how): if how == 'left': diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index 22c5d146e1a06..884f11c3c8aa8 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -322,7 +322,9 @@ def test_update_raise(self): other = DataFrame([[2., nan], [nan, 7]], index=[1, 3], columns=[1, 2]) with pytest.raises(ValueError, match="Data overlaps"): - df.update(other, raise_conflict=True) + df.update(other, errors='raise') + with tm.assert_produces_warning(FutureWarning): + df.update(other, raise_conflict=False) def test_update_from_non_df(self): d = {'a': Series([1, 2, 3, 4]), 'b': Series([5, 6, 7, 8])} diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index bc644071e914f..bfaa4f0b5e351 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -2348,9 +2348,12 @@ def test_update_raise(self): [[1.5, np.nan, 3.], [1.5, np.nan, 3.], [1.5, np.nan, 3.], [1.5, np.nan, 3.]]]) + other = Panel([[[]]]) - pytest.raises(Exception, pan.update, *(pan, ), - **{'raise_conflict': True}) + with pytest.raises(ValueError, match='Data overlaps'): + pan.update(pan, errors='raise') + with tm.assert_produces_warning(FutureWarning): + pan.update(other, raise_conflict=True) def test_all_any(self): assert (self.panel.all(axis=0).values == nanall( From 8f9ee07d93eed9a1a2db526324852228a883e47e Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Wed, 14 Nov 2018 21:38:18 +0100 Subject: [PATCH 2/4] Review (jreback) --- pandas/core/frame.py | 11 ++++++++++- pandas/tests/frame/test_combine_concat.py | 19 +++++++++++++++++-- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8a4502b082c8a..f44db0190df66 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5246,6 +5246,9 @@ def update(self, other, join='left', overwrite=True, filter_func=None, If 'raise', will raise a ValueError if the DataFrame and `other` both contain non-NA data in the same place. + .. versionchanged :: 0.24.0 + Changed from `raise_conflict=False|True` to `errors='ignore'|'raise'`. + Returns ------- None : method directly changes calling object @@ -5253,7 +5256,10 @@ def update(self, other, join='left', overwrite=True, filter_func=None, Raises ------ ValueError - When `errors='raise'` and there's overlapping non-NA data. + * When `errors='raise'` and there's overlapping non-NA data. + * When `errors` is not either `'ignore'` or `'raise'` + NotImplementedError + * If `join != 'left'` See Also -------- @@ -5324,6 +5330,9 @@ def update(self, other, join='left', overwrite=True, filter_func=None, # TODO: Support other joins if join != 'left': # pragma: no cover raise NotImplementedError("Only left join is supported") + if errors not in ['ignore', 'raise']: + raise ValueError("The parameter errors must be either " + "'ignore' or 'raise'") if not isinstance(other, DataFrame): other = DataFrame(other) diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index 884f11c3c8aa8..25c5222b5f03c 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -313,7 +313,17 @@ def test_update_filtered(self): [1.5, nan, 7.]]) assert_frame_equal(df, expected) - def test_update_raise(self): + @pytest.mark.parametrize('bad_kwarg, exception, msg', [ + # errors must be 'ignore' or 'raise' + ({'errors': 'something'}, ValueError, 'The parameter errors must.*'), + ({'join': 'inner'}, NotImplementedError, 'Only left join is supported') + ]) + def test_update_raise_bad_parameter(self, bad_kwarg, exception, msg): + df = DataFrame([[1.5, 1, 3.]]) + with pytest.raises(exception, match=msg): + df.update(df, **bad_kwarg) + + def test_update_raise_on_overlap(self): df = DataFrame([[1.5, 1, 3.], [1.5, nan, 3.], [1.5, nan, 3], @@ -323,8 +333,13 @@ def test_update_raise(self): [nan, 7]], index=[1, 3], columns=[1, 2]) with pytest.raises(ValueError, match="Data overlaps"): df.update(other, errors='raise') + + @pytest.mark.parametrize('raise_conflict', [True, False]) + def test_update_deprecation(self, raise_conflict): + df = DataFrame([[1.5, 1, 3.]]) + other = DataFrame() with tm.assert_produces_warning(FutureWarning): - df.update(other, raise_conflict=False) + df.update(other, raise_conflict=raise_conflict) def test_update_from_non_df(self): d = {'a': Series([1, 2, 3, 4]), 'b': Series([5, 6, 7, 8])} From 067c4f222b511422a6b922770db0a3407e161344 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Wed, 14 Nov 2018 21:43:30 +0100 Subject: [PATCH 3/4] Adapt Panel tests as well; lint --- pandas/core/frame.py | 3 ++- pandas/core/panel.py | 4 ++++ pandas/tests/test_panel.py | 20 +++++++++++++++++--- 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f44db0190df66..aca6144bef558 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5247,7 +5247,8 @@ def update(self, other, join='left', overwrite=True, filter_func=None, both contain non-NA data in the same place. .. versionchanged :: 0.24.0 - Changed from `raise_conflict=False|True` to `errors='ignore'|'raise'`. + Changed from `raise_conflict=False|True` + to `errors='ignore'|'raise'`. Returns ------- diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 1d58b1e641c88..5ae7848b5adc6 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -1405,6 +1405,10 @@ def update(self, other, join='left', overwrite=True, filter_func=None, errors : {'raise', 'ignore'}, default 'ignore' If 'raise', will raise an error if a DataFrame and other both. + .. versionchanged :: 0.24.0 + Changed from `raise_conflict=False|True` + to `errors='ignore'|'raise'`. + See Also -------- DataFrame.update : Similar method for DataFrames. diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index bfaa4f0b5e351..0e45fd6411ac0 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -2341,19 +2341,33 @@ def test_update_filtered(self): assert_panel_equal(pan, expected) - def test_update_raise(self): + @pytest.mark.parametrize('bad_kwarg, exception, msg', [ + # errors must be 'ignore' or 'raise' + ({'errors': 'something'}, ValueError, 'The parameter errors must.*'), + ({'join': 'inner'}, NotImplementedError, 'Only left join is supported') + ]) + def test_update_raise_bad_parameter(self, bad_kwarg, exception, msg): + pan = Panel([[[1.5, np.nan, 3.]]]) + with pytest.raises(exception, match=msg): + pan.update(pan, **bad_kwarg) + + def test_update_raise_on_overlap(self): pan = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], [1.5, np.nan, 3.], [1.5, np.nan, 3.]], [[1.5, np.nan, 3.], [1.5, np.nan, 3.], [1.5, np.nan, 3.], [1.5, np.nan, 3.]]]) - other = Panel([[[]]]) with pytest.raises(ValueError, match='Data overlaps'): pan.update(pan, errors='raise') + + @pytest.mark.parametrize('raise_conflict', [True, False]) + def test_update_deprecation(self, raise_conflict): + pan = Panel([[[1.5, np.nan, 3.]]]) + other = Panel([[[]]]) with tm.assert_produces_warning(FutureWarning): - pan.update(other, raise_conflict=True) + pan.update(other, raise_conflict=raise_conflict) def test_all_any(self): assert (self.panel.all(axis=0).values == nanall( From 5ff6261d07b82f54e3ce031a0fc908304d5c0c81 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Thu, 15 Nov 2018 01:03:10 +0100 Subject: [PATCH 4/4] Restart CI after timeout