From a8cc5c0cddaa12b03c2235ea2707e7cdcd60804a Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 2 Feb 2018 19:03:37 -0800 Subject: [PATCH 1/6] de-duplicate, remove unused arguments --- pandas/core/frame.py | 49 ++++++++++++++----------------------- pandas/core/indexes/base.py | 47 +++++++++++++++-------------------- pandas/core/ops.py | 8 +++--- pandas/core/sparse/frame.py | 14 +++-------- 4 files changed, 47 insertions(+), 71 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 96d28581cfdd9..44a9043a265ff 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3907,8 +3907,7 @@ def reorder_levels(self, order, axis=0): # ---------------------------------------------------------------------- # Arithmetic / combination related - def _combine_frame(self, other, func, fill_value=None, level=None, - try_cast=True): + def _combine_frame(self, other, func, fill_value=None, level=None): this, other = self.align(other, join='outer', level=level, copy=False) new_index, new_columns = this.index, this.columns @@ -3960,52 +3959,40 @@ def f(i): def _combine_series(self, other, func, fill_value=None, axis=None, level=None, try_cast=True): + if fill_value is not None: + raise NotImplementedError("fill_value %r not supported." % + fill_value) + if axis is not None: axis = self._get_axis_name(axis) if axis == 'index': - return self._combine_match_index(other, func, level=level, - fill_value=fill_value, - try_cast=try_cast) + return self._combine_match_index(other, func, level=level) else: return self._combine_match_columns(other, func, level=level, - fill_value=fill_value, try_cast=try_cast) - return self._combine_series_infer(other, func, level=level, - fill_value=fill_value, - try_cast=try_cast) + else: + if len(other) == 0: + return self * np.nan - def _combine_series_infer(self, other, func, level=None, - fill_value=None, try_cast=True): - if len(other) == 0: - return self * np.nan + if len(self) == 0: + # Ambiguous case, use _series so works with DataFrame + return self._constructor(data=self._series, index=self.index, + columns=self.columns) - if len(self) == 0: - # Ambiguous case, use _series so works with DataFrame - return self._constructor(data=self._series, index=self.index, - columns=self.columns) + # default axis is columns + return self._combine_match_columns(other, func, level=level, + try_cast=try_cast) - return self._combine_match_columns(other, func, level=level, - fill_value=fill_value, - try_cast=try_cast) - - def _combine_match_index(self, other, func, level=None, - fill_value=None, try_cast=True): + def _combine_match_index(self, other, func, level=None): left, right = self.align(other, join='outer', axis=0, level=level, copy=False) - if fill_value is not None: - raise NotImplementedError("fill_value %r not supported." % - fill_value) return self._constructor(func(left.values.T, right.values).T, index=left.index, columns=self.columns, copy=False) - def _combine_match_columns(self, other, func, level=None, - fill_value=None, try_cast=True): + def _combine_match_columns(self, other, func, level=None, try_cast=True): left, right = self.align(other, join='outer', axis=1, level=level, copy=False) - if fill_value is not None: - raise NotImplementedError("fill_value %r not supported" % - fill_value) new_data = left._data.eval(func=func, other=right, axes=[left.columns, self.index], diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 626f3dc86556a..e5776589a0902 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3916,30 +3916,12 @@ def _evaluate_compare(self, other): @classmethod def _add_numeric_methods_add_sub_disabled(cls): """ add in the numeric add/sub methods to disable """ - - def _make_invalid_op(name): - def invalid_op(self, other=None): - raise TypeError("cannot perform {name} with this index type: " - "{typ}".format(name=name, typ=type(self))) - - invalid_op.__name__ = name - return invalid_op - cls.__add__ = cls.__radd__ = __iadd__ = _make_invalid_op('__add__') # noqa cls.__sub__ = __isub__ = _make_invalid_op('__sub__') # noqa @classmethod def _add_numeric_methods_disabled(cls): """ add in numeric methods to disable other than add/sub """ - - def _make_invalid_op(name): - def invalid_op(self, other=None): - raise TypeError("cannot perform {name} with this index type: " - "{typ}".format(name=name, typ=type(self))) - - invalid_op.__name__ = name - return invalid_op - cls.__pow__ = cls.__rpow__ = _make_invalid_op('__pow__') cls.__mul__ = cls.__rmul__ = _make_invalid_op('__mul__') cls.__floordiv__ = cls.__rfloordiv__ = _make_invalid_op('__floordiv__') @@ -4145,15 +4127,6 @@ def logical_func(self, *args, **kwargs): @classmethod def _add_logical_methods_disabled(cls): """ add in logical methods to disable """ - - def _make_invalid_op(name): - def invalid_op(self, other=None): - raise TypeError("cannot perform {name} with this index type: " - "{typ}".format(name=name, typ=type(self))) - - invalid_op.__name__ = name - return invalid_op - cls.all = _make_invalid_op('all') cls.any = _make_invalid_op('any') @@ -4163,6 +4136,26 @@ def invalid_op(self, other=None): Index._add_comparison_methods() +def _make_invalid_op(name): + """ + Return a binary method that always raises a TypeError. + + Parameters + ---------- + name : str + + Returns + ------- + invalid_op : function + """ + def invalid_op(self, other=None): + raise TypeError("cannot perform {name} with this index type: " + "{typ}".format(name=name, typ=type(self))) + + invalid_op.__name__ = name + return invalid_op + + def _ensure_index_from_sequences(sequences, names=None): """Construct an index from sequences of data. diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 6ea4a81cb52a1..ad63a9aaa41e8 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1070,12 +1070,13 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): if isinstance(other, ABCDataFrame): # Another DataFrame return self._combine_frame(other, na_op, fill_value, level) elif isinstance(other, ABCSeries): - return self._combine_series(other, na_op, fill_value, axis, level) + return self._combine_series(other, na_op, fill_value, axis, level, + try_cast=True) else: if fill_value is not None: self = self.fillna(fill_value) - return self._combine_const(other, na_op) + return self._combine_const(other, na_op, try_cast=True) f.__name__ = name @@ -1136,7 +1137,8 @@ def f(self, other): if isinstance(other, ABCDataFrame): # Another DataFrame return self._compare_frame(other, func, str_rep) elif isinstance(other, ABCSeries): - return self._combine_series_infer(other, func, try_cast=False) + return self._combine_series(other, func, + axis=None, try_cast=False) else: # straight boolean comparisons we want to allow all columns diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 91dc44e3f185e..122c2b11f25f9 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -540,8 +540,7 @@ def xs(self, key, axis=0, copy=False): # ---------------------------------------------------------------------- # Arithmetic-related methods - def _combine_frame(self, other, func, fill_value=None, level=None, - try_cast=True): + def _combine_frame(self, other, func, fill_value=None, level=None): this, other = self.align(other, join='outer', level=level, copy=False) new_index, new_columns = this.index, this.columns @@ -584,12 +583,9 @@ def _combine_frame(self, other, func, fill_value=None, level=None, default_fill_value=new_fill_value ).__finalize__(self) - def _combine_match_index(self, other, func, level=None, fill_value=None, - try_cast=True): + def _combine_match_index(self, other, func, level=None): new_data = {} - if fill_value is not None: - raise NotImplementedError("'fill_value' argument is not supported") if level is not None: raise NotImplementedError("'level' argument is not supported") @@ -605,6 +601,7 @@ def _combine_match_index(self, other, func, level=None, fill_value=None, new_data[col] = func(series.values, other.values) # fill_value is a function of our operator + fill_value = None if isna(other.fill_value) or isna(self.default_fill_value): fill_value = np.nan else: @@ -615,15 +612,12 @@ def _combine_match_index(self, other, func, level=None, fill_value=None, new_data, index=new_index, columns=self.columns, default_fill_value=fill_value).__finalize__(self) - def _combine_match_columns(self, other, func, level=None, fill_value=None, - try_cast=True): + def _combine_match_columns(self, other, func, level=None, try_cast=True): # patched version of DataFrame._combine_match_columns to account for # NumPy circumventing __rsub__ with float64 types, e.g.: 3.0 - series, # where 3.0 is numpy.float64 and series is a SparseSeries. Still # possible for this to happen, which is bothersome - if fill_value is not None: - raise NotImplementedError("'fill_value' argument is not supported") if level is not None: raise NotImplementedError("'level' argument is not supported") From ce3e616dfeeb5235981e6df1f491b328b885d0ce Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 2 Feb 2018 19:33:22 -0800 Subject: [PATCH 2/6] avoid nameerror --- pandas/core/indexes/base.py | 40 ++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e5776589a0902..b591203f43a6f 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -80,6 +80,26 @@ def _try_get_item(x): return x +def _make_invalid_op(name): + """ + Return a binary method that always raises a TypeError. + + Parameters + ---------- + name : str + + Returns + ------- + invalid_op : function + """ + def invalid_op(self, other=None): + raise TypeError("cannot perform {name} with this index type: " + "{typ}".format(name=name, typ=type(self))) + + invalid_op.__name__ = name + return invalid_op + + class InvalidIndexError(Exception): pass @@ -4136,26 +4156,6 @@ def _add_logical_methods_disabled(cls): Index._add_comparison_methods() -def _make_invalid_op(name): - """ - Return a binary method that always raises a TypeError. - - Parameters - ---------- - name : str - - Returns - ------- - invalid_op : function - """ - def invalid_op(self, other=None): - raise TypeError("cannot perform {name} with this index type: " - "{typ}".format(name=name, typ=type(self))) - - invalid_op.__name__ = name - return invalid_op - - def _ensure_index_from_sequences(sequences, names=None): """Construct an index from sequences of data. From 6c0811a5742f367ba65ad7e66be16dcae313e408 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 4 Feb 2018 09:21:33 -0800 Subject: [PATCH 3/6] requested edits --- pandas/core/frame.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5dfbb355374cc..882ea982afe29 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3968,8 +3968,8 @@ def f(i): def _combine_series(self, other, func, fill_value=None, axis=None, level=None, try_cast=True): if fill_value is not None: - raise NotImplementedError("fill_value %r not supported." % - fill_value) + raise NotImplementedError("fill_value {fill} not supported." + .format(fill=fill_value)) if axis is not None: axis = self._get_axis_name(axis) @@ -3979,10 +3979,10 @@ def _combine_series(self, other, func, fill_value=None, axis=None, return self._combine_match_columns(other, func, level=level, try_cast=try_cast) else: - if len(other) == 0: + if not len(other): return self * np.nan - if len(self) == 0: + if not len(self): # Ambiguous case, use _series so works with DataFrame return self._constructor(data=self._series, index=self.index, columns=self.columns) From 2de02687943b0c2278206ae7842c17b49f40fa16 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 5 Feb 2018 18:32:10 -0800 Subject: [PATCH 4/6] test, whatsnew note --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/tests/frame/test_operators.py | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 69965f44d87a8..e812c1551408e 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -486,6 +486,7 @@ Numeric - Bug in :class:`Index` multiplication and division methods where operating with a ``Series`` would return an ``Index`` object instead of a ``Series`` object (:issue:`19042`) - Bug in the :class:`DataFrame` constructor in which data containing very large positive or very large negative numbers was causing ``OverflowError`` (:issue:`18584`) - Bug in :class:`Index` constructor with ``dtype='uint64'`` where int-like floats were not coerced to :class:`UInt64Index` (:issue:`18400`) +- Bug in :class:`DataFrame` flex arithmetic methods that failed to raise ``NotImplementedError` in cases where either argument has length zero (:issue:`19522`) Indexing diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index bdccbec6111d3..611b7bcca0952 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -451,6 +451,18 @@ def test_arith_flex_frame(self): with tm.assert_raises_regex(NotImplementedError, 'fill_value'): self.frame.add(self.frame.iloc[0], axis='index', fill_value=3) + def test_arith_flex_zero_len_raises(self): + # GH#19522 passing fill_value to frame flex arith methods should + # raise even in the zero-length special cases + ser = pd.Series([]) + df = pd.DataFrame([[1, 2], [3, 4]]) + + with tm.assert_raises_regex(NotImplementedError, 'fill_value'): + df.add(ser, fill_value='E') + + with tm.assert_raises_regex(NotImplementedError, 'fill_value'): + ser.to_frame().sub(df[0], axis=None, fill_value=3) + def test_binary_ops_align(self): # test aligning binary ops From 98fdcef5743292855bfae3f4b94b35f586ce1a1d Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 6 Feb 2018 08:14:35 -0800 Subject: [PATCH 5/6] flesh out whatsnew --- doc/source/whatsnew/v0.23.0.txt | 2 +- pandas/tests/frame/test_operators.py | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index e467ddb05b72f..1df8ea324eb79 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -581,7 +581,7 @@ Numeric - Bug in :class:`Index` multiplication and division methods where operating with a ``Series`` would return an ``Index`` object instead of a ``Series`` object (:issue:`19042`) - Bug in the :class:`DataFrame` constructor in which data containing very large positive or very large negative numbers was causing ``OverflowError`` (:issue:`18584`) - Bug in :class:`Index` constructor with ``dtype='uint64'`` where int-like floats were not coerced to :class:`UInt64Index` (:issue:`18400`) -- Bug in :class:`DataFrame` flex arithmetic methods that failed to raise ``NotImplementedError` in cases where either argument has length zero (:issue:`19522`) +- Bug in :class:`DataFrame` flex arithmetic (e.g. `df.add(other, fill_value=foo)`) with a `fill_value` other than ``None`` failed to raise ``NotImplementedError`` in corner cases where either the frame or ``other`` has length zero (:issue:`19522`) Indexing diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 611b7bcca0952..8e0dab4832cc1 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -454,14 +454,15 @@ def test_arith_flex_frame(self): def test_arith_flex_zero_len_raises(self): # GH#19522 passing fill_value to frame flex arith methods should # raise even in the zero-length special cases - ser = pd.Series([]) - df = pd.DataFrame([[1, 2], [3, 4]]) + ser_len0 = pd.Series([]) + df_len0 = pd.DataFrame([], columns=['A', 'B']) + df = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B']) with tm.assert_raises_regex(NotImplementedError, 'fill_value'): - df.add(ser, fill_value='E') + df.add(ser_len0, fill_value='E') with tm.assert_raises_regex(NotImplementedError, 'fill_value'): - ser.to_frame().sub(df[0], axis=None, fill_value=3) + df_len0.sub(df[0], axis=None, fill_value=3) def test_binary_ops_align(self): From 85d501c3f084291c3eeaa9e09065a3329f446316 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 6 Feb 2018 10:39:45 -0800 Subject: [PATCH 6/6] Fixup columns changed --- pandas/tests/frame/test_operators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 8e0dab4832cc1..f679a9820784e 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -462,7 +462,7 @@ def test_arith_flex_zero_len_raises(self): df.add(ser_len0, fill_value='E') with tm.assert_raises_regex(NotImplementedError, 'fill_value'): - df_len0.sub(df[0], axis=None, fill_value=3) + df_len0.sub(df['A'], axis=None, fill_value=3) def test_binary_ops_align(self):