pandas-dev · jreback · Jun 8, 2018 · May 23, 2018 · May 23, 2018 · May 23, 2018
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
@@ -178,9 +178,18 @@ Reshaping
 -
 -
 
+ExtensionArray
+^^^^^^^^^^^^^^
+
+- :meth:`Series.combine()` works correctly with :class:`~pandas.api.extensions.ExtensionArray` inside of :class:`Series` (:issue:`20825`)
+- :meth:`Series.combine()` with scalar argument now works for any function type (:issue:`21248`)
+-
+-
+
 Other
 ^^^^^
 
 -
 -
 -
+-
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -2204,7 +2204,7 @@ def _binop(self, other, func, level=None, fill_value=None):
             result.name = None
         return result
 
-    def combine(self, other, func, fill_value=np.nan):
+    def combine(self, other, func, fill_value=None):
         """
         Perform elementwise binary operation on two Series using given function
         with optional fill value when an index is missing from one Series or
@@ -2216,6 +2216,8 @@ def combine(self, other, func, fill_value=np.nan):
         func : function
             Function that takes two scalars as inputs and return a scalar
         fill_value : scalar value
+            The default specifies to use the appropriate NaN value for
+            the underlying dtype of the Series
 
         Returns
         -------
@@ -2235,20 +2237,38 @@ def combine(self, other, func, fill_value=np.nan):
         Series.combine_first : Combine Series values, choosing the calling
             Series's values first
         """
+        if fill_value is None:
+            fill_value = na_value_for_dtype(self.dtype, compat=False)
+
         if isinstance(other, Series):
+            # If other is a Series, result is based on union of Series,
+            # so do this element by element
             new_index = self.index.union(other.index)
             new_name = ops.get_op_result_name(self, other)
-            new_values = np.empty(len(new_index), dtype=self.dtype)
-            for i, idx in enumerate(new_index):
+            new_values = []
+            for idx in new_index:
                 lv = self.get(idx, fill_value)
                 rv = other.get(idx, fill_value)
                 with np.errstate(all='ignore'):
-                    new_values[i] = func(lv, rv)
+                    new_values.append(func(lv, rv))
         else:
+            # Assume that other is a scalar, so apply the function for
+            # each element in the Series
             new_index = self.index
             with np.errstate(all='ignore'):
-                new_values = func(self._values, other)
+                new_values = [func(lv, other) for lv in self._values]
             new_name = self.name
+
+        if is_categorical_dtype(self.values):
+            pass
+        elif is_extension_array_dtype(self.values):
+            # The function can return something of any type, so check
+            # if the type is compatible with the calling EA
+            try:
+                new_values = self._values._from_sequence(new_values)
+            except TypeError:
+                pass
+
         return self._constructor(new_values, index=new_index, name=new_name)
 
     def combine_first(self, other):

diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py
@@ -103,3 +103,37 @@ def test_factorize_equivalence(self, data_for_grouping, na_sentinel):
 
         tm.assert_numpy_array_equal(l1, l2)
         self.assert_extension_array_equal(u1, u2)
+
+    def test_combine_le(self, data_repeated):
+        # GH 20825
+        # Test that combine works when doing a <= (le) comparison
+        orig_data1, orig_data2 = data_repeated(2)
+        s1 = pd.Series(orig_data1)
+        s2 = pd.Series(orig_data2)
+        result = s1.combine(s2, lambda x1, x2: x1 <= x2)
+        expected = pd.Series([a <= b for (a, b) in
+                              zip(list(orig_data1), list(orig_data2))])
+        self.assert_series_equal(result, expected)
+
+        val = s1.iloc[0]
+        result = s1.combine(val, lambda x1, x2: x1 <= x2)
+        expected = pd.Series([a <= val for a in list(orig_data1)])
+        self.assert_series_equal(result, expected)
+
+    def test_combine_add(self, data_repeated):
+        # GH 20825
+        orig_data1, orig_data2 = data_repeated(2)
+        s1 = pd.Series(orig_data1)
+        s2 = pd.Series(orig_data2)
+        result = s1.combine(s2, lambda x1, x2: x1 + x2)
+        expected = pd.Series(
+            orig_data1._from_sequence([a + b for (a, b) in
+                                       zip(list(orig_data1),
+                                           list(orig_data2))]))
+        self.assert_series_equal(result, expected)
+
+        val = s1.iloc[0]
+        result = s1.combine(val, lambda x1, x2: x1 + x2)
+        expected = pd.Series(
+            orig_data1._from_sequence([a + val for a in list(orig_data1)]))
+        self.assert_series_equal(result, expected)
diff --git a/pandas/tests/extension/category/test_categorical.py b/pandas/tests/extension/category/test_categorical.py
@@ -1,6 +1,7 @@
 import string
 
 import pytest
+import pandas as pd
 import numpy as np
 
 from pandas.api.types import CategoricalDtype
@@ -29,6 +30,15 @@ def data_missing():
     return Categorical([np.nan, 'A'])
 
 
+@pytest.fixture
+def data_repeated():
+    """Return different versions of data for count times"""
+    def gen(count):
+        for _ in range(count):
+            yield Categorical(make_data())
+    yield gen
+
+
 @pytest.fixture
 def data_for_sorting():
     return Categorical(['A', 'B', 'C'], categories=['C', 'A', 'B'],
@@ -154,6 +164,22 @@ class TestMethods(base.BaseMethodsTests):
     def test_value_counts(self, all_data, dropna):
         pass
 
+    def test_combine_add(self, data_repeated):
+        # GH 20825
+        # When adding categoricals in combine, result is a string
+        orig_data1, orig_data2 = data_repeated(2)
+        s1 = pd.Series(orig_data1)
+        s2 = pd.Series(orig_data2)
+        result = s1.combine(s2, lambda x1, x2: x1 + x2)
+        expected = pd.Series(([a + b for (a, b) in
+                               zip(list(orig_data1), list(orig_data2))]))
+        self.assert_series_equal(result, expected)
+
+        val = s1.iloc[0]
+        result = s1.combine(val, lambda x1, x2: x1 + x2)
+        expected = pd.Series([a + val for a in list(orig_data1)])
+        self.assert_series_equal(result, expected)
+
 
 class TestCasting(base.BaseCastingTests):
     pass
diff --git a/pandas/tests/extension/conftest.py b/pandas/tests/extension/conftest.py
@@ -30,6 +30,15 @@ def all_data(request, data, data_missing):
         return data_missing
 
 
+@pytest.fixture
+def data_repeated():
+    """Return different versions of data for count times"""
+    def gen(count):
+        for _ in range(count):
+            yield NotImplementedError
+    yield gen
+
+
 @pytest.fixture
 def data_for_sorting():
     """Length-3 array with a known sort order.

diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py
@@ -28,7 +28,9 @@ class DecimalArray(ExtensionArray):
     dtype = DecimalDtype()
 
     def __init__(self, values):
-        assert all(isinstance(v, decimal.Decimal) for v in values)
+        for val in values:
+            if not isinstance(val, self.dtype.type):
+                raise TypeError
         values = np.asarray(values, dtype=object)
 
         self._data = values

diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py
@@ -25,6 +25,14 @@ def data_missing():
     return DecimalArray([decimal.Decimal('NaN'), decimal.Decimal(1)])
 
 
+@pytest.fixture
+def data_repeated():
+    def gen(count):
+        for _ in range(count):
+            yield DecimalArray(make_data())
+    yield gen
+
+
 @pytest.fixture
 def data_for_sorting():
     return DecimalArray([decimal.Decimal('1'),

diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py
@@ -187,6 +187,14 @@ def test_sort_values_missing(self, data_missing_for_sorting, ascending):
         super(TestMethods, self).test_sort_values_missing(
             data_missing_for_sorting, ascending)
 
+    @pytest.mark.skip(reason="combine for JSONArray not supported")
+    def test_combine_le(self, data_repeated):
+        pass
+
+    @pytest.mark.skip(reason="combine for JSONArray not supported")
+    def test_combine_add(self, data_repeated):
+        pass
+
 
 class TestCasting(BaseJSON, base.BaseCastingTests):
     @pytest.mark.xfail

diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py
@@ -60,6 +60,19 @@ def test_append_duplicates(self):
         with tm.assert_raises_regex(ValueError, msg):
             pd.concat([s1, s2], verify_integrity=True)
 
+    def test_combine_scalar(self):
+        # GH 21248
+        # Note - combine() with another Series is tested elsewhere because
+        # it is used when testing operators
+        s = pd.Series([i * 10 for i in range(5)])
+        result = s.combine(3, lambda x, y: x + y)
+        expected = pd.Series([i * 10 + 3 for i in range(5)])
+        tm.assert_series_equal(result, expected)
+
+        result = s.combine(22, lambda x, y: min(x, y))
+        expected = pd.Series([min(i * 10, 22) for i in range(5)])
+        tm.assert_series_equal(result, expected)
+
     def test_combine_first(self):
         values = tm.makeIntIndex(20).values.astype(float)
         series = Series(values, index=tm.makeIntIndex(20))