pandas-dev · jreback · Nov 6, 2018 · Oct 24, 2018 · Oct 24, 2018 · Oct 24, 2018
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -27,7 +27,9 @@ class providing the base-class of operations.
 )
 from pandas.core.config import option_context
 from pandas.core.dtypes.cast import maybe_downcast_to_dtype
-from pandas.core.dtypes.common import ensure_float, is_numeric_dtype, is_scalar
+from pandas.core.dtypes.common import (
+    ensure_float, is_numeric_dtype, is_scalar, is_extension_array_dtype
+)
 from pandas.core.dtypes.missing import isna, notna
 from pandas.core.frame import DataFrame
 from pandas.core.generic import NDFrame
@@ -753,7 +755,18 @@ def _try_cast(self, result, obj, numeric_only=False):
             dtype = obj.dtype
 
         if not is_scalar(result):
-            if numeric_only and is_numeric_dtype(dtype) or not numeric_only:
+            if is_extension_array_dtype(dtype):
+                # The function can return something of any type, so check
+                # if the type is compatible with the calling EA.
+                try:
+                    result = obj.values._from_sequence(result)
+                except Exception:
+                    # https://github.com/pandas-dev/pandas/issues/22850
+                    # pandas has no control over what 3rd-party ExtensionArrays
+                    # do in _from_sequence. We still want ops to work
+                    # though, so we catch any regular Exception.
+                    pass
+            elif numeric_only and is_numeric_dtype(dtype) or not numeric_only:
                 result = maybe_downcast_to_dtype(result, dtype)
 
         return result

diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py
@@ -652,11 +652,9 @@ def test_preserve_dtypes(op):
 
     # groupby
     result = getattr(df.groupby("A"), op)()
-    expected = pd.DataFrame({
-        "B": np.array([1.0, 3.0]),
-        "C": np.array([1, 3], dtype="int64")
-    }, index=pd.Index(['a', 'b'], name='A'))
-    tm.assert_frame_equal(result, expected)
+
+    assert result.dtypes['B'].name == 'float64'
+    assert result.dtypes['C'].name == 'Int64'
 
 
 @pytest.mark.parametrize('op', ['mean'])
@@ -675,11 +673,23 @@ def test_reduce_to_float(op):
 
     # groupby
     result = getattr(df.groupby("A"), op)()
-    expected = pd.DataFrame({
-        "B": np.array([1.0, 3.0]),
-        "C": np.array([1, 3], dtype="float64")
-    }, index=pd.Index(['a', 'b'], name='A'))
-    tm.assert_frame_equal(result, expected)
+
+    assert result.dtypes['B'].name == 'float64'
+    assert result.dtypes['C'].name == 'Int64'
+
+
+@pytest.mark.parametrize('op', ['sum'])
+def test_groupby_extension_array(op):
+    # GH23227
+    # groupby on an extension array should return the extension array type
+    df = pd.DataFrame({
+        'Int': pd.Series([1, 2, 3], dtype='Int64'),
+        'A': [1, 2, 1]
+    })
+
+    result = getattr(df.groupby('A').Int, op)()
+    assert result is not None
+    assert result.dtype.name == 'Int64'
 
 
 def test_astype_nansafe():

diff --git a/pandas/tests/sparse/test_groupby.py b/pandas/tests/sparse/test_groupby.py
@@ -26,25 +26,25 @@ def test_first_last_nth(self):
 
         # TODO: shouldn't these all be spares or not?
         tm.assert_frame_equal(sparse_grouped.first(),
-                              dense_grouped.first())
+                              dense_grouped.first().to_sparse())
         tm.assert_frame_equal(sparse_grouped.last(),
-                              dense_grouped.last())
+                              dense_grouped.last().to_sparse())
         tm.assert_frame_equal(sparse_grouped.nth(1),
                               dense_grouped.nth(1).to_sparse())
 
     def test_aggfuncs(self):
         sparse_grouped = self.sparse.groupby('A')
         dense_grouped = self.dense.groupby('A')
 
-        tm.assert_frame_equal(sparse_grouped.mean(),
-                              dense_grouped.mean())
+        tm.assert_frame_equal(sparse_grouped.mean().to_sparse(),
+                              dense_grouped.mean().to_sparse())
 
         # ToDo: sparse sum includes str column
         # tm.assert_frame_equal(sparse_grouped.sum(),
         #                       dense_grouped.sum())
 
-        tm.assert_frame_equal(sparse_grouped.count(),
-                              dense_grouped.count())
+        tm.assert_frame_equal(sparse_grouped.count().to_sparse(),
+                              dense_grouped.count().to_sparse())
 
 
 @pytest.mark.parametrize("fill_value", [0, np.nan])
@@ -55,5 +55,5 @@ def test_groupby_includes_fill_value(fill_value):
     sdf = df.to_sparse(fill_value=fill_value)
     result = sdf.groupby('a').sum()
     expected = df.groupby('a').sum()
-    tm.assert_frame_equal(result, expected,
+    tm.assert_frame_equal(result, expected.to_sparse(fill_value=fill_value),
                           check_index_type=False)
diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py
@@ -1576,6 +1576,7 @@ def test_resample_categorical_data_with_timedeltaindex(self):
                               'Group': ['A', 'A']},
                              index=pd.to_timedelta([0, 10], unit='s'))
         expected = expected.reindex(['Group_obj', 'Group'], axis=1)
+        expected['Group'] = expected['Group_obj'].astype('category')
         tm.assert_frame_equal(result, expected)
 
     def test_resample_daily_anchored(self):