diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index 91575c311b409e..80399ca31a9ad9 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -48,7 +48,7 @@ Pandas has gained the ability to hold integer dtypes with missing values. This l
 Here is an example of the usage.
 
 We can construct a ``Series`` with the specified dtype. The dtype string ``Int64`` is a pandas ``ExtensionDtype``. Specifying a list or array using the traditional missing value
-marker of ``np.nan`` will infer to integer dtype. The display of the ``Series`` will also use the ``NaN`` to indicate missing values in string outputs. (:issue:`20700`, :issue:`20747`, :issue:`22441`)
+marker of ``np.nan`` will infer to integer dtype. The display of the ``Series`` will also use the ``NaN`` to indicate missing values in string outputs. (:issue:`20700`, :issue:`20747`, :issue:`22441`, :issue:`21789`, :issue:`22346`)
 
 .. ipython:: python
 
@@ -91,6 +91,13 @@ These dtypes can be merged & reshaped & casted.
    pd.concat([df[['A']], df[['B', 'C']]], axis=1).dtypes
    df['A'].astype(float)
 
+Reduction and groupby operations such as 'sum' work.
+
+.. ipython:: python
+
+   df.sum()
+   df.groupby('B').A.sum()
+
 .. warning::
 
    The Integer NA support currently uses the captilized dtype version, e.g. ``Int8`` as compared to the traditional ``int8``. This may be changed at a future date.
diff --git a/pandas/conftest.py b/pandas/conftest.py
index 621de3ffd4b12c..d24400d09809e7 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -131,6 +131,29 @@ def all_arithmetic_operators(request):
     return request.param
 
 
+_all_numeric_reductions = ['sum', 'max', 'min',
+                           'mean', 'prod', 'std', 'var', 'median']
+
+
+@pytest.fixture(params=_all_numeric_reductions)
+def all_numeric_reductions(request):
+    """
+    Fixture for numeric reduction names
+    """
+    return request.param
+
+
+_all_boolean_reductions = ['all', 'any']
+
+
+@pytest.fixture(params=_all_boolean_reductions)
+def all_boolean_reductions(request):
+    """
+    Fixture for boolean reduction names
+    """
+    return request.param
+
+
 _cython_table = pd.core.base.SelectionMixin._cython_table.items()
 
 
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index f7c4ee35adfe48..60f3b2a123ddc9 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -59,6 +59,10 @@ class ExtensionArray(object):
     * factorize / _values_for_factorize
     * argsort / _values_for_argsort
 
+    One can implement methods to handle array reductions.
+
+    * _reduce
+
     The remaining methods implemented on this class should be performant,
     as they only compose abstract methods. Still, a more efficient
     implementation may be available, and these methods can be overridden.
@@ -708,6 +712,31 @@ def _add_comparison_ops(cls):
         cls.__le__ = cls._create_comparison_method(operator.le)
         cls.__ge__ = cls._create_comparison_method(operator.ge)
 
+    def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
+                filter_type=None, **kwargs):
+        """Return a scalar result of performing the op
+
+        Parameters
+        ----------
+        op : callable
+            function to apply to the array
+        name : str
+            name of the function
+        axis : int, default 0
+            axis over which to apply, defined as 0 currently
+        skipna : bool, default True
+            if True, skip NaN values
+        numeric_only : bool, optional
+            if True, only perform numeric ops
+        filter_type : str, optional
+        kwargs : dict
+
+        Returns
+        -------
+        scalar
+        """
+        raise AbstractMethodError(self)
+
 
 class ExtensionScalarOpsMixin(ExtensionOpsMixin):
     """A mixin for defining the arithmetic and logical operations on
diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
index e58109a25e1a57..7e1e7315232aa5 100644
--- a/pandas/core/arrays/integer.py
+++ b/pandas/core/arrays/integer.py
@@ -529,6 +529,55 @@ def cmp_method(self, other):
         name = '__{name}__'.format(name=op.__name__)
         return set_function_name(cmp_method, name, cls)
 
+    def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
+                filter_type=None, **kwds):
+        """Return a scalar result of performing the op
+
+        Parameters
+        ----------
+        op : callable
+            function to apply to the array
+        name : str
+            name of the function
+        axis : int, default 0
+            axis over which to apply, defined as 0 currently
+        skipna : bool, default True
+            if True, skip NaN values
+        numeric_only : bool, optional
+            if True, only perform numeric ops
+        filter_type : str, optional
+        kwds : dict
+
+        Returns
+        -------
+        scalar
+        """
+
+        data = self._data
+        mask = self._mask
+
+        # coerce to a nan-aware float if needed
+        if mask.any():
+            data = self._data.astype('float64')
+            data[mask] = self._na_value
+
+        result = op(data, axis=axis, skipna=skipna)
+
+        # if we have a boolean op, provide coercion back to a bool
+        # type if possible
+        if name in ['any', 'all']:
+            if is_integer(result) or is_float(result):
+                result = bool(int(result))
+
+        # if we have a numeric op, provide coercion back to an integer
+        # type if possible
+        elif not isna(result):
+            int_result = int(result)
+            if int_result == result:
+                result = int_result
+
+        return result
+
     def _maybe_mask_result(self, result, mask, other, op_name):
         """
         Parameters
diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py
index 349a6aee5701ea..3f3653f375149e 100644
--- a/pandas/tests/arrays/test_integer.py
+++ b/pandas/tests/arrays/test_integer.py
@@ -587,15 +587,18 @@ def test_cross_type_arithmetic():
     tm.assert_series_equal(result, expected)
 
 
-def test_groupby_mean_included():
+@pytest.mark.parametrize('op', ['sum', 'min', 'max'])
+def test_preserve_groupby_dtypes(op):
+    # TODO(#22346): preserve Int64 dtype
+    # for ops that enable (mean would actually work here
+    # but generally it is a float return value)
     df = pd.DataFrame({
         "A": ['a', 'b', 'b'],
         "B": [1, None, 3],
         "C": integer_array([1, None, 3], dtype='Int64'),
     })
 
-    result = df.groupby("A").sum()
-    # TODO(#22346): preserve Int64 dtype
+    result = getattr(df.groupby("A"), op)()
     expected = pd.DataFrame({
         "B": np.array([1.0, 3.0]),
         "C": np.array([1, 3], dtype="int64")
diff --git a/pandas/tests/extension/base/__init__.py b/pandas/tests/extension/base/__init__.py
index b6b81bb941a59c..b5d4fd676245ef 100644
--- a/pandas/tests/extension/base/__init__.py
+++ b/pandas/tests/extension/base/__init__.py
@@ -48,6 +48,7 @@ class TestMyDtype(BaseDtypeTests):
 from .interface import BaseInterfaceTests  # noqa
 from .methods import BaseMethodsTests  # noqa
 from .ops import BaseArithmeticOpsTests, BaseComparisonOpsTests, BaseOpsUtil  # noqa
+from .reduce import BaseNumericReduceTests, BaseBooleanReduceTests  # noqa
 from .missing import BaseMissingTests  # noqa
 from .reshaping import BaseReshapingTests  # noqa
 from .setitem import BaseSetitemTests  # noqa
diff --git a/pandas/tests/extension/base/groupby.py b/pandas/tests/extension/base/groupby.py
index 174997c7d51e17..52c635d286df6e 100644
--- a/pandas/tests/extension/base/groupby.py
+++ b/pandas/tests/extension/base/groupby.py
@@ -25,8 +25,8 @@ def test_groupby_extension_agg(self, as_index, data_for_grouping):
                            "B": data_for_grouping})
         result = df.groupby("B", as_index=as_index).A.mean()
         _, index = pd.factorize(data_for_grouping, sort=True)
-        # TODO(ExtensionIndex): remove astype
-        index = pd.Index(index.astype(object), name="B")
+
+        index = pd.Index(index, name="B")
         expected = pd.Series([3, 1, 4], index=index, name="A")
         if as_index:
             self.assert_series_equal(result, expected)
@@ -39,8 +39,8 @@ def test_groupby_extension_no_sort(self, data_for_grouping):
                            "B": data_for_grouping})
         result = df.groupby("B", sort=False).A.mean()
         _, index = pd.factorize(data_for_grouping, sort=False)
-        # TODO(ExtensionIndex): remove astype
-        index = pd.Index(index.astype(object), name="B")
+
+        index = pd.Index(index, name="B")
         expected = pd.Series([1, 3, 4], index=index, name="A")
         self.assert_series_equal(result, expected)
 
diff --git a/pandas/tests/extension/base/reduce.py b/pandas/tests/extension/base/reduce.py
new file mode 100644
index 00000000000000..ce53ba91ff2fc0
--- /dev/null
+++ b/pandas/tests/extension/base/reduce.py
@@ -0,0 +1,38 @@
+import warnings
+import pytest
+import pandas.util.testing as tm
+import pandas as pd
+from .base import BaseExtensionTests
+
+
+class BaseReduceTests(BaseExtensionTests):
+    """
+    Reduction specific tests. Generally these only
+    make sense for numeric/boolean operations.
+    """
+    def check_reduce(self, s, op_name, skipna):
+        result = getattr(s, op_name)(skipna=skipna)
+        expected = getattr(s.astype('float64'), op_name)(skipna=skipna)
+        tm.assert_almost_equal(result, expected)
+
+
+class BaseNumericReduceTests(BaseReduceTests):
+
+    @pytest.mark.parametrize('skipna', [True, False])
+    def test_reduce_series(self, data, all_numeric_reductions, skipna):
+        op_name = all_numeric_reductions
+        s = pd.Series(data)
+
+        # min/max with empty produce numpy warnings
+        with warnings.catch_warnings(record=True):
+            warnings.simplefilter("ignore", RuntimeWarning)
+            self.check_reduce(s, op_name, skipna)
+
+
+class BaseBooleanReduceTests(BaseReduceTests):
+
+    @pytest.mark.parametrize('skipna', [True, False])
+    def test_reduce_series(self, data, all_boolean_reductions, skipna):
+        op_name = all_boolean_reductions
+        s = pd.Series(data)
+        self.check_reduce(s, op_name, skipna)
diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py
index fa5c89d85e5481..163f2ad6fc72b7 100644
--- a/pandas/tests/extension/test_integer.py
+++ b/pandas/tests/extension/test_integer.py
@@ -208,17 +208,39 @@ class TestCasting(base.BaseCastingTests):
 
 class TestGroupby(base.BaseGroupbyTests):
 
-    @pytest.mark.xfail(reason="groupby not working", strict=True)
-    def test_groupby_extension_no_sort(self, data_for_grouping):
-        super(TestGroupby, self).test_groupby_extension_no_sort(
-            data_for_grouping)
-
-    @pytest.mark.parametrize('as_index', [
-        pytest.param(True,
-                     marks=pytest.mark.xfail(reason="groupby not working",
-                                             strict=True)),
-        False
-    ])
+    @pytest.mark.parametrize('as_index', [True, False])
     def test_groupby_extension_agg(self, as_index, data_for_grouping):
-        super(TestGroupby, self).test_groupby_extension_agg(
-            as_index, data_for_grouping)
+        df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4],
+                           "B": data_for_grouping})
+        result = df.groupby("B", as_index=as_index).A.mean()
+        _, index = pd.factorize(data_for_grouping, sort=True)
+
+        # TODO(ExtensionIndex): remove coercion to object
+        # we don't have an easy way to represent an EA as an Index object
+        index = pd.Index(index, name="B", dtype=object)
+        expected = pd.Series([3, 1, 4], index=index, name="A")
+        if as_index:
+            self.assert_series_equal(result, expected)
+        else:
+            expected = expected.reset_index()
+            self.assert_frame_equal(result, expected)
+
+    def test_groupby_extension_no_sort(self, data_for_grouping):
+        df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4],
+                           "B": data_for_grouping})
+        result = df.groupby("B", sort=False).A.mean()
+        _, index = pd.factorize(data_for_grouping, sort=False)
+
+        # TODO(ExtensionIndex): remove coercion to object
+        # we don't have an easy way to represent an EA as an Index object
+        index = pd.Index(index, name="B", dtype=object)
+        expected = pd.Series([1, 3, 4], index=index, name="A")
+        self.assert_series_equal(result, expected)
+
+
+class TestNumericReduce(base.BaseNumericReduceTests):
+    pass
+
+
+class TestBooleanReduce(base.BaseBooleanReduceTests):
+    pass