Add COO.mean and sparse.nanmean #190

Merged
merged 3 commits on Sep 19, 2018
1 change: 1 addition & 0 deletions docs/changelog.rst
@@ -4,6 +4,7 @@ Changelog
Upcoming Release
----------------

* Added :code:`COO.mean` and :code:`sparse.nanmean` (:pr:`190`).
* Added :code:`sparse.full` and :code:`sparse.full_like` (:pr:`189`).
* Added :code:`COO.clip` method (:pr:`185`).
* Added :code:`COO.copy` method, and changed pickle of :code:`COO` to not
6 changes: 6 additions & 0 deletions docs/generated/sparse.COO.mean.rst
@@ -0,0 +1,6 @@
COO\.mean
=========

.. currentmodule:: sparse

.. automethod:: COO.mean
1 change: 1 addition & 0 deletions docs/generated/sparse.COO.rst
@@ -43,6 +43,7 @@ COO
COO.reduce

COO.sum
COO.mean
COO.prod
COO.min
COO.max
6 changes: 6 additions & 0 deletions docs/generated/sparse.nanmean.rst
@@ -0,0 +1,6 @@
nanmean
=======

.. currentmodule:: sparse

.. autofunction:: nanmean
2 changes: 2 additions & 0 deletions docs/generated/sparse.rst
@@ -41,6 +41,8 @@ API

nanmax

nanmean

nanmin

nanprod
4 changes: 2 additions & 2 deletions sparse/coo/__init__.py
@@ -1,5 +1,5 @@
from .core import COO, as_coo
from .umath import elemwise
from .common import (tensordot, dot, concatenate, stack, triu, tril, where,
nansum, nanprod, nanmin, nanmax, nanreduce, roll, eye,
full, full_like, zeros, zeros_like, ones, ones_like)
nansum, nanmean, nanprod, nanmin, nanmax, nanreduce, roll,
eye, full, full_like, zeros, zeros_like, ones, ones_like)
56 changes: 56 additions & 0 deletions sparse/coo/common.py
@@ -426,6 +426,62 @@ def nansum(x, axis=None, keepdims=False, dtype=None, out=None):
return nanreduce(x, np.add, axis=axis, keepdims=keepdims, dtype=dtype)


def nanmean(x, axis=None, keepdims=False, dtype=None, out=None):
"""
Performs a ``NaN`` skipping mean operation along the given axes. Uses all axes by default.

Parameters
----------
x : SparseArray
The array to perform the reduction on.
axis : Union[int, Iterable[int]], optional
The axes along which to compute the mean. Uses all axes by default.
keepdims : bool, optional
Whether or not to keep the dimensions of the original array.
dtype : numpy.dtype
The data type of the output array.

Returns
-------
COO
The reduced output sparse array.

See Also
--------
:obj:`COO.mean` : Function without ``NaN`` skipping.
numpy.nanmean : Equivalent Numpy function.
"""
assert out is None
x = asCOO(x, name='nanmean')
Collaborator:

I'm surprised this works -- asCOO has no name parameter. I must have forgotten to check for superfluous kwargs somewhere.

Collaborator (Author):

Yes it does, asCOO is different from as_coo in core.py. See the top of this file.


if not np.issubdtype(x.dtype, np.floating):
return x.mean(axis=axis, keepdims=keepdims, dtype=dtype)
Collaborator:

We need a test for this branch, apparently.


mask = np.isnan(x)
x2 = where(mask, 0, x)

# Count the number of NaN values along the given axes
nancount = mask.sum(axis=axis, dtype='i8', keepdims=keepdims)
if axis is None:
axis = tuple(range(x.ndim))
elif not isinstance(axis, tuple):
axis = (axis,)
den = 1
for ax in axis:
den *= x.shape[ax]
Collaborator:

This can probably be written more compactly with functools.reduce and a generator.
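As a sketch of the reviewer's suggestion (assuming `axis` has already been normalized to a tuple, as it is just above this loop), the running product could be written with `functools.reduce` and a generator; `dense_size` is a hypothetical helper name, not part of the PR:

```python
from functools import reduce
import operator


def dense_size(shape, axis):
    # Product of the extents along the reduced axes; the initializer 1
    # handles an empty axis tuple (0-d reduction) gracefully.
    return reduce(operator.mul, (shape[ax] for ax in axis), 1)
```

For example, `dense_size((20, 30, 40), (0, 2))` gives `800`, matching what the explicit `for ax in axis: den *= x.shape[ax]` loop computes.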

den -= nancount

if np.any(den == 0):
warnings.warn("Mean of empty slice", RuntimeWarning, stacklevel=2)

num = np.sum(x2, axis=axis, dtype=dtype, keepdims=keepdims)

with np.errstate(invalid='ignore', divide='ignore'):
if num.ndim:
return np.true_divide(num, den, casting='unsafe')
return (num / den).astype(dtype)
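The masking strategy used above can be illustrated with plain NumPy (a sketch of the algorithm only — zero out NaNs for the numerator, count non-NaN elements for the denominator — not the sparse code path itself):

```python
import numpy as np


def nanmean_sketch(x, axis=None):
    """NaN-skipping mean via masking, mirroring the approach in nanmean."""
    mask = np.isnan(x)
    num = np.where(mask, 0, x).sum(axis=axis)  # NaNs contribute 0 to the sum
    den = (~mask).sum(axis=axis)               # count of non-NaN elements
    with np.errstate(invalid='ignore', divide='ignore'):
        return num / den


x = np.array([[1.0, np.nan],
              [3.0, 5.0]])
```

Here `nanmean_sketch(x, axis=0)` agrees with `np.nanmean(x, axis=0)`, i.e. `[2.0, 5.0]`.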


def nanmax(x, axis=None, keepdims=False, dtype=None, out=None):
"""
Maximize along the given axes, skipping ``NaN`` values. Uses all axes by default.
87 changes: 87 additions & 0 deletions sparse/coo/core.py
@@ -1076,6 +1076,93 @@ def prod(self, axis=None, keepdims=False, dtype=None, out=None):
"""
return np.multiply.reduce(self, out=out, axis=axis, keepdims=keepdims, dtype=dtype)

def mean(self, axis=None, keepdims=False, dtype=None, out=None):
"""
Compute the mean along the given axes. Uses all axes by default.

Parameters
----------
axis : Union[int, Iterable[int]], optional
The axes along which to compute the mean. Uses all axes by default.
keepdims : bool, optional
Whether or not to keep the dimensions of the original array.
dtype : numpy.dtype
The data type of the output array.

Returns
-------
COO
The reduced output sparse array.

See Also
--------
numpy.ndarray.mean : Equivalent numpy method.
scipy.sparse.coo_matrix.mean : Equivalent Scipy method.

Notes
-----
* This function internally calls :obj:`COO.sum_duplicates` to bring the
array into canonical form.
* The :code:`out` parameter is provided just for compatibility with
Numpy and isn't actually supported.

Examples
--------
You can use :obj:`COO.mean` to compute the mean of an array across any
dimension.

>>> x = np.array([[1, 2, 0, 0],
... [0, 1, 0, 0]], dtype='i8')
>>> s = COO.from_numpy(x)
>>> s2 = s.mean(axis=1)
>>> s2.todense() # doctest: +SKIP
array([0.75, 0.25])

You can also use the :code:`keepdims` argument to keep the dimensions
after the mean.

>>> s3 = s.mean(axis=0, keepdims=True)
>>> s3.shape
(1, 4)

You can pass in an output datatype, if needed.

>>> s4 = s.mean(axis=0, dtype=np.float16)
>>> s4.dtype
dtype('float16')

By default, this reduces the array down to one number, computing the
mean along all axes.

>>> s.mean()
0.5
"""
if axis is None:
axis = tuple(range(self.ndim))
elif not isinstance(axis, tuple):
axis = (axis,)
den = 1
for ax in axis:
Collaborator:

This can probably be written more compactly with functools.reduce and a generator.

den *= self.shape[ax]

if dtype is None:
if issubclass(self.dtype.type, (np.integer, np.bool_)):
out_dtype = inter_dtype = np.dtype('f8')
else:
out_dtype = self.dtype
inter_dtype = (np.dtype('f4')
if issubclass(out_dtype.type, np.float16)
else out_dtype)
else:
out_dtype = inter_dtype = dtype

num = self.sum(axis=axis, keepdims=keepdims, dtype=inter_dtype)

if num.ndim:
out = np.true_divide(num, den, casting='unsafe')
return out.astype(out_dtype) if out.dtype != out_dtype else out
return (num / den).astype(out_dtype)
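The dtype-promotion branch above mirrors NumPy's own behaviour — integer and boolean inputs produce a float64 mean, and float16 results are documented by `np.mean` to use float32 intermediates for extra precision while still returning float16. A quick check of those rules (a sketch against plain NumPy, not against the sparse code):

```python
import numpy as np

# Integer and bool inputs: mean promotes the result to float64.
assert np.array([1, 2, 3], dtype='i8').mean().dtype == np.dtype('f8')
assert np.array([True, False]).mean().dtype == np.dtype('f8')

# float16 input: the result stays float16, even though the accumulation
# happens in a wider intermediate dtype under the hood.
x = np.array([1, 2, 3], dtype='f2')
assert x.mean().dtype == np.dtype('f2')
```

The `inter_dtype = f4` choice for float16 input in the code above is what keeps the sparse result consistent with `np.mean`'s float32-intermediate behaviour.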

def transpose(self, axes=None):
"""
Returns a new array which has the order of the axes switched.
104 changes: 75 additions & 29 deletions sparse/tests/test_coo.py
@@ -11,52 +11,88 @@
from sparse.utils import assert_eq, random_value_array


@pytest.mark.parametrize('reduction,kwargs,eqkwargs', [
('max', {}, {}),
('sum', {}, {}),
('sum', {'dtype': np.float16}, {'atol': 1e-2}),
('prod', {}, {}),
('min', {}, {}),
@pytest.fixture(scope='module', params=['f8', 'f4', 'i8', 'i4'])
def random_sparse(request):
dtype = request.param
if np.issubdtype(dtype, np.integer):
def data_rvs(n):
return np.random.randint(-10000, 10000, n)
else:
data_rvs = None
return sparse.random((20, 30, 40), density=.25, data_rvs=data_rvs).astype(dtype)


@pytest.mark.parametrize('reduction, kwargs', [
('sum', {}),
('sum', {'dtype': np.float32}),
('mean', {}),
('mean', {'dtype': np.float32}),
('prod', {}),
('max', {}),
('min', {}),
])
@pytest.mark.parametrize('axis', [None, 0, 1, 2, (0, 2), -3, (1, -1)])
@pytest.mark.parametrize('keepdims', [True, False])
def test_reductions(reduction, axis, keepdims, kwargs, eqkwargs):
x = sparse.random((2, 3, 4), density=.25)
def test_reductions(reduction, random_sparse, axis, keepdims, kwargs):
x = random_sparse
y = x.todense()
xx = getattr(x, reduction)(axis=axis, keepdims=keepdims, **kwargs)
yy = getattr(y, reduction)(axis=axis, keepdims=keepdims, **kwargs)
assert_eq(xx, yy, **eqkwargs)
assert_eq(xx, yy)


@pytest.mark.parametrize('reduction,kwargs,eqkwargs', [
('any', {}, {}),
('all', {}, {}),
@pytest.mark.xfail(reason=('Setting output dtype=float16 produces results '
'inconsistent with numpy'))
@pytest.mark.filterwarnings('ignore:overflow')
@pytest.mark.parametrize('reduction, kwargs', [
('sum', {'dtype': np.float16}),
('mean', {'dtype': np.float16}),
Collaborator:

Optional: The reason we had eqkwargs for float16 was large floating-point inaccuracies with float16. We're basically using np.allclose to verify equivalence, and the kwargs in assert_eq are passed along to that.

It might be nice to rewrite this at some point, but that's how it is now. There is a plan to re-write this, basically something that takes two callables and some shapes and compares the NumPy and sparse functions (see #163).

Anyway, back to the point: If the rtol and atol parameters are increased, I'm pretty sure this will pass as well.

Collaborator (Author):

Anyway, back to the point: If the rtol and atol parameters are increased, I'm pretty sure this will pass as well.

This may be true, but the errors have to be pretty large. With atol=10, rtol=10, you still get errors for sum. This is an algorithmic difference, not just floating point error.

Collaborator:

With atol=10, rtol=10, you still get errors for sum.

In that case, feel free to leave this as-is for now (and if you want, raise an issue documenting this).

])
@pytest.mark.parametrize('axis', [None, 0, 1, 2, (0, 2)])
def test_reductions_float16(random_sparse, reduction, kwargs, axis):
x = random_sparse
y = x.todense()
xx = getattr(x, reduction)(axis=axis, **kwargs)
yy = getattr(y, reduction)(axis=axis, **kwargs)
assert_eq(xx, yy, atol=1e-2)
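The float16 inaccuracy discussed in the thread above is easy to reproduce: with only a 10-bit significand, adjacent float16 values above 2048 are 2 apart, so a running float16 sum silently drops increments of 1 once it crosses that threshold. A minimal demonstration of the precision limit (not of the sparse code paths):

```python
import numpy as np

# Above 2048, the gap between representable float16 values is 2.0 ...
assert np.spacing(np.float16(2048)) == 2.0

# ... so adding 1 to 2048 in float16 is lost entirely (2049 rounds back
# to 2048 under round-half-to-even).
assert np.float16(2048) + np.float16(1) == np.float16(2048)
```

This is why different summation orders (dense vs. sparse) can diverge by far more than a small `atol`/`rtol` would tolerate — an algorithmic difference, as the author notes, not just ordinary rounding noise.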


@pytest.mark.parametrize('reduction,kwargs', [
('any', {}),
('all', {}),
])
@pytest.mark.parametrize('axis', [None, 0, 1, 2, (0, 2), -3, (1, -1)])
@pytest.mark.parametrize('keepdims', [True, False])
def test_reductions_bool(reduction, axis, keepdims, kwargs, eqkwargs):
x = sparse.random((2, 3, 4), density=.25).astype(bool)
y = x.todense()
def test_reductions_bool(random_sparse, reduction, kwargs, axis, keepdims):
y = np.zeros((2, 3, 4), dtype=bool)
y[0] = True
y[1, 1, 1] = True
x = sparse.COO.from_numpy(y)
xx = getattr(x, reduction)(axis=axis, keepdims=keepdims, **kwargs)
yy = getattr(y, reduction)(axis=axis, keepdims=keepdims, **kwargs)
assert_eq(xx, yy, **eqkwargs)
assert_eq(xx, yy)


@pytest.mark.parametrize('reduction,kwargs,eqkwargs', [
(np.max, {}, {}),
(np.sum, {}, {}),
(np.sum, {'dtype': np.float16}, {'atol': 1e-2}),
(np.prod, {}, {}),
(np.min, {}, {}),
@pytest.mark.parametrize('reduction,kwargs', [
(np.max, {}),
(np.sum, {}),
(np.sum, {'dtype': np.float32}),
(np.mean, {}),
(np.mean, {'dtype': np.float32}),
(np.prod, {}),
(np.min, {}),
])
@pytest.mark.parametrize('axis', [None, 0, 1, 2, (0, 2), -1, (0, -1)])
@pytest.mark.parametrize('keepdims', [True, False])
def test_ufunc_reductions(reduction, axis, keepdims, kwargs, eqkwargs):
x = sparse.random((2, 3, 4), density=.5)
def test_ufunc_reductions(random_sparse, reduction, kwargs, axis, keepdims):
x = random_sparse
y = x.todense()
xx = reduction(x, axis=axis, keepdims=keepdims, **kwargs)
yy = reduction(y, axis=axis, keepdims=keepdims, **kwargs)
assert_eq(xx, yy, **eqkwargs)
assert_eq(xx, yy)
# If not a scalar/1 element array, must be a sparse array
if xx.size > 1:
assert isinstance(xx, COO)


@pytest.mark.parametrize('reduction,kwargs', [
@@ -73,10 +109,14 @@ def test_ufunc_reductions_kwargs(reduction, kwargs):
xx = reduction(x, **kwargs)
yy = reduction(y, **kwargs)
assert_eq(xx, yy)
# If not a scalar/1 element array, must be a sparse array
if xx.size > 1:
assert isinstance(xx, COO)


@pytest.mark.parametrize('reduction', [
'nansum',
'nanmean',
'nanprod',
'nanmax',
'nanmin',
@@ -85,6 +125,7 @@ def test_ufunc_reductions_kwargs(reduction, kwargs):
@pytest.mark.parametrize('keepdims', [False])
@pytest.mark.parametrize('fraction', [0.25, 0.5, 0.75, 1.0])
@pytest.mark.filterwarnings('ignore:All-NaN')
@pytest.mark.filterwarnings('ignore:Mean of empty slice')
def test_nan_reductions(reduction, axis, keepdims, fraction):
s = sparse.random((2, 3, 4), data_rvs=random_value_array(np.nan, fraction),
Collaborator:

It seems you forgot to change to the fixture here.

Collaborator (Author):

I don't think so? nanmean throws that warning instead of All-NaN, we need both filters to cover all the warnings from this test. Unless I'm misunderstanding your comment here.

Collaborator:

Ah, my bad. The fixture doesn't do NaNs. Ignore. It would be nice to add a non-floating dtype here to hit the line we missed -- but leaving it as-is is okay too.

Collaborator:

I meant that the fixture you created wasn't used in the NaN-reductions, but like I said, you can safely ignore me.

density=.25)
@@ -97,6 +138,7 @@ def test_nan_reductions(reduction, axis, keepdims, fraction):
@pytest.mark.parametrize('reduction', [
'nanmax',
'nanmin',
'nanmean',
])
@pytest.mark.parametrize('axis', [None, 0, 1])
def test_all_nan_reduction_warning(reduction, axis):
@@ -1263,10 +1305,14 @@ def test_op_scipy_sparse(func):
@pytest.mark.parametrize('func', [
operator.add,
operator.sub,
pytest.mark.xfail(operator.mul, reason='Scipy sparse auto-densifies in this case.'),
pytest.mark.xfail(operator.gt, reason='Scipy sparse doesn\'t support this yet.'),
pytest.mark.xfail(operator.lt, reason='Scipy sparse doesn\'t support this yet.'),
pytest.mark.xfail(operator.ne, reason='Scipy sparse doesn\'t support this yet.'),
pytest.param(operator.mul,
marks=pytest.mark.xfail(reason="Scipy sparse auto-densifies in this case.")),
pytest.param(operator.gt,
marks=pytest.mark.xfail(reason="Scipy sparse doesn't support this yet.")),
pytest.param(operator.lt,
marks=pytest.mark.xfail(reason="Scipy sparse doesn't support this yet.")),
pytest.param(operator.ne,
marks=pytest.mark.xfail(reason="Scipy sparse doesn't support this yet.")),
])
def test_op_scipy_sparse_left(func):
ys = sparse.random((3, 4), density=0.5)