New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add COO.mean and sparse.nanmean #190
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
COO\.mean | ||
========= | ||
|
||
.. currentmodule:: sparse | ||
|
||
.. automethod:: COO.mean |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -43,6 +43,7 @@ COO | |
COO.reduce | ||
|
||
COO.sum | ||
COO.mean | ||
COO.prod | ||
COO.min | ||
COO.max | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
nanmean | ||
======= | ||
|
||
.. currentmodule:: sparse | ||
|
||
.. autofunction:: nanmean |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -41,6 +41,8 @@ API | |
|
||
nanmax | ||
|
||
nanmean | ||
|
||
nanmin | ||
|
||
nanprod | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
from .core import COO, as_coo | ||
from .umath import elemwise | ||
from .common import (tensordot, dot, concatenate, stack, triu, tril, where, | ||
nansum, nanprod, nanmin, nanmax, nanreduce, roll, eye, | ||
full, full_like, zeros, zeros_like, ones, ones_like) | ||
nansum, nanmean, nanprod, nanmin, nanmax, nanreduce, roll, | ||
eye, full, full_like, zeros, zeros_like, ones, ones_like) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -426,6 +426,62 @@ def nansum(x, axis=None, keepdims=False, dtype=None, out=None): | |
return nanreduce(x, np.add, axis=axis, keepdims=keepdims, dtype=dtype) | ||
|
||
|
||
def nanmean(x, axis=None, keepdims=False, dtype=None, out=None): | ||
""" | ||
Performs a ``NaN`` skipping mean operation along the given axes. Uses all axes by default. | ||
|
||
Parameters | ||
---------- | ||
x : SparseArray | ||
The array to perform the reduction on. | ||
axis : Union[int, Iterable[int]], optional | ||
The axes along which to compute the mean. Uses all axes by default. | ||
keepdims : bool, optional | ||
Whether or not to keep the dimensions of the original array. | ||
dtype : numpy.dtype | ||
The data type of the output array. | ||
|
||
Returns | ||
------- | ||
COO | ||
The reduced output sparse array. | ||
|
||
See Also | ||
-------- | ||
:obj:`COO.mean` : Function without ``NaN`` skipping. | ||
numpy.nanmean : Equivalent Numpy function. | ||
""" | ||
assert out is None | ||
x = asCOO(x, name='nanmean') | ||
|
||
if not np.issubdtype(x.dtype, np.floating): | ||
return x.mean(axis=axis, keepdims=keepdims, dtype=dtype) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We need a test for this branch, apparently. |
||
|
||
mask = np.isnan(x) | ||
x2 = where(mask, 0, x) | ||
|
||
# Count the number of NaN elements along axis (subtracted from the denominator below) | ||
nancount = mask.sum(axis=axis, dtype='i8', keepdims=keepdims) | ||
if axis is None: | ||
axis = tuple(range(x.ndim)) | ||
elif not isinstance(axis, tuple): | ||
axis = (axis,) | ||
den = 1 | ||
for ax in axis: | ||
den *= x.shape[ax] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This can probably be written more compactly with |
||
den -= nancount | ||
|
||
if np.any(den == 0): | ||
warnings.warn("Mean of empty slice", RuntimeWarning, stacklevel=2) | ||
|
||
num = np.sum(x2, axis=axis, dtype=dtype, keepdims=keepdims) | ||
|
||
with np.errstate(invalid='ignore', divide='ignore'): | ||
if num.ndim: | ||
return np.true_divide(num, den, casting='unsafe') | ||
return (num / den).astype(dtype) | ||
|
||
|
||
def nanmax(x, axis=None, keepdims=False, dtype=None, out=None): | ||
""" | ||
Maximize along the given axes, skipping ``NaN`` values. Uses all axes by default. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1076,6 +1076,93 @@ def prod(self, axis=None, keepdims=False, dtype=None, out=None): | |
""" | ||
return np.multiply.reduce(self, out=out, axis=axis, keepdims=keepdims, dtype=dtype) | ||
|
||
def mean(self, axis=None, keepdims=False, dtype=None, out=None): | ||
""" | ||
Compute the mean along the given axes. Uses all axes by default. | ||
|
||
Parameters | ||
---------- | ||
axis : Union[int, Iterable[int]], optional | ||
The axes along which to compute the mean. Uses all axes by default. | ||
keepdims : bool, optional | ||
Whether or not to keep the dimensions of the original array. | ||
dtype : numpy.dtype | ||
The data type of the output array. | ||
|
||
Returns | ||
------- | ||
COO | ||
The reduced output sparse array. | ||
|
||
See Also | ||
-------- | ||
numpy.ndarray.mean : Equivalent numpy method. | ||
scipy.sparse.coo_matrix.mean : Equivalent Scipy method. | ||
|
||
Notes | ||
----- | ||
* This function internally calls :obj:`COO.sum_duplicates` to bring the | ||
array into canonical form. | ||
* The :code:`out` parameter is provided just for compatibility with | ||
Numpy and isn't actually supported. | ||
|
||
Examples | ||
-------- | ||
You can use :obj:`COO.mean` to compute the mean of an array across any | ||
dimension. | ||
|
||
>>> x = np.array([[1, 2, 0, 0], | ||
... [0, 1, 0, 0]], dtype='i8') | ||
>>> s = COO.from_numpy(x) | ||
>>> s2 = s.mean(axis=1) | ||
>>> s2.todense() # doctest: +SKIP | ||
array([0.75, 0.25]) | ||
|
||
You can also use the :code:`keepdims` argument to keep the dimensions | ||
after the mean. | ||
|
||
>>> s3 = s.mean(axis=0, keepdims=True) | ||
>>> s3.shape | ||
(1, 4) | ||
|
||
You can pass in an output datatype, if needed. | ||
|
||
>>> s4 = s.mean(axis=0, dtype=np.float16) | ||
>>> s4.dtype | ||
dtype('float16') | ||
|
||
By default, this reduces the array down to one number, computing the | ||
mean along all axes. | ||
|
||
>>> s.mean() | ||
0.5 | ||
""" | ||
if axis is None: | ||
axis = tuple(range(self.ndim)) | ||
elif not isinstance(axis, tuple): | ||
axis = (axis,) | ||
den = 1 | ||
for ax in axis: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This can probably be written more compactly with |
||
den *= self.shape[ax] | ||
|
||
if dtype is None: | ||
if issubclass(self.dtype.type, (np.integer, np.bool_)): | ||
out_dtype = inter_dtype = np.dtype('f8') | ||
else: | ||
out_dtype = self.dtype | ||
inter_dtype = (np.dtype('f4') | ||
if issubclass(out_dtype.type, np.float16) | ||
else out_dtype) | ||
else: | ||
out_dtype = inter_dtype = dtype | ||
|
||
num = self.sum(axis=axis, keepdims=keepdims, dtype=inter_dtype) | ||
|
||
if num.ndim: | ||
out = np.true_divide(num, den, casting='unsafe') | ||
return out.astype(out_dtype) if out.dtype != out_dtype else out | ||
return (num / den).astype(out_dtype) | ||
|
||
def transpose(self, axes=None): | ||
""" | ||
Returns a new array which has the order of the axes switched. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,52 +11,88 @@ | |
from sparse.utils import assert_eq, random_value_array | ||
|
||
|
||
@pytest.mark.parametrize('reduction,kwargs,eqkwargs', [ | ||
('max', {}, {}), | ||
('sum', {}, {}), | ||
('sum', {'dtype': np.float16}, {'atol': 1e-2}), | ||
('prod', {}, {}), | ||
('min', {}, {}), | ||
@pytest.fixture(scope='module', params=['f8', 'f4', 'i8', 'i4']) | ||
def random_sparse(request): | ||
dtype = request.param | ||
if np.issubdtype(dtype, np.integer): | ||
def data_rvs(n): | ||
return np.random.randint(-10000, 10000, n) | ||
else: | ||
data_rvs = None | ||
return sparse.random((20, 30, 40), density=.25, data_rvs=data_rvs).astype(dtype) | ||
|
||
|
||
@pytest.mark.parametrize('reduction, kwargs', [ | ||
('sum', {}), | ||
('sum', {'dtype': np.float32}), | ||
('mean', {}), | ||
('mean', {'dtype': np.float32}), | ||
('prod', {}), | ||
('max', {}), | ||
('min', {}), | ||
]) | ||
@pytest.mark.parametrize('axis', [None, 0, 1, 2, (0, 2), -3, (1, -1)]) | ||
@pytest.mark.parametrize('keepdims', [True, False]) | ||
def test_reductions(reduction, axis, keepdims, kwargs, eqkwargs): | ||
x = sparse.random((2, 3, 4), density=.25) | ||
def test_reductions(reduction, random_sparse, axis, keepdims, kwargs): | ||
x = random_sparse | ||
y = x.todense() | ||
xx = getattr(x, reduction)(axis=axis, keepdims=keepdims, **kwargs) | ||
yy = getattr(y, reduction)(axis=axis, keepdims=keepdims, **kwargs) | ||
assert_eq(xx, yy, **eqkwargs) | ||
assert_eq(xx, yy) | ||
|
||
|
||
@pytest.mark.parametrize('reduction,kwargs,eqkwargs', [ | ||
('any', {}, {}), | ||
('all', {}, {}), | ||
@pytest.mark.xfail(reason=('Setting output dtype=float16 produces results ' | ||
'inconsistent with numpy')) | ||
@pytest.mark.filterwarnings('ignore:overflow') | ||
@pytest.mark.parametrize('reduction, kwargs', [ | ||
('sum', {'dtype': np.float16}), | ||
('mean', {'dtype': np.float16}), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Optional: The reason we had It might be nice to rewrite this at some point, but that's how it is now. There is a plan to re-write this, basically something that takes two callables and some shapes and compares the NumPy and sparse functions (see #163). Anyway, back to the point: If the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
This may be true, but the errors have to be pretty large. With There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
In that case, feel free to leave this as-is for now (and if you want, raise an issue documenting this). |
||
]) | ||
@pytest.mark.parametrize('axis', [None, 0, 1, 2, (0, 2)]) | ||
def test_reductions_float16(random_sparse, reduction, kwargs, axis): | ||
x = random_sparse | ||
y = x.todense() | ||
xx = getattr(x, reduction)(axis=axis, **kwargs) | ||
yy = getattr(y, reduction)(axis=axis, **kwargs) | ||
assert_eq(xx, yy, atol=1e-2) | ||
|
||
|
||
@pytest.mark.parametrize('reduction,kwargs', [ | ||
('any', {}), | ||
('all', {}), | ||
]) | ||
@pytest.mark.parametrize('axis', [None, 0, 1, 2, (0, 2), -3, (1, -1)]) | ||
@pytest.mark.parametrize('keepdims', [True, False]) | ||
def test_reductions_bool(reduction, axis, keepdims, kwargs, eqkwargs): | ||
x = sparse.random((2, 3, 4), density=.25).astype(bool) | ||
y = x.todense() | ||
def test_reductions_bool(random_sparse, reduction, kwargs, axis, keepdims): | ||
y = np.zeros((2, 3, 4), dtype=bool) | ||
y[0] = True | ||
y[1, 1, 1] = True | ||
x = sparse.COO.from_numpy(y) | ||
xx = getattr(x, reduction)(axis=axis, keepdims=keepdims, **kwargs) | ||
yy = getattr(y, reduction)(axis=axis, keepdims=keepdims, **kwargs) | ||
assert_eq(xx, yy, **eqkwargs) | ||
assert_eq(xx, yy) | ||
|
||
|
||
@pytest.mark.parametrize('reduction,kwargs,eqkwargs', [ | ||
(np.max, {}, {}), | ||
(np.sum, {}, {}), | ||
(np.sum, {'dtype': np.float16}, {'atol': 1e-2}), | ||
(np.prod, {}, {}), | ||
(np.min, {}, {}), | ||
@pytest.mark.parametrize('reduction,kwargs', [ | ||
(np.max, {}), | ||
(np.sum, {}), | ||
(np.sum, {'dtype': np.float32}), | ||
(np.mean, {}), | ||
(np.mean, {'dtype': np.float32}), | ||
(np.prod, {}), | ||
(np.min, {}), | ||
]) | ||
@pytest.mark.parametrize('axis', [None, 0, 1, 2, (0, 2), -1, (0, -1)]) | ||
@pytest.mark.parametrize('keepdims', [True, False]) | ||
def test_ufunc_reductions(reduction, axis, keepdims, kwargs, eqkwargs): | ||
x = sparse.random((2, 3, 4), density=.5) | ||
def test_ufunc_reductions(random_sparse, reduction, kwargs, axis, keepdims): | ||
x = random_sparse | ||
y = x.todense() | ||
xx = reduction(x, axis=axis, keepdims=keepdims, **kwargs) | ||
yy = reduction(y, axis=axis, keepdims=keepdims, **kwargs) | ||
assert_eq(xx, yy, **eqkwargs) | ||
assert_eq(xx, yy) | ||
# If not a scalar/1 element array, must be a sparse array | ||
if xx.size > 1: | ||
assert isinstance(xx, COO) | ||
|
||
|
||
@pytest.mark.parametrize('reduction,kwargs', [ | ||
|
@@ -73,10 +109,14 @@ def test_ufunc_reductions_kwargs(reduction, kwargs): | |
xx = reduction(x, **kwargs) | ||
yy = reduction(y, **kwargs) | ||
assert_eq(xx, yy) | ||
# If not a scalar/1 element array, must be a sparse array | ||
if xx.size > 1: | ||
assert isinstance(xx, COO) | ||
|
||
|
||
@pytest.mark.parametrize('reduction', [ | ||
'nansum', | ||
'nanmean', | ||
'nanprod', | ||
'nanmax', | ||
'nanmin', | ||
|
@@ -85,6 +125,7 @@ def test_ufunc_reductions_kwargs(reduction, kwargs): | |
@pytest.mark.parametrize('keepdims', [False]) | ||
@pytest.mark.parametrize('fraction', [0.25, 0.5, 0.75, 1.0]) | ||
@pytest.mark.filterwarnings('ignore:All-NaN') | ||
@pytest.mark.filterwarnings('ignore:Mean of empty slice') | ||
def test_nan_reductions(reduction, axis, keepdims, fraction): | ||
s = sparse.random((2, 3, 4), data_rvs=random_value_array(np.nan, fraction), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems you forgot to change to the fixture here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think so? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, my bad. The fixture doesn't do NaNs. Ignore. It would be nice to and a non-floating There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I meant that the fixture you created wasn't used in the NaN-reductions, but like I said, you can safely ignore me. |
||
density=.25) | ||
|
@@ -97,6 +138,7 @@ def test_nan_reductions(reduction, axis, keepdims, fraction): | |
@pytest.mark.parametrize('reduction', [ | ||
'nanmax', | ||
'nanmin', | ||
'nanmean', | ||
]) | ||
@pytest.mark.parametrize('axis', [None, 0, 1]) | ||
def test_all_nan_reduction_warning(reduction, axis): | ||
|
@@ -1263,10 +1305,14 @@ def test_op_scipy_sparse(func): | |
@pytest.mark.parametrize('func', [ | ||
operator.add, | ||
operator.sub, | ||
pytest.mark.xfail(operator.mul, reason='Scipy sparse auto-densifies in this case.'), | ||
pytest.mark.xfail(operator.gt, reason='Scipy sparse doesn\'t support this yet.'), | ||
pytest.mark.xfail(operator.lt, reason='Scipy sparse doesn\'t support this yet.'), | ||
pytest.mark.xfail(operator.ne, reason='Scipy sparse doesn\'t support this yet.'), | ||
pytest.param(operator.mul, | ||
marks=pytest.mark.xfail(reason="Scipy sparse auto-densifies in this case.")), | ||
pytest.param(operator.gt, | ||
marks=pytest.mark.xfail(reason="Scipy sparse doesn't support this yet.")), | ||
pytest.param(operator.lt, | ||
marks=pytest.mark.xfail(reason="Scipy sparse doesn't support this yet.")), | ||
pytest.param(operator.ne, | ||
marks=pytest.mark.xfail(reason="Scipy sparse doesn't support this yet.")), | ||
]) | ||
def test_op_scipy_sparse_left(func): | ||
ys = sparse.random((3, 4), density=0.5) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm surprised this works --
asCOO
has noname
parameter. I must have forgotten to check for superfluouskwargs
somewhere.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes it does,
asCOO
is different fromas_coo
in core.py. See the top of this file.