From 0a8d753debc3bc52514271c79854258cdadc06f3 Mon Sep 17 00:00:00 2001 From: Scott Sievert Date: Thu, 26 Jul 2018 14:06:46 -0500 Subject: [PATCH 1/4] ENH: implement any and all functions --- sparse/coo/core.py | 121 +++++++++++++++++++++++++++++++++++++++ sparse/tests/test_coo.py | 6 ++ 2 files changed, 127 insertions(+) diff --git a/sparse/coo/core.py b/sparse/coo/core.py index 1ec395be..58c62983 100644 --- a/sparse/coo/core.py +++ b/sparse/coo/core.py @@ -811,6 +811,127 @@ def max(self, axis=None, keepdims=False, out=None): """ return np.maximum.reduce(self, out=out, axis=axis, keepdims=keepdims) + def any(self, axis=None, keepdims=False, out=None): + """ + Minimize along the given axes. Uses all axes by default. + + Parameters + ---------- + axis : Union[int, Iterable[int]], optional + The axes along which to minimize. Uses all axes by default. + keepdims : bool, optional + Whether or not to keep the dimensions of the original array. + dtype: numpy.dtype + The data type of the output array. + + Returns + ------- + COO + The reduced output sparse array. + + See Also + -------- + :obj:`numpy.min` : Equivalent numpy function. + scipy.sparse.coo_matrix.min : Equivalent Scipy function. + :obj:`nanmin` : Function with ``NaN`` skipping. + + Notes + ----- + * This function internally calls :obj:`COO.sum_duplicates` to bring the array into + canonical form. + * The :code:`out` parameter is provided just for compatibility with Numpy and + isn't actually supported. + + Examples + -------- + You can use :obj:`COO.min` to minimize an array across any dimension. + + >>> x = np.add.outer(np.arange(5), np.arange(5)) + >>> x # doctest: +NORMALIZE_WHITESPACE + array([[0, 1, 2, 3, 4], + [1, 2, 3, 4, 5], + [2, 3, 4, 5, 6], + [3, 4, 5, 6, 7], + [4, 5, 6, 7, 8]]) + >>> s = COO.from_numpy(x) + >>> s2 = s.any(axis=1) + >>> s2.todense() # doctest: +NORMALIZE_WHITESPACE + array([True, True, True, True, True]) + + You can also use the :code:`keepdims` argument to keep the dimensions after the + minimization. + + >>> s3 = s.any(axis=1, keepdims=True) + >>> s3.shape + (5, 1) + + By default, this reduces the array down to one number, minimizing along all axes. + + >>> s.any() + True + """ + return np.logical_or.reduce(self, out=out, axis=axis, keepdims=keepdims) + + def all(self, axis=None, keepdims=False, out=None): + """ + See if all values in array are True. Uses all axes by default. + + Parameters + ---------- + axis : Union[int, Iterable[int]], optional + The axes along which to minimize. Uses all axes by default. + keepdims : bool, optional + Whether or not to keep the dimensions of the original array. + dtype: numpy.dtype + The data type of the output array. + + Returns + ------- + COO + The reduced output sparse array. + + See Also + -------- + :obj:`numpy.all` : Equivalent numpy function. + scipy.sparse.coo_matrix.all : Equivalent Scipy function. + + Notes + ----- + * This function internally calls :obj:`COO.sum_duplicates` to bring the array into + canonical form. + * The :code:`out` parameter is provided just for compatibility with Numpy and + isn't actually supported. + + Examples + -------- + You can use :obj:`COO.min` to minimize an array across any dimension. + + >>> x = np.all.outer(np.arange(5), np.arange(5)) + >>> x # doctest: +NORMALIZE_WHITESPACE + array([[0, 1, 2, 3, 4], + [1, 2, 3, 4, 5], + [2, 3, 4, 5, 6], + [3, 4, 5, 6, 7], + [4, 5, 6, 7, 8]]) + >>> s = COO.from_numpy(x) + >>> s2 = s.all(axis=1) + >>> s2.todense() # doctest: +NORMALIZE_WHITESPACE + array([False, True, True, True, True]) + + You can also use the :code:`keepdims` argument to keep the dimensions after the + minimization. + + >>> s3 = s.all(axis=1, keepdims=True) + >>> s3.shape + (5, 1) + + By default, this reduces the array down to one number, minimizing along all axes. + + >>> s.all() + False + """ + return np.logical_and.reduce(self, out=out, axis=axis, keepdims=keepdims) + def min(self, axis=None, keepdims=False, out=None): """ Minimize along the given axes. Uses all axes by default. diff --git a/sparse/tests/test_coo.py b/sparse/tests/test_coo.py index abca2806..5be270eb 100644 --- a/sparse/tests/test_coo.py +++ b/sparse/tests/test_coo.py @@ -16,6 +16,8 @@ ('sum', {'dtype': np.float16}, {'atol': 1e-2}), ('prod', {}, {}), ('min', {}, {}), + ('all', {}, {}), + ('any', {}, {}), ]) @pytest.mark.parametrize('axis', [None, 0, 1, 2, (0, 2), -3, (1, -1)]) @pytest.mark.parametrize('keepdims', [True, False]) @@ -33,6 +35,8 @@ def test_reductions(reduction, axis, keepdims, kwargs, eqkwargs): (np.sum, {'dtype': np.float16}, {'atol': 1e-2}), (np.prod, {}, {}), (np.min, {}, {}), + (np.all, {}, {}), + (np.any, {}, {}), ]) @pytest.mark.parametrize('axis', [None, 0, 1, 2, (0, 2), -1, (0, -1)]) @pytest.mark.parametrize('keepdims', [True, False]) @@ -51,6 +55,8 @@ def test_ufunc_reductions(reduction, axis, keepdims, kwargs, eqkwargs): (np.add.reduce, {}), (np.add.reduce, {'keepdims': True}), (np.minimum.reduce, {'axis': 0}), + (np.logical_or.reduce, {'axis': 0}), + (np.logical_and.reduce, {'axis': 0}), ]) def test_ufunc_reductions_kwargs(reduction, kwargs): x = sparse.random((2, 3, 4), density=.5) From 0d278aa37708ef3cc7f0cd361e36cdb5034bb019 Mon Sep 17 00:00:00 2001 From: Hameer Abbasi Date: Sun, 5 Aug 2018 11:49:00 +0200 Subject: [PATCH 2/4] Make my own recommended changes. --- docs/generated/sparse.COO.all.rst | 6 ++++ docs/generated/sparse.COO.any.rst | 6 ++++ docs/generated/sparse.COO.rst | 7 +++-- sparse/coo/core.py | 43 ++++++++++--------------- sparse/tests/test_coo.py | 52 ++++++++++++++++++------------- 5 files changed, 64 insertions(+), 50 deletions(-) create mode 100644 docs/generated/sparse.COO.all.rst create mode 100644 docs/generated/sparse.COO.any.rst diff --git a/docs/generated/sparse.COO.all.rst b/docs/generated/sparse.COO.all.rst new file mode 100644 index 00000000..f79e99c4 --- /dev/null +++ b/docs/generated/sparse.COO.all.rst @@ -0,0 +1,6 @@ +COO.all +======= + +.. currentmodule:: sparse + +.. automethod:: COO.all \ No newline at end of file diff --git a/docs/generated/sparse.COO.any.rst b/docs/generated/sparse.COO.any.rst new file mode 100644 index 00000000..1bdad68c --- /dev/null +++ b/docs/generated/sparse.COO.any.rst @@ -0,0 +1,6 @@ +COO.any +======= + +.. currentmodule:: sparse + +.. automethod:: COO.any \ No newline at end of file diff --git a/docs/generated/sparse.COO.rst b/docs/generated/sparse.COO.rst index d0a1c9f2..cdd16d4e 100644 --- a/docs/generated/sparse.COO.rst +++ b/docs/generated/sparse.COO.rst @@ -40,10 +40,13 @@ COO :toctree: COO.reduce + COO.sum - COO.max - COO.min COO.prod + COO.min + COO.max + COO.any + COO.all .. rubric:: :ref:`Converting to other formats ` .. autosummary:: diff --git a/sparse/coo/core.py b/sparse/coo/core.py index 58c62983..d9a00bfd 100644 --- a/sparse/coo/core.py +++ b/sparse/coo/core.py @@ -813,7 +813,7 @@ def max(self, axis=None, keepdims=False, out=None): def any(self, axis=None, keepdims=False, out=None): """ - Minimize along the given axes. Uses all axes by default. + See if any values along array are ``True``. Uses all axes by default. Parameters ---------- @@ -831,9 +831,7 @@ def any(self, axis=None, keepdims=False, out=None): See Also -------- - :obj:`numpy.min` : Equivalent numpy function. - scipy.sparse.coo_matrix.min : Equivalent Scipy function. - :obj:`nanmin` : Function with ``NaN`` skipping. + :obj:`numpy.all` : Equivalent numpy function. Notes ----- @@ -846,24 +844,21 @@ def any(self, axis=None, keepdims=False, out=None): -------- You can use :obj:`COO.min` to minimize an array across any dimension. - >>> x = np.add.outer(np.arange(5), np.arange(5)) - >>> x # doctest: +NORMALIZE_WHITESPACE - array([[0, 1, 2, 3, 4], - [1, 2, 3, 4, 5], - [2, 3, 4, 5, 6], - [3, 4, 5, 6, 7], - [4, 5, 6, 7, 8]]) + >>> x = np.array([[False, False], + ... [False, True ], + ... [True, False], + ... [True, True ]]) >>> s = COO.from_numpy(x) >>> s2 = s.any(axis=1) >>> s2.todense() # doctest: +NORMALIZE_WHITESPACE - array([True, True, True, True, True]) + array([False, True, True, True]) You can also use the :code:`keepdims` argument to keep the dimensions after the minimization. >>> s3 = s.any(axis=1, keepdims=True) >>> s3.shape - (5, 1) + (4, 1) By default, this reduces the array down to one number, minimizing along all axes. @@ -874,7 +869,7 @@ def any(self, axis=None, keepdims=False, out=None): def all(self, axis=None, keepdims=False, out=None): """ - See if all values in array are True. Uses all axes by default. + See if all values in an array are ``True``. Uses all axes by default. Parameters ---------- @@ -893,7 +888,6 @@ def all(self, axis=None, keepdims=False, out=None): See Also -------- :obj:`numpy.all` : Equivalent numpy function. - scipy.sparse.coo_matrix.all : Equivalent Scipy function. Notes ----- @@ -906,26 +900,23 @@ def all(self, axis=None, keepdims=False, out=None): -------- You can use :obj:`COO.min` to minimize an array across any dimension. - >>> x = np.all.outer(np.arange(5), np.arange(5)) - >>> x # doctest: +NORMALIZE_WHITESPACE - array([[0, 1, 2, 3, 4], - [1, 2, 3, 4, 5], - [2, 3, 4, 5, 6], - [3, 4, 5, 6, 7], - [4, 5, 6, 7, 8]]) + >>> x = np.array([[False, False], + ... [False, True ], + ... [True, False], + ... [True, True ]]) >>> s = COO.from_numpy(x) >>> s2 = s.all(axis=1) >>> s2.todense() # doctest: +NORMALIZE_WHITESPACE - array([False, True, True, True, True]) + array([False, False, False, True]) You can also use the :code:`keepdims` argument to keep the dimensions after the minimization. >>> s3 = s.all(axis=1, keepdims=True) >>> s3.shape - (5, 1) + (4, 1) - By default, this reduces the array down to one number, minimizing along all axes. + By default, this reduces the array down to one boolean, minimizing along all axes. >>> s.all() False @@ -986,7 +977,7 @@ def min(self, axis=None, keepdims=False, out=None): >>> s3.shape (5, 1) - By default, this reduces the array down to one number, minimizing along all axes. + By default, this reduces the array down to one boolean, minimizing along all axes. >>> s.min() 0 diff --git a/sparse/tests/test_coo.py b/sparse/tests/test_coo.py index 5be270eb..ac3bf4fd 100644 --- a/sparse/tests/test_coo.py +++ b/sparse/tests/test_coo.py @@ -16,8 +16,6 @@ ('sum', {'dtype': np.float16}, {'atol': 1e-2}), ('prod', {}, {}), ('min', {}, {}), - ('all', {}, {}), - ('any', {}, {}), ]) @pytest.mark.parametrize('axis', [None, 0, 1, 2, (0, 2), -3, (1, -1)]) @pytest.mark.parametrize('keepdims', [True, False]) @@ -29,14 +27,26 @@ def test_reductions(reduction, axis, keepdims, kwargs, eqkwargs): assert_eq(xx, yy, **eqkwargs) +@pytest.mark.parametrize('reduction,kwargs,eqkwargs', [ + ('any', {}, {}), + ('all', {}, {}), +]) +@pytest.mark.parametrize('axis', [None, 0, 1, 2, (0, 2), -3, (1, -1)]) +@pytest.mark.parametrize('keepdims', [True, False]) +def test_reductions_bool(reduction, axis, keepdims, kwargs, eqkwargs): + x = sparse.random((2, 3, 4), density=.25).astype(bool) + y = x.todense() + xx = getattr(x, reduction)(axis=axis, keepdims=keepdims, **kwargs) + yy = getattr(y, reduction)(axis=axis, keepdims=keepdims, **kwargs) + assert_eq(xx, yy, **eqkwargs) + + @pytest.mark.parametrize('reduction,kwargs,eqkwargs', [ (np.max, {}, {}), (np.sum, {}, {}), (np.sum, {'dtype': np.float16}, {'atol': 1e-2}), (np.prod, {}, {}), (np.min, {}, {}), - (np.all, {}, {}), - (np.any, {}, {}), ]) @pytest.mark.parametrize('axis', [None, 0, 1, 2, (0, 2), -1, (0, -1)]) @pytest.mark.parametrize('keepdims', [True, False]) @@ -55,8 +65,6 @@ def test_ufunc_reductions(reduction, axis, keepdims, kwargs, eqkwargs): (np.add.reduce, {}), (np.add.reduce, {'keepdims': True}), (np.minimum.reduce, {'axis': 0}), - (np.logical_or.reduce, {'axis': 0}), - (np.logical_and.reduce, {'axis': 0}), ]) def test_ufunc_reductions_kwargs(reduction, kwargs): x = sparse.random((2, 3, 4), density=.5) @@ -439,25 +447,25 @@ def test_trinary_broadcasting(shapes, func): @pytest.mark.parametrize('shapes, func', [ ([ - (2,), - (3, 2), - (4, 3, 2), - ], lambda x, y, z: (x + y) * z), + (2,), + (3, 2), + (4, 3, 2), + ], lambda x, y, z: (x + y) * z), ([ - (3,), - (2, 3), - (2, 2, 3), - ], lambda x, y, z: x * (y + z)), + (3,), + (2, 3), + (2, 2, 3), + ], lambda x, y, z: x * (y + z)), ([ - (2,), - (2, 2), - (2, 2, 2), - ], lambda x, y, z: x * y * z), + (2,), + (2, 2), + (2, 2, 2), + ], lambda x, y, z: x * y * z), ([ - (4,), - (4, 4), - (4, 4, 4), - ], lambda x, y, z: x + y + z), + (4,), + (4, 4), + (4, 4, 4), + ], lambda x, y, z: x + y + z), ]) @pytest.mark.parametrize('value', [ np.nan, From 2b00b793dd0946c8596bc90f337e241368cfe08a Mon Sep 17 00:00:00 2001 From: Hameer Abbasi Date: Sun, 5 Aug 2018 11:57:37 +0200 Subject: [PATCH 3/4] Skip evil tests. --- sparse/coo/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sparse/coo/core.py b/sparse/coo/core.py index d9a00bfd..bbe287f8 100644 --- a/sparse/coo/core.py +++ b/sparse/coo/core.py @@ -850,7 +850,7 @@ def any(self, axis=None, keepdims=False, out=None): ... [True, True ]]) >>> s = COO.from_numpy(x) >>> s2 = s.any(axis=1) - >>> s2.todense() # doctest: +NORMALIZE_WHITESPACE + >>> s2.todense() # doctest: +SKIP array([False, True, True, True]) You can also use the :code:`keepdims` argument to keep the dimensions after the @@ -906,7 +906,7 @@ def all(self, axis=None, keepdims=False, out=None): ... [True, True ]]) >>> s = COO.from_numpy(x) >>> s2 = s.all(axis=1) - >>> s2.todense() # doctest: +NORMALIZE_WHITESPACE + >>> s2.todense() # doctest: +SKIP array([False, False, False, True]) You can also use the :code:`keepdims` argument to keep the dimensions after the From 727e6e5e0fa98a2309df2096c15fd64e8ac9667e Mon Sep 17 00:00:00 2001 From: Hameer Abbasi Date: Sun, 5 Aug 2018 12:16:45 +0200 Subject: [PATCH 4/4] Get rid of superfluous parameter in doctest. --- sparse/coo/core.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sparse/coo/core.py b/sparse/coo/core.py index bbe287f8..881a721f 100644 --- a/sparse/coo/core.py +++ b/sparse/coo/core.py @@ -821,8 +821,6 @@ def any(self, axis=None, keepdims=False, out=None): The axes along which to minimize. Uses all axes by default. keepdims : bool, optional Whether or not to keep the dimensions of the original array. - dtype: numpy.dtype - The data type of the output array. Returns ------- @@ -877,8 +875,6 @@ def all(self, axis=None, keepdims=False, out=None): The axes along which to minimize. Uses all axes by default. keepdims : bool, optional Whether or not to keep the dimensions of the original array. - dtype: numpy.dtype - The data type of the output array. Returns -------