In [1]:
from dask.array import from_array
from numpy import array

# Dense 6x6 integer matrix holding 0..35, filled row by row.
A = array(range(36)).reshape(6, 6)

# Wrap A as a Dask array split into 3x3 chunks (a 2x2 grid of blocks).
d = from_array(A, chunks=(3, 3))
d

Unnamed: 0,Array,Chunk
Bytes,288 B,72 B
Shape,"(6, 6)","(3, 3)"
Count,5 Tasks,4 Chunks
Type,int64,numpy.ndarray
"Array Chunk Bytes 288 B 72 B Shape (6, 6) (3, 3) Count 5 Tasks 4 Chunks Type int64 numpy.ndarray",6  6,

Unnamed: 0,Array,Chunk
Bytes,288 B,72 B
Shape,"(6, 6)","(3, 3)"
Count,5 Tasks,4 Chunks
Type,int64,numpy.ndarray


## `scipy.sparse`

In [2]:
from scipy.sparse import spmatrix, coo_matrix, csr_matrix, csc_matrix, dia_matrix

# Convert every 3x3 block of `d` to a scipy.sparse COO matrix; the block
# shape is unchanged, so the chunking is declared as (3, 3) again.
sps = d.map_blocks(coo_matrix, chunks=(3, 3))

# Materialize the sparse-block array using the synchronous scheduler
# and display the resulting object.
spsc = sps.compute(scheduler="synchronous")
spsc

<6x6 sparse matrix of type '<class 'numpy.longlong'>'
	with 35 stored elements in COOrdinate format>

In [3]:
# Expand the computed sparse result into a dense matrix so its values can be displayed.
spsc.todense()

matrix([[ 0,  1,  2,  3,  4,  5],
        [ 6,  7,  8,  9, 10, 11],
        [12, 13, 14, 15, 16, 17],
        [18, 19, 20, 21, 22, 23],
        [24, 25, 26, 27, 28, 29],
        [30, 31, 32, 33, 34, 35]], dtype=int64)

### `axis=None`

Dask dense blocks:

In [4]:
# Sum over all elements of the dense-block Dask array, with `keepdims`
# left at its default, set to False, and set to True.
(
    d.sum().compute(),
    d.sum(keepdims=False).compute(),
    d.sum(keepdims=True).compute(),
)

(630, 630, array([[630]]))

Dask scipy.sparse blocks:

In [5]:
# Same three full reductions, this time on the Dask array whose blocks
# are scipy.sparse COO matrices.
(
    sps.sum().compute(),
    sps.sum(keepdims=False).compute(),
    sps.sum(keepdims=True).compute(),
)

(matrix([[630]]), matrix([[630]]), matrix([[630]]))

scipy.sparse, sans Dask:

In [6]:
# Same three full reductions, called directly on the already-computed
# scipy.sparse matrix (no Dask graph involved).
# NOTE(review): `keepdims` is not part of the documented `sum` signature in
# stock scipy.sparse as far as I know — confirm which scipy build/patch
# this cell relies on.
(
    spsc.sum(),
    spsc.sum(keepdims=False),
    spsc.sum(keepdims=True),
)

(630, 630, matrix([[630]], dtype=int64))

### `axis=0`

Dask dense blocks:

In [7]:
# Column sums (axis=0) of the dense-block Dask array, with `keepdims`
# left at its default, set to False, and set to True.
(
    d.sum(axis=0).compute(),
    d.sum(axis=0, keepdims=False).compute(),
    d.sum(axis=0, keepdims=True).compute(),
)

(array([ 90,  96, 102, 108, 114, 120]),
 array([ 90,  96, 102, 108, 114, 120]),
 array([[ 90,  96, 102, 108, 114, 120]]))

Dask scipy.sparse blocks:

In [8]:
# Column sums (axis=0) of the Dask array with scipy.sparse COO blocks,
# with `keepdims` left at its default, set to False, and set to True.
(
    sps.sum(axis=0).compute(),
    sps.sum(axis=0, keepdims=False).compute(),
    sps.sum(axis=0, keepdims=True).compute(),
)

Forcing keepdims=True for np.matrix.sum(axis=0)
Forcing keepdims=True for np.matrix.sum(axis=0)
Forcing keepdims=True for np.matrix.sum(axis=0)


(array([[ 90,  96, 102, 108, 114, 120]]),
 array([[ 90,  96, 102, 108, 114, 120]]),
 array([[ 90,  96, 102, 108, 114, 120]]))

scipy.sparse, sans Dask:

In [9]:
# Column sums (axis=0) taken directly on the computed scipy.sparse matrix,
# with `keepdims` left at its default, set to False, and set to True.
(
    spsc.sum(axis=0),
    spsc.sum(axis=0, keepdims=False),
    spsc.sum(axis=0, keepdims=True),
)

(matrix([[ 90,  96, 102, 108, 114, 120]], dtype=int64),
 matrix([[ 90,  96, 102, 108, 114, 120]], dtype=int64),
 matrix([[ 90,  96, 102, 108, 114, 120]], dtype=int64))

### `axis=1`

Dask dense blocks:

In [10]:
# Row sums (axis=1) of the dense-block Dask array, with `keepdims`
# left at its default, set to False, and set to True.
(
    d.sum(axis=1).compute(),
    d.sum(axis=1, keepdims=False).compute(),
    d.sum(axis=1, keepdims=True).compute(),
)

(array([ 15,  51,  87, 123, 159, 195]),
 array([ 15,  51,  87, 123, 159, 195]),
 array([[ 15],
        [ 51],
        [ 87],
        [123],
        [159],
        [195]]))

Dask scipy.sparse blocks:

In [11]:
# Row sums (axis=1) of the Dask array with scipy.sparse COO blocks,
# with `keepdims` left at its default, set to False, and set to True.
(
    sps.sum(axis=1).compute(),
    sps.sum(axis=1, keepdims=False).compute(),
    sps.sum(axis=1, keepdims=True).compute(),
)

Forcing keepdims=True for np.matrix.sum(axis=1)
Forcing keepdims=True for np.matrix.sum(axis=1)
Forcing keepdims=True for np.matrix.sum(axis=1)


(array([[ 15],
        [ 51],
        [ 87],
        [123],
        [159],
        [195]]),
 array([[ 15],
        [ 51],
        [ 87],
        [123],
        [159],
        [195]]),
 array([[ 15],
        [ 51],
        [ 87],
        [123],
        [159],
        [195]]))

scipy.sparse, sans Dask:

In [12]:
# Row sums (axis=1) taken directly on the computed scipy.sparse matrix,
# with `keepdims` left at its default, set to False, and set to True.
(
    spsc.sum(axis=1),
    spsc.sum(axis=1, keepdims=False),
    spsc.sum(axis=1, keepdims=True),
)

(matrix([[ 15],
         [ 51],
         [ 87],
         [123],
         [159],
         [195]], dtype=int64),
 matrix([[ 15],
         [ 51],
         [ 87],
         [123],
         [159],
         [195]], dtype=int64),
 matrix([[ 15],
         [ 51],
         [ 87],
         [123],
         [159],
         [195]], dtype=int64))