In [None]:
%reset -f
import numpy as np
import pycuda.autoinit
import pycuda.driver as drv
import pycuda.gpuarray as gpuarray
from pycuda.sparse.packeted import PacketedSpMV
from pycuda.tools import DeviceMemoryPool
from scipy.sparse import csr_matrix

# Make sure we can detect GPUArray instances

In [None]:
# Build a small host array and copy it to the device.
x = np.arange(5)
d_x = gpuarray.to_gpu(x)

# Inspect the device copy (d_x) rather than the host array: the goal of this
# section is to recognise GPUArray objects, and x is a plain numpy array.
print(type(d_x))
print(d_x.__class__)
# isinstance() is the check the rest of the notebook can rely on.
print(isinstance(d_x, gpuarray.GPUArray))

# Make sure PacketedSpMV stores the CSR matrix in GPU arrays

## Initialize arrays

In [None]:
# Element type shared by every host array built in this cell.
dtype = np.float32

# 7x7 test matrix, written out densely so the non-zero pattern is easy to read.
# Row 3 contains no non-zeros.
a_dense = np.asarray(
    [
        [0, 0, 0, 1, 0, 0, 0],
        [0, 2, 0, 0, 0, 0, 0],
        [0, 0, 4, 0, 0, 0, 3],
        [0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 5, 0, 0, 0],
        [1, 0, 0, 2, 4, 0, 0],
        [0, 3, 0, 0, 0, 0, 0],
    ],
    dtype=dtype,
)

# CSR form of the same matrix, plus direct handles on its three raw buffers
# (values, row pointers, column indices).
a_sparse = csr_matrix(a_dense)
a_data, a_row_ptrs, a_col_idxs = a_sparse.data, a_sparse.indptr, a_sparse.indices

# Right-hand side and the expected product a_dense @ b, both as column vectors.
b = np.transpose(np.array([[1, 2, 3, 4, 5, 6, 7]], dtype=dtype))
c_correct = np.transpose(np.array([[4, 4, 33, 0, 20, 29, 6]], dtype=dtype))

# Problem dimensions: A is m x k, b is k x n, c is m x n.
m, k = a_sparse.shape
n = 1

## Create PacketedSpMV instance

In [None]:
# Build the packeted SpMV operator from the CSR matrix. The matrix is declared
# non-symmetric and the operator is given the notebook's dtype.
spmv = PacketedSpMV(a_sparse, is_symmetric=False, dtype=dtype)

In [None]:
# Dump every attribute of the operator, one per line, as "name (type) = value".
# Attributes are fetched lazily, in dir() order, right before they are printed.
for attr, val in ((name, getattr(spmv, name)) for name in dir(spmv)):
    print('{} ({}) = {}'.format(attr, type(val).__name__, val))

In [None]:
# List only the attributes of the operator that are device arrays.
# isinstance() is used instead of matching on the class name, so GPUArray
# subclasses are included and unrelated classes whose name merely ends in
# "GPUArray" are not.
for attr in dir(spmv):
    val = getattr(spmv, attr)
    if isinstance(val, gpuarray.GPUArray):
        print('{} ({}) = {}'.format(attr, type(val).__name__, val))

# Make sure we can correctly calculate the product

## Move b to the GPU and initialize c there

In [None]:
# One memory pool backs both device buffers created below.
dev_pool = DeviceMemoryPool()

# Upload the right-hand side, allocating through the pool.
d_b = gpuarray.to_gpu(b, allocator=dev_pool.allocate)

# Zero-filled output buffer of length m, drawn from the same allocator as d_b.
d_c = gpuarray.zeros(m, dtype=dtype, allocator=d_b.allocator)

## Calculate the matrix product

In [None]:
# Apply the operator to d_b. d_c is handed in as the second argument and is
# rebound to whatever the call returns.
# NOTE(review): pycuda's CG solver appears to wrap this operator with
# spmv.permute() / spmv.unpermute(); confirm that calling it on un-permuted
# data is valid for this matrix.
d_c = spmv(d_b, d_c)

# Copy the result back to the host once and reuse it for display and checking.
c_computed = d_c.get().flatten()
c_expected = c_correct.flatten()

print('C computed: {}'.format(c_computed))
print('C correct: {}'.format(c_expected))

# Fail loudly on a mismatch instead of relying on a visual comparison of the
# two printed vectors.
np.testing.assert_allclose(c_computed, c_expected, rtol=1e-6)

# Run benchmarks