# Basic linear algebra operations using GPU through TensorFlow

In [1]:
%reset -f
import numpy as np
import tensorflow as tf
from tensorflow.python.client import device_lib
from time import time

## List GPU devices

In [2]:
gpus = [x.name for x in device_lib.list_local_devices() if x.device_type == 'GPU']
print('GPU DEVICES:\n  {}'.format(gpus))

GPU DEVICES:
  ['/device:GPU:0']


# Run benchmarks
Comparing 1 NVIDIA Tesla K80 12GB GPU vs. 8 vCPUs with 30GB RAM.

## Array addition

In [3]:
ITERS = 30
M = 10000
N = 10000

### Using GPU

In [4]:
np.random.seed(0)

with tf.device('/device:GPU:0'):
    
    a = tf.constant(np.random.rand(M, N))
    b = tf.constant(np.random.rand(M, N))
    
    tic = time()
    for ii in range(ITERS):
        c = tf.add(a, b)
    toc = time()
    
elapsed = toc - tic

print('GPU array addition took {} seconds per iteration.'.format(elapsed/ITERS))

GPU array addition took 0.000862272580464681 seconds per iteration.


### Using CPU

In [5]:
np.random.seed(0)

a = np.random.rand(M, N)
b = np.random.rand(M, N)

tic = time()
for ii in range(ITERS):
    c = a + b
toc = time()

elapsed = toc - tic

print('CPU array addition took {} seconds per iteration.'.format(elapsed/ITERS))

CPU array addition took 0.5009345054626465 seconds per iteration.


## Matrix multiplication

In [6]:
ITERS = 30
M = 2000
K = 3000
N = 4000

### Using GPU

In [7]:
with tf.device('/device:GPU:0'):
    
    np.random.seed(0)

    a = tf.constant(np.random.rand(M, K))
    b = tf.constant(np.random.rand(K, N))

    tic = time()
    for ii in range(ITERS):
        c = tf.matmul(a, b)
    toc = time()
    
elapsed = toc - tic
    
print('TF-GPU dense matrix multiplication took {} seconds per iteration.'.format(elapsed/ITERS))

TF-GPU dense matrix multiplication took 0.0014510075251261392 seconds per iteration.


### Using CPU

In [8]:
np.random.seed(0)

a = np.random.rand(M, K)
b = np.random.rand(K, N)

tic = time()
for ii in range(ITERS):
    c = np.dot(a, b)
toc = time()

elapsed = toc - tic

print('Numpy dense matrix multiplication took {} seconds per iteration.'.format(elapsed/ITERS))

Numpy dense matrix multiplication took 0.4294951359430949 seconds per iteration.


## Sparse matrix by dense vector (SPMV) multiplication

In [9]:
from scipy.sparse import csr_matrix

ITERS = 30
N = 20000
P = 0.1

### Using GPU

In [10]:
with tf.device('/device:GPU:0'):
    
    np.random.seed(0)

    a_dense = np.random.rand(N, N)
    a_dense[a_dense > P] = 0
    
    indices = np.transpose(a_dense.nonzero())
    values = a_dense[indices[:, 0], indices[:, 1]]
    dense_shape = a_dense.shape
    
    a_sparse = tf.SparseTensor(indices, values, dense_shape)

    b = tf.constant(np.random.rand(N, 1))
    
    tic = time()
    for ii in range(ITERS):
        c = tf.sparse_tensor_dense_matmul(a_sparse, b)
    toc = time()
    
elapsed = toc - tic

print('GPU spmv product took {} seconds per iteration.'.format(elapsed/ITERS))

GPU spmv product took 0.0011811971664428711 seconds per iteration.


### Using CPU

In [11]:
np.random.seed(0)

a_dense = np.random.rand(N, N)
a_dense[a_dense > P] = 0
a_sparse = csr_matrix(a_dense)

b = np.random.rand(N)

tic = time()
for ii in range(ITERS):
    c = a_sparse.dot(b)
toc = time()

elapsed = toc - tic

print('Scipy spmv product took {} seconds per iteration.'.format(elapsed/ITERS))

Scipy spmv product took 0.06693172454833984 seconds per iteration.
