In [1]:
from numba import vectorize
import numpy as np
from timeit import default_timer as timer

# GPU accelerated

Element-wise vector addition on the GPU, using a Numba `@vectorize` ufunc compiled for the CUDA target. This should be quite fast.

In [2]:
# Element-wise float32 addition, compiled by Numba's @vectorize into a ufunc
# for the CUDA target. The explicit signature limits it to float32 operands
# and a float32 result.
@vectorize(['float32(float32, float32)'], target='cuda')
def vector_add_gpu(a, b):
    # Scalar kernel body: per the signature, `a` and `b` are single float32
    # values here, not whole arrays.
    return a + b

def test_vector_add_gpu(n=32000000):
    """Time ``vector_add_gpu`` on two float32 vectors of ones and print the result.

    Parameters
    ----------
    n : int, optional
        Number of elements in each input vector. Defaults to 32,000,000,
        the size used for the benchmark in this notebook.

    Returns
    -------
    None
        The first and last five elements of the sum and the elapsed
        wall-clock time (in seconds) are printed.
    """
    A = np.ones(n, dtype=np.float32)
    B = np.ones(n, dtype=np.float32)

    # No output buffer is pre-allocated: the call below returns the result
    # array itself, so a separate np.zeros(n) would be discarded immediately.
    # NOTE(review): A and B are host NumPy arrays, so the measured time
    # presumably includes host<->device transfers -- confirm against the
    # Numba CUDA ufunc documentation.
    start = timer()
    C = vector_add_gpu(A, B)
    vector_add_gpu_time = timer() - start

    print('C[:5] = {}'.format(C[:5]))
    print('C[-5:] = {}'.format(C[-5:]))

    print('vector_add_gpu took {} seconds'.format(vector_add_gpu_time))

In [4]:
test_vector_add_gpu()

C[:5] = [ 2.  2.  2.  2.  2.]
C[-5:] = [ 2.  2.  2.  2.  2.]
vector_add_gpu took 0.1461798809999948 seconds


# CPU benchmark

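Vector addition on the CPU using a plain Python `for` loop over the elements (no NumPy vectorization). Because every element is handled by the interpreter, this should be much slower (~10 s to minutes).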

In [5]:
def vector_add_cpu(a, b, c):
    """Add ``a`` and ``b`` element by element, storing each sum in ``c``.

    This is the interpreted, one-element-at-a-time baseline: it walks the
    indices ``0 .. a.size - 1`` in a Python loop and writes ``a[i] + b[i]``
    into ``c[i]``. Nothing is returned; ``c`` is modified in place.
    """
    n_elements = a.size
    for idx in range(n_elements):
        c[idx] = a[idx] + b[idx]
        
def test_vector_add_cpu(n=32000000):
    """Time ``vector_add_cpu`` on two float32 vectors of ones and print the result.

    Parameters
    ----------
    n : int, optional
        Number of elements in each vector. Defaults to 32,000,000, the size
        used for the benchmark in this notebook; pass a smaller value for a
        quick sanity run, since the pure-Python loop is slow.

    Returns
    -------
    None
        The first and last five elements of the sum and the elapsed
        wall-clock time (in seconds) are printed.
    """
    A = np.ones(n, dtype=np.float32)
    B = np.ones(n, dtype=np.float32)
    # Output buffer: vector_add_cpu writes its results into C in place.
    C = np.zeros(n, dtype=np.float32)

    # Only the addition loop is timed; array allocation happens before `start`.
    start = timer()
    vector_add_cpu(A, B, C)
    vector_add_cpu_time = timer() - start

    print('C[:5] = {}'.format(C[:5]))
    print('C[-5:] = {}'.format(C[-5:]))

    print('vector_add_cpu took {} seconds'.format(vector_add_cpu_time))

In [6]:
test_vector_add_cpu()

C[:5] = [ 2.  2.  2.  2.  2.]
C[-5:] = [ 2.  2.  2.  2.  2.]
vector_add_cpu took 10.688909762999998 seconds
