In [1]:
from numba import jit, cuda
import numpy as np
# to measure exec time
from timeit import default_timer as timer 

# normal function to run on cpu
def func(a):
    for i in range(100000000):
        a[i]+= 1
    return a

# function optimized to run on gpu 
@jit(target_backend='cuda')
def func_numba_cuda(a):
    for i in range(100000000):
        a[i]+= 1
    return a

@jit 
def func_numba(a):
    for i in range(100000000):
        a[i]+= 1
    return a

@jit 
def func_numba_nonvecable(a):
    asum=0.0
    for i in range(100000000):
        a[i]+= 1
        asum += a[i]
    return asum

def func_nonvecable(a):
    asum=0.0
    for i in range(100000000):
        a[i]+= 1
        asum += a[i]
    return asum



n = 100000000
a = np.ones(n, dtype = np.float64)
start = timer()
b=func(a)
print("numpy:                 ", timer()-start, " result= ",b.sum()) 

a = np.ones(n, dtype = np.float64)
start = timer()
b=func_nonvecable(a)
print("numpy nonvecable:     ", timer()-start, " result= ",b)

a = np.ones(n, dtype = np.float64)
start = timer()
b=func_numba_nonvecable(a)
print("numba jit nonvecable:", timer()-start, " result= ",b)

a = np.ones(n, dtype = np.float64)
start = timer()
b=func_numba(a)
print("numba jit:           ", timer()-start, " result= ",b.sum())
   
a = np.ones(n, dtype = np.float64)
start = timer()
b=func_numba_cuda(a)
print("numba jit cuda:      ", timer()-start, " result= ",b.sum())


numpy:                  29.969198439037427  result=  200000000.0
numpy nonvecable:      43.09104367007967  result=  200000000.0
numba jit nonvecable: 0.7172567119123414  result=  200000000.0
numba jit:            0.14147545502055436  result=  200000000.0


KeyError: "<class 'numba.core.cpu.CPUTargetOptions'> does not support option: 'target_backend'"

In [None]:
dace_func = dace.program(auto_optimize=True)(func)
numba_func = numba.jit(nopython=True, parallel=True)(func)

a = np.ones(n, dtype = np.float64)
start = timer()
b=dace_func(a)
print("dace:               ", timer()-start, " result= ",b.sum())

a = np.ones(n, dtype = np.float64)
start = timer()
b=numba_func(a)
print("numba:               ", timer()-start, " result= ",b.sum())