In [41]:
from numba import cuda
from numba import jit
import numpy as np
import time

In [42]:
def maxElement(arr, n):
    """Return the largest of the first ``n`` entries of ``arr``.

    Pure-Python linear scan; this is the version timed as the sequential
    baseline.

    Args:
        arr: Indexable sequence; ``arr[0]`` must exist.
        n: Number of leading elements to examine. For ``n <= 1`` the loop
            body never runs and ``arr[0]`` is returned unchanged.

    Returns:
        The greatest value among ``arr[0] .. arr[n - 1]``.
    """
    largest = arr[0]
    for idx in range(1, n):
        candidate = arr[idx]
        # Strict '>' keeps the earliest occurrence when values tie.
        largest = candidate if candidate > largest else largest
    return largest

In [43]:
@jit(nopython=True)
def maxElementJit(arr, n):
    """Return the largest of the first ``n`` entries of ``arr`` (Numba-compiled).

    Same linear scan as the pure-Python version, compiled by Numba.
    ``nopython=True`` is requested explicitly so that compilation fails
    loudly instead of silently falling back to slow object mode on Numba
    releases where a bare ``@jit`` allows that fallback.

    Note: the first call for a given argument type triggers compilation, so
    it is much slower than subsequent calls.

    Args:
        arr: Array to scan; ``arr[0]`` must exist.
        n: Number of leading elements to examine. For ``n <= 1`` the loop
            body never runs and ``arr[0]`` is returned.

    Returns:
        The greatest value among ``arr[0] .. arr[n - 1]``.
    """
    largest = arr[0]
    for i in range(1, n):
        if arr[i] > largest:
            largest = arr[i]
    return largest

In [44]:
@cuda.jit
def cuda_max_idx(arr, max1):
    """CUDA kernel: raise ``max1[0]`` to the maximum value found in ``arr``.

    Each thread visits the positions ``pos, pos + stride, pos + 2*stride, ...``
    (a grid-stride loop, where ``stride`` is the total number of launched
    threads), keeps a private running maximum, and finally merges it into
    ``max1[0]`` with one atomic max.

    The atomic merge is essential: a plain "read ``max1[0]``, compare, write"
    sequence executed by many threads at once is a data race in which a
    larger value can be overwritten by a smaller one.

    Args:
        arr: Array to scan, indexed by a single flat position.
        max1: Array whose element 0 receives the result. The kernel only
            ever raises this value, so the caller must initialise it to
            something no larger than the expected maximum (for example the
            first element of ``arr``). Its dtype must be one supported by
            ``cuda.atomic.max``.
    """
    thread_in_block = cuda.threadIdx.x
    block_in_grid = cuda.blockIdx.x
    threads_per_block = cuda.blockDim.x
    pos = thread_in_block + block_in_grid * threads_per_block
    # Total number of threads in the launch; each thread jumps by this much.
    stride = threads_per_block * cuda.gridDim.x

    # Threads beyond the end of the array have nothing to contribute.
    if pos < arr.size:
        local_max = arr[pos]
        pos += stride
        while pos < arr.size:
            if arr[pos] > local_max:
                local_max = arr[pos]
            pos += stride
        # One atomic per thread instead of one racy write per element.
        cuda.atomic.max(max1, 0, local_max)

In [51]:
# Problem size and launch configuration, named once so every call agrees.
ARRAY_SIZE = 1000000
GRID_BLOCKS = 128
THREADS_PER_BLOCK = 64

arr = np.random.randint(1000, size = ARRAY_SIZE)

# Warm-up: the first call of a Numba-compiled function or kernel includes
# compilation. Run each once, untimed, so the figures below measure execution
# only and do not depend on whether this cell has been run before.
maxElementJit(arr, ARRAY_SIZE)
warmup_max = np.full(1, arr[0], dtype=np.float64)
cuda_max_idx[GRID_BLOCKS, THREADS_PER_BLOCK](arr, warmup_max)
cuda.synchronize()

# time.perf_counter() is the high-resolution clock intended for measuring
# short intervals; time.time() can be too coarse for sub-millisecond runs.
start = time.perf_counter()
seq_max = maxElement(arr, ARRAY_SIZE)
end = time.perf_counter()
s = end - start
print("Sequential Time: ", s)

start = time.perf_counter()
jit_max = maxElementJit(arr, ARRAY_SIZE)
end = time.perf_counter()
j = end - start
print("Jit Time: ", j)

# A speed-up is only meaningful if both CPU versions agree on the answer.
assert seq_max == jit_max, "sequential and JIT results differ"

# Seed the result with the first element rather than 0 so the kernel is also
# correct for data containing only negative values.
max1 = np.full(1, arr[0], dtype=np.float64)
start = time.perf_counter()
# arr and max1 are NumPy host arrays, so Numba copies them to and from the
# device around the launch; that transfer time is part of the CUDA figure.
cuda_max_idx[GRID_BLOCKS, THREADS_PER_BLOCK](arr, max1)
# Make sure all device work has finished before stopping the clock.
cuda.synchronize()
end = time.perf_counter()
c = end - start
print("CUDA Time: ", c)

print("speedup-jit: ", s/j)
print("speedup-cuda: ", s/c)

Sequential Time:  0.12113571166992188
Jit Time:  0.0007665157318115234
CUDA Time:  0.006484270095825195
speedup-jit:  158.03421461897355
speedup-cuda:  18.681472221200867


115 ms ± 5.37 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
4.19 ms ± 29.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
