<a href="https://colab.research.google.com/github/trefftzc/trefftzc/blob/main/DotProductWithNumba.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Exploring how to use Numba in Google Colab.
Based on this example:
 https://colab.research.google.com/github/cbernet/maldives/blob/master/numba/numba_cuda.ipynb#scrollTo=y1ge-bW_ajh5
 

In [1]:
# Search the whole filesystem (case-insensitively) for the CUDA `libdevice`
# directory and the `libnvvm.so` shared library. The matching paths printed
# below are the ones assigned to the NUMBAPRO_* environment variables in the
# next cell. `find` also reports errors for unreadable /proc entries.
!find / -iname 'libdevice'
!find / -iname 'libnvvm.so'

/usr/local/lib/python3.7/dist-packages/jaxlib/cuda/nvvm/libdevice
/usr/local/cuda-11.2/nvvm/libdevice
/usr/local/cuda-11.2/nvvm-prev/libdevice
find: ‘/proc/27/task/27/net’: Invalid argument
find: ‘/proc/27/net’: Invalid argument
/usr/local/cuda-11.2/nvvm/lib64/libnvvm.so
/usr/local/cuda-11.2/nvvm-prev/lib64/libnvvm.so
find: ‘/proc/27/task/27/net’: Invalid argument
find: ‘/proc/27/net’: Invalid argument
find: ‘/proc/107’: No such file or directory
find: ‘/proc/108’: No such file or directory


In [2]:
import os

# Record where the CUDA 11.2 libdevice directory and libnvvm.so live so the
# values are visible to code that runs later in this process.
# NOTE(review): the NUMBAPRO_* names look like the legacy spelling; newer Numba
# releases may expect CUDA_HOME instead — confirm against the installed version.
os.environ.update({
  'NUMBAPRO_LIBDEVICE': "/usr/local/cuda-11.2/nvvm/libdevice",
  'NUMBAPRO_NVVM': "/usr/local/cuda-11.2/nvvm/lib64/libnvvm.so",
})

In [3]:
from numba import cuda
import numpy as np

@cuda.jit
def dotProduct(array1,array2,array3):
  """Store the element-wise product of array1 and array2 in array3.

  Despite its name, this kernel performs no reduction: each thread handles a
  single index `pos` and writes array1[pos] * array2[pos] to array3[pos].
  Summing array3 afterwards would give the actual dot product.

  Args:
    array1: first input array, indexed with one subscript.
    array2: second input array, indexed with one subscript.
    array3: output array that receives the per-element products.
  """
  pos = cuda.grid(1)
  # The launch grid is rounded up to whole blocks, so some threads may have an
  # index past the end of the data. Check all three arrays, not just array1,
  # so that a shorter input or output is never indexed out of range.
  if pos < array1.size and pos < array2.size and pos < array3.size:
    array3[pos] = array1[pos]*array2[pos]
  

# Number of elements in each vector.
n = 1000

# Host inputs holding 0.0, 1.0, ..., n-1 as float64. Built directly from `n`
# rather than allocating a hard-coded 1000-element array and overwriting every
# entry in a Python loop.
a_cpu = np.arange(n, dtype=np.float64)
b_cpu = np.arange(n, dtype=np.float64)

# Output buffer created on the device; the kernel fills it with the products.
c_gpu = cuda.device_array(shape=(n,),dtype=np.float32)

# Copy the inputs to the device.
a_gpu = cuda.to_device(a_cpu)
b_gpu = cuda.to_device(b_cpu)

# Round the block count up so that every element gets a thread even when n is
# not a multiple of the block size.
threadsperblock = 32
blockspergrid = (a_gpu.size + (threadsperblock - 1)) // threadsperblock
dotProduct[blockspergrid, threadsperblock](a_gpu,b_gpu,c_gpu)

# Bring the result back to the host.
c_cpu = c_gpu.copy_to_host()

# Validate the device result against the same computation done with NumPy.
# The tolerance allows for the output being stored as float32.
np.testing.assert_allclose(c_cpu, a_cpu * b_cpu, rtol=1e-6)

# Show the first ten products (slicing stays valid even if n < 10).
for value in c_cpu[:10]:
  print(value)



    



0.0
1.0
4.0
9.0
16.0
25.0
36.0
49.0
64.0
81.0
