In [4]:
%%writefile array_addition_numba.py
import numpy as np
from numba import cuda
import hashlib

@cuda.jit
def add_array(a, b, c):
  """CUDA kernel: store the element-wise sum of ``a`` and ``b`` in ``c``.

  Each thread handles at most one element, selected by the thread's
  absolute position in the 1-D launch grid. Threads whose position is
  not below ``a.size`` do nothing, so the grid may be larger than the
  input.

  Only ``a.size`` is checked, so ``b`` and ``c`` need at least that many
  elements.
  """
  # cuda.grid(1) is threadIdx.x + blockIdx.x * blockDim.x.
  idx = cuda.grid(1)
  if idx < a.size:
    c[idx] = a[idx] + b[idx]

N = 20
THREADS_PER_BLOCK = 8
# Ceiling division: launch enough blocks to cover all N elements, so the
# grid stays correct if N is changed (a fixed grid would silently leave
# elements beyond its thread count uncomputed).
blocks_per_grid = (N + THREADS_PER_BLOCK - 1) // THREADS_PER_BLOCK

a = np.arange(N, dtype=np.float32)
b = np.arange(N, dtype=np.float32)

# Copy the inputs to the GPU explicitly. Passing host arrays straight to a
# kernel makes Numba transfer them to the device and then copy them back
# to the host after the kernel finishes, which is wasted work for
# read-only inputs.
dev_a = cuda.to_device(a)
dev_b = cuda.to_device(b)

# Allocate a device array (dev_c) on the GPU using cuda.device_array_like;
# it takes its shape and dtype from a and is filled in by the kernel:
dev_c = cuda.device_array_like(a)

add_array[blocks_per_grid, THREADS_PER_BLOCK](dev_a, dev_b, dev_c)

# Copy the result (dev_c) back to the host and print it:
c = dev_c.copy_to_host()

print(c)


Writing array_addition_numba.py


In [5]:
!python array_addition_numba.py

[ 0.  2.  4.  6.  8. 10. 12. 14. 16. 18. 20. 22. 24. 26. 28. 30. 32. 34.
 36. 38.]


In [6]:
import numpy as np
from numba import cuda
import hashlib

@cuda.jit
def add_array(a, b, c):
  """CUDA kernel computing ``c[k] = a[k] + b[k]`` for every ``k < a.size``.

  One thread is responsible for one index, taken from the thread's
  absolute position in the 1-D grid. Surplus threads (position at or
  beyond ``a.size``) exit without touching memory.

  The bounds check uses ``a.size`` only; ``b`` and ``c`` must be at least
  that long.
  """
  # Same value as threadIdx.x + blockDim.x * blockIdx.x.
  pos = cuda.grid(1)
  if pos >= a.size:
    return
  c[pos] = a[pos] + b[pos]

N = 20
THREADS_PER_BLOCK = 8
# Ceiling division: enough blocks to give every one of the N elements a
# thread. A hard-coded grid would silently skip elements once N exceeds
# the total thread count.
blocks_per_grid = (N + THREADS_PER_BLOCK - 1) // THREADS_PER_BLOCK

a = np.arange(N, dtype=np.float32)
b = np.arange(N, dtype=np.float32)

# Copy the input arrays to the GPU using cuda.to_device:
dev_a = cuda.to_device(a)
dev_b = cuda.to_device(b)

# Allocate a device array (dev_c) on the GPU using cuda.device_array_like;
# it takes its shape and dtype from a and is filled in by the kernel:
dev_c = cuda.device_array_like(a)

add_array[blocks_per_grid, THREADS_PER_BLOCK](dev_a, dev_b, dev_c)

# Copy the result (dev_c) back to the host and print it:
c = dev_c.copy_to_host()

print(c)


[ 0.  2.  4.  6.  8. 10. 12. 14. 16. 18. 20. 22. 24. 26. 28. 30. 32. 34.
 36. 38.]


