In [2]:
from numba import cuda

import pycuda.driver as pycuda
# We use autoprimaryctx instead of autoinit because Numba can only operate on a
# primary context
import pycuda.autoprimaryctx  # noqa
import pycuda.gpuarray as gpuarray

import numpy
import os
# Point CUDA_HOME at the CUDA installation directory (presumably so the
# toolkit can be located when the kernel below is compiled -- confirm the path
# matches the local install).
os.environ["CUDA_HOME"] = "/usr/local/cuda"

# Build a 4x4 float32 array of random values on the host, then upload it to
# the device as a PyCUDA gpuarray
host_array = numpy.random.randn(4, 4).astype(numpy.float32)
a_gpu = gpuarray.to_gpu(host_array)
print("original array:")
print(a_gpu)


# A standard Numba kernel: each thread doubles one element of its 2-D input
@cuda.jit
def double(x):
    """Double in place the element of ``x`` at this thread's grid index."""
    row, col = cuda.grid(2)

    # Threads whose grid index falls outside the array have nothing to do.
    if row >= x.shape[0] or col >= x.shape[1]:
        return

    x[row, col] *= 2


# Call the Numba kernel on the PyCUDA gpuarray, using the CUDA Array Interface
# transparently.
#
# The launch configuration is derived from the array's shape instead of being
# hard-coded: each block holds 16x16 threads, and enough blocks are launched
# (ceiling division per axis) to cover every element. Threads that land outside
# the array are discarded by the bounds check inside the kernel.
threads_per_block = (16, 16)
blocks_per_grid = tuple(
    (extent + threads - 1) // threads
    for extent, threads in zip(a_gpu.shape, threads_per_block)
)
double[blocks_per_grid, threads_per_block](a_gpu)
print("doubled with numba:")
print(a_gpu)

original array:
[[ 0.8024446   1.0366786  -0.60228217  0.2861784 ]
 [ 0.23595941  2.5766768   0.55724376 -0.4264042 ]
 [-0.2525655   0.3432881   0.39916003  0.75750685]
 [ 0.02231747  0.68154705  0.9312098   0.9616823 ]]
doubled with numba:
[[ 1.6048892   2.073357   -1.2045643   0.5723568 ]
 [ 0.47191882  5.1533537   1.1144875  -0.8528084 ]
 [-0.505131    0.6865762   0.79832006  1.5150137 ]
 [ 0.04463494  1.3630941   1.8624196   1.9233646 ]]


