**CUDA**

Евгений Борисов <borisov.e@solarl.ru>

In [1]:
import numpy as np

import pycuda.autoinit
import pycuda.driver as drv
from pycuda.compiler import SourceModule

In [2]:
# Compile a minimal CUDA kernel that multiplies two float arrays element-wise.
mod = SourceModule('''
// dest[i] = a[i] * b[i], one thread per element.
// The element index is the global thread index, so the kernel stays correct
// when it is launched with several 1-D blocks, not only with a single block.
// There is no bounds check: launch exactly one thread per array element.
__global__ void multiply_them(float *dest, float *a, float *b)
{
  const int i = blockIdx.x * blockDim.x + threadIdx.x;
  dest[i] = a[i] * b[i];
}
''')

# Python-callable handle to the compiled kernel.
multiply_them = mod.get_function('multiply_them')

In [3]:
# Fixed seed so that Restart & Run All reproduces the same input vectors.
SEED = 0

# Number of elements; the launch below uses one thread per element.
n = 100

# Dedicated generator: reproducible without touching NumPy's global RNG state.
rng = np.random.RandomState(SEED)

# The kernel takes float pointers, so the host arrays are float32.
a = rng.randn(n).astype(np.float32)
b = rng.randn(n).astype(np.float32)

# Output buffer that receives the kernel result.
z = np.zeros(n, dtype=np.float32)

In [4]:
# The launch uses a single 1-D block of n threads, so n must fit in one block.
# Check this up front instead of failing inside the driver launch.
max_threads = pycuda.autoinit.device.get_attribute(
    drv.device_attribute.MAX_THREADS_PER_BLOCK
)
if not 1 <= n <= max_threads:
    raise ValueError(
        'n=%d does not fit in a single block (device limit: %d threads)'
        % (n, max_threads)
    )

# drv.In copies a and b to the device before the launch; drv.Out copies the
# result back into z after the kernel has finished.
multiply_them(drv.Out(z), drv.In(a), drv.In(b), block=(n, 1, 1), grid=(1, 1))

In [5]:
# Element-wise difference between the GPU result and the NumPy reference;
# every entry should be zero.
residual = z - a * b
print('diff:', residual)

diff: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0.]
