In [None]:
!pip install pyopencl

### Kernel

In [None]:
%%writefile program.cl

// Enable double precision explicitly: OpenCL 1.0/1.1 compilers reject `double`
// unless cl_khr_fp64 is enabled (from OpenCL 1.2 on the pragma is optional).
#pragma OPENCL EXTENSION cl_khr_fp64 : enable

// Element-wise square: squares[i] = vec[i] * vec[i].
//   vec     - input values (read only)
//   squares - output buffer; must hold at least as many elements as vec
// One work-item handles one element. There is no bounds check, so the host
// must not launch more work-items than there are elements.
__kernel void square(__global const double* vec, __global double* squares){
    // get_global_id returns size_t; keep that type so the index is never narrowed.
    const size_t id = get_global_id(0);

    squares[id] = vec[id]*vec[id];
}

// Divide every element of vec, in place, by magnitude[0].
//   vec       - values to scale; overwritten with the result
//   magnitude - buffer whose first element is the divisor (read only)
// One work-item handles one element and there is no bounds check, so the host
// must not launch more work-items than there are elements in vec.
// A zero divisor is not special-cased; the division then yields inf/NaN.
// NOTE(review): `normalize` is also the name of an OpenCL built-in geometric
// function - confirm every target compiler accepts a kernel with this name
// (renaming it would also require updating the host code that looks it up).
__kernel void normalize(__global double* vec, __global const double* magnitude) {
    // get_global_id returns size_t; keep that type so the index is never narrowed.
    const size_t id = get_global_id(0);
    const double norm = magnitude[0];

    vec[id] /= norm;
}

### Runtime

In [None]:
import numpy as np
import pyopencl as cl

# Seed NumPy's global random generator with a named constant so the random
# input generated later in the notebook is reproducible and the seed is easy
# to find and change.
SEED = 0
np.random.seed(SEED)

In [None]:
# Number of vector elements and the half-width of the sampling interval.
VEC_SIZE = 10000000
RANGE = 100

# Host-side arrays, all float64 to match the `double` kernel arguments.
# np.random.uniform already returns float64 and np.zeros accepts a dtype, so
# no extra .astype() pass (a full copy of each large array) is needed.
vec = np.random.uniform(-RANGE, RANGE, VEC_SIZE)  # input vector, values in [-RANGE, RANGE)
squares = np.zeros(VEC_SIZE, dtype=np.float64)    # will receive the element-wise squares
magnitude = np.zeros(1, dtype=np.float64)         # one-element holder for the vector norm

print(vec)
print(squares)
print(magnitude)

In [None]:
# Create an OpenCL context and a command queue on it. No platform or device is
# selected explicitly here; the choice is left to PyOpenCL.
# NOTE(review): the kernels in program.cl use `double`, so the chosen device
# presumably has to support double precision - confirm on the target machine.
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

In [None]:
# Allocate one device buffer per host array. Every buffer is created with the
# same flags: read/write access, initialised from a copy of its host array.
mf = cl.mem_flags
rw_copy_flags = mf.READ_WRITE | mf.COPY_HOST_PTR

vec_buf, squares_buf, magnitude_buf = (
    cl.Buffer(ctx, rw_copy_flags, hostbuf=host_array)
    for host_array in (vec, squares, magnitude)
)

In [None]:
# Read the OpenCL source written by the %%writefile cell and compile it for
# the context `ctx`. The `with` block closes the file handle as soon as the
# source has been read, so it is not left open for the life of the kernel.
with open("program.cl", "r") as program_file:
    program_src = program_file.read()

program = cl.Program(ctx, program_src)
program.build()

In [None]:
# Fetch each kernel from the built program once and bind its arguments.

# square: reads vec_buf, writes squares_buf.
kernel_square = program.square
kernel_square.set_args(vec_buf, squares_buf)

# normalize: scales vec_buf in place by the first element of magnitude_buf.
kernel_normalize = program.normalize
kernel_normalize.set_args(vec_buf, magnitude_buf)

In [None]:
# One work-item per vector element; the work-group size is left as None.
global_size = (VEC_SIZE,)

# Square every element on the device, then copy the squares back to the host.
cl.enqueue_nd_range_kernel(queue, kernel_square, global_size, None)
cl.enqueue_copy(queue, squares, squares_buf)
queue.finish()  # wait for all enqueued work before reducing on the host

# Finish the reduction on the host. Store the result inside the existing
# one-element array instead of rebinding `magnitude` to a NumPy scalar, so the
# host object keeps the shape and dtype that magnitude_buf was created from.
magnitude[0] = np.sqrt(np.sum(squares))

In [None]:
# Upload the host-computed norm to the device, then divide every element of
# vec_buf by it. Order matters: the copy must be enqueued before the kernel
# that reads magnitude_buf.
# NOTE: the kernel scales vec_buf in place, so re-running this cell divides the
# already-normalized data by the norm again.
cl.enqueue_copy(queue, magnitude_buf, magnitude)
cl.enqueue_nd_range_kernel(queue, kernel_normalize, global_size, None)

In [None]:
# Copy the device vector back into the host array (overwriting the original
# input values), show it, and report its Euclidean norm.
cl.enqueue_copy(queue, vec, vec_buf)
print(vec)

normalized_norm = np.linalg.norm(vec)
print("Vector magnitude = ", normalized_norm)