In [1]:
!pip install pycuda # install PyCUDA, the Python bindings for CUDA (the CUDA toolkit and driver must already be present)
import pycuda.autoinit
import pycuda.driver as cuda
from pycuda.compiler import SourceModule

Collecting pycuda
  Downloading pycuda-2024.1.tar.gz (1.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting pytools>=2011.2 (from pycuda)
  Downloading pytools-2023.1.1-py2.py3-none-any.whl (70 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m70.6/70.6 kB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
Collecting mako (from pycuda)
  Downloading Mako-1.3.0-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
Building wheels for collected packages: pycuda
  Building wheel for pycuda (pyproject.toml) ... [?25l[?25hdone
  Created wheel for pycuda: filename=pycuda-2024.1-cp310-cp310-linux_x86_64.whl size=661205 sha256=c5e5177

In [2]:
# CUDA kernel: element-wise doubling, B[i] = 2 * A[i], one thread per element.
# SourceModule compiles the CUDA source below when this cell runs.
modd = SourceModule("""
// Writes B[i] = 2 * A[i], where i is the global thread index.
// There is no length argument and therefore no bounds check: the launch must
// use exactly one thread per element, otherwise the extra threads read and
// write past the end of A and B.
__global__ void times_two(const double* A, double* B)
{
    // `unsigned int` rather than `uint`: `uint` is a typedef supplied by some
    // host C libraries, not a CUDA/C++ type, so it does not compile with
    // every host toolchain.
    unsigned int index = blockIdx.x * blockDim.x + threadIdx.x;
    B[index] = A[index] * 2.0;
}
""")

In [3]:
import numpy as np

# Create the input vector. float64 matches the kernel's `double*` parameters.
a = np.random.randn(10).astype(np.float64)

# Host buffer that will receive the GPU result (same shape and dtype as `a`).
b = np.empty_like(a)

# Look up the compiled kernel by its CUDA name.
times_two = modd.get_function("times_two")

# Allocate the input and output buffers on the GPU.
a_gpu = cuda.mem_alloc(a.nbytes)
b_gpu = cuda.mem_alloc(b.nbytes)

try:
  # Copy the input vector to the GPU.
  cuda.memcpy_htod(a_gpu, a)

  # Call the CUDA kernel.
  # The kernel has no bounds check, so launch exactly one thread per element.
  # The thread count is derived from the array instead of being hard-coded so
  # the two cannot drift apart. A single block is used, so `a.size` must not
  # exceed the device's per-block thread limit (typically 1024).
  times_two(a_gpu, b_gpu, block=(a.size, 1, 1), grid=(1, 1, 1))

  # Copy the result back to the host.
  cuda.memcpy_dtoh(b, b_gpu)
finally:
  # Release the device memory even if the copy or the launch failed; the
  # names stay alive in the notebook namespace, so nothing else would free it.
  a_gpu.free()
  b_gpu.free()


# Do same calculation in CPU.

b_cpu = a * 2

# Verify the result. Exact equality is intentional: doubling a float64 is
# exact in IEEE-754, so the CPU and GPU results should match bit for bit.
print(b)
print(b_cpu)
if np.array_equal(b, b_cpu):
  print("Both vectors are the same.")
else:
  print("Vectors are not equal, something went wrong.")


[-1.18635089 -1.46675775 -1.73712091 -3.44403885  0.65204713  0.20853664
  1.63180494 -0.88420605 -2.37486045  3.07298196]
[-1.18635089 -1.46675775 -1.73712091 -3.44403885  0.65204713  0.20853664
  1.63180494 -0.88420605 -2.37486045  3.07298196]
Both vectors are the same.
