In [1]:
# Install PyCUDA into the environment used by the `!python` call further down.
# PyCUDA is downloaded as a source archive and built into a wheel locally, so
# this cell can take a while on a fresh runtime.
!pip install pycuda

Collecting pycuda
  Downloading pycuda-2024.1.tar.gz (1.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting pytools>=2011.2 (from pycuda)
  Downloading pytools-2024.1.3-py2.py3-none-any.whl (87 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.4/87.4 kB[0m [31m14.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting appdirs>=1.4.0 (from pycuda)
  Downloading appdirs-1.4.4-py2.py3-none-any.whl (9.6 kB)
Collecting mako (from pycuda)
  Downloading Mako-1.3.5-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m13.3 MB/s[0m eta [36m0:00:00[0m
Building wheels for collected packages: pycuda
  Building wheel for pycuda (pyproject.toml) ... [?25l[?25hdone
  C

In [10]:
%%writefile hello_world_pycuda.py
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule
import numpy as np

# CUDA kernel: thread `idx` copies byte `idx` of the message into `output`.
# sizeof(msg) counts the terminating NUL, so exactly 14 bytes (13 characters
# plus NUL) are written; threads with idx >= sizeof(msg) do nothing.
cuda_code = """
__global__ void return_data(char *output) {
    const char msg[] = "Hello, World!";
    int idx = threadIdx.x + blockIdx.x * blockDim.x;
    if (idx < sizeof(msg)) {
        output[idx] = msg[idx];
    }
}
"""


# Compile the CUDA module and look up the kernel entry point
cuda_module = SourceModule(cuda_code)
return_data_kernel = cuda_module.get_function("return_data")

# Allocate exactly as many bytes as the kernel writes: the message plus its
# NUL terminator. This must match sizeof(msg) in the kernel above; a larger
# buffer would leave trailing bytes uninitialised (mem_alloc does not zero
# memory) and they would be copied back to the host as garbage.
output_size = len("Hello, World!") + 1
output_gpu = cuda.mem_alloc(output_size)

# One thread per output byte, all in a single block
block_dim = (output_size, 1, 1)
grid_dim = (1, 1)

# Launch the kernel
return_data_kernel(output_gpu, block=block_dim, grid=grid_dim)

# Copy the result back to the host
output_host = np.empty(output_size, dtype=np.uint8)
cuda.memcpy_dtoh(output_host, output_gpu)

# Treat the buffer as a C string: keep only the bytes before the first NUL so
# the terminator itself is not printed as part of the message.
output_str = output_host.tobytes().split(b"\x00", 1)[0].decode("ascii")
print(output_str)





Overwriting hello_world_pycuda.py


In [11]:
!python hello_world_pycuda.py

Hello, World!  
