In [1]:
!pip install numba

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
import numpy as np
from numba import cuda

# CUDA kernel to perform vector addition
@cuda.jit
def vector_addition(a, b, result):
    """Write the element-wise sum of ``a`` and ``b`` into ``result``.

    Each thread handles at most one element, selected by the thread's global
    1-D index (block index * block width + thread index within the block).
    Threads whose index is at or past ``len(a)`` do nothing, so the launch grid
    may contain more threads than there are elements.

    Only ``len(a)`` is checked; ``b`` and ``result`` are assumed to be at least
    as long as ``a`` -- TODO confirm at call sites.
    """
    thread_in_block = cuda.threadIdx.x
    block_width = cuda.blockDim.x
    block_index = cuda.blockIdx.x
    pos = thread_in_block + block_width * block_index

    # Surplus threads in the last block have no element to process.
    if not (pos < len(a)):
        return
    result[pos] = a[pos] + b[pos]

# User input for vector length
vector_length = int(input("Enter the length of the vectors: "))

# Fail early with a clear message. A negative length would otherwise surface as
# an error from np.zeros, and a zero length would make blocks_per_grid below
# evaluate to 0, i.e. an empty launch grid.
if vector_length <= 0:
    raise ValueError(
        "Vector length must be a positive integer, got {}".format(vector_length)
    )

# User input for vector elements (stored as float32 on the host)
vector_a = np.zeros(vector_length, dtype=np.float32)
vector_b = np.zeros(vector_length, dtype=np.float32)
for i in range(vector_length):
    vector_a[i] = float(input("Enter the element of vector a at index {}: ".format(i)))
    vector_b[i] = float(input("Enter the element of vector b at index {}: ".format(i)))

# Allocate memory on the GPU: copy both inputs to the device and create an
# output buffer shaped like vector_a.
device_vector_a = cuda.to_device(vector_a)
device_vector_b = cuda.to_device(vector_b)
device_result = cuda.device_array_like(vector_a)

# Define the number of threads per block and the number of blocks.
# The block count is the ceiling of vector_length / threads_per_block, so every
# element gets a thread; the kernel's bounds check ignores the surplus threads.
threads_per_block = 32
blocks_per_grid = (vector_length + (threads_per_block - 1)) // threads_per_block

# Launch the kernel
vector_addition[blocks_per_grid, threads_per_block](device_vector_a, device_vector_b, device_result)

# Copy the result back to the CPU
result = device_result.copy_to_host()

# Print the result
print("Result:", result)


Enter the length of the vectors: 2
Enter the element of vector a at index 0: 5
Enter the element of vector b at index 0: 5
Enter the element of vector a at index 1: 6
Enter the element of vector b at index 1: 6




Result: [10. 12.]
