<a href="https://colab.research.google.com/github/shelke16/HPC/blob/main/CUDA_Add_Vec.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [31]:
%%writefile vector_add.cu
#include <iostream>
#include <cuda_runtime.h>
#define BLOCK_SIZE 16
using namespace std;

void fill_array(int *arr, int size) {
    for (int i = 0; i < size; i++) {
        arr[i] = rand() % 100;
    }
}

void print_array(int *arr, int size) {
    for (int i = 0; i < size; i++) {
        cout << arr[i] << " ";
    }
    cout << endl;
}

__global__ void add(int *arr1, int *arr2, int *arr3, int size) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx < size) {
        arr3[idx] = arr1[idx] + arr2[idx];
    }
}

int main() {
    int size;
    cout << "Enter size of vector: ";
    cin >> size;

    int *arr1_cpu = new int[size];
    int *arr2_cpu = new int[size];
    int *result_cpu = new int[size];

    fill_array(arr1_cpu, size);
    cout << "Array 1: ";
    print_array(arr1_cpu, size);

    fill_array(arr2_cpu, size);
    cout << "Array 2: ";
    print_array(arr2_cpu, size);

    int *arr1_gpu, *arr2_gpu, *result_gpu;
    cudaMallocManaged(&arr1_gpu, size * sizeof(int));
    cudaMallocManaged(&arr2_gpu, size * sizeof(int));
    cudaMallocManaged(&result_gpu, size * sizeof(int));

    cudaMemcpy(arr1_gpu, arr1_cpu, size * sizeof(int),
cudaMemcpyHostToDevice);
    cudaMemcpy(arr2_gpu, arr2_cpu, size * sizeof(int),
cudaMemcpyHostToDevice);

    int threadsPerBlock = 16;
    int blocksPerGrid = (size + threadsPerBlock - 1) / threadsPerBlock;
    add<<<blocksPerGrid, threadsPerBlock>>>(arr1_gpu, arr2_gpu,
result_gpu, size);

    cudaMemcpy(result_cpu, result_gpu, size * sizeof(int),
cudaMemcpyDeviceToHost);

    cout << "Result Array: ";
    print_array(result_cpu, size);

    // Free memory
    delete[] arr1_cpu;
    delete[] arr2_cpu;
    delete[] result_cpu;
    cudaFree(arr1_gpu);
    cudaFree(arr2_gpu);
    cudaFree(result_gpu);

    return 0;
}

Overwriting vector_add.cu


In [32]:
!nvcc -o vector_add vector_add.cu
!nvcc -arch=sm_75 -o vector_add vector_add.cu
!./vector_add

Enter size of vector: 4
Array 1: 83 86 77 15 
Array 2: 93 35 86 92 
Result Array: 0 0 0 0 
