<a href="https://colab.research.google.com/github/shreyanshML100/Undergraduate-Project-II/blob/main/Parallel_Computing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install git+https://github.com/andreinechaev/nvcc4jupyter.git
%load_ext nvcc_plugin

Collecting git+https://github.com/andreinechaev/nvcc4jupyter.git
  Cloning https://github.com/andreinechaev/nvcc4jupyter.git to /tmp/pip-req-build-vhlf8ups
  Running command git clone --filter=blob:none --quiet https://github.com/andreinechaev/nvcc4jupyter.git /tmp/pip-req-build-vhlf8ups
  Resolved https://github.com/andreinechaev/nvcc4jupyter.git to commit 0d2ab99cccbbc682722e708515fe9c4cfc50185a
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: NVCCPlugin
  Building wheel for NVCCPlugin (setup.py) ... [?25l[?25hdone
  Created wheel for NVCCPlugin: filename=NVCCPlugin-0.0.2-py3-none-any.whl size=4716 sha256=6071fa22c07928457ce8b8e82ad7347b48519e2a67c240bb98e98473cdb24472
  Stored in directory: /tmp/pip-ephem-wheel-cache-nm9yipfb/wheels/a8/b9/18/23f8ef71ceb0f63297dd1903aedd067e6243a68ea756d6feea
Successfully built NVCCPlugin
Installing collected packages: NVCCPlugin
Successfully installed NVCCPlugin-0.0.2
created output directory at /content

The following code uses parallel computing on the GPU (CUDA) to speed up a simulation of the translational motion of a large number of spherical particles moving with random velocities and accelerations.

In [None]:
%%cu
#include <iostream>
#include <cmath>
#include <sys/time.h>
#include <stdlib.h>

using namespace std ;

// Number of simulated particles; also the length of every state array below.
// Declared const so it can be used directly as the array bound, which keeps
// the particle count and the array sizes from drifting apart.
const int N = 1000000;

// Host-side per-particle state: position (x0), velocity (v0), acceleration (a0).
float x0[N], v0[N], a0[N];

// Integration time step used by the host-side update.
float dt = 1.0f;


// Returns the current wall-clock time in seconds as a double, combining the
// whole-second and microsecond fields reported by gettimeofday().
double cpuSec()
{
    struct timeval now;
    gettimeofday(&now, NULL);

    const double wholeSeconds = (double)now.tv_sec;
    const double microPart    = (double)now.tv_usec * 1.e-6;
    return (wholeSeconds + microPart);
}


// Resets the state of every particle: position to zero, velocity to a
// pseudo-random value in [0.1, 1.0] and acceleration to a pseudo-random value
// in [1, 10].
//
// rand() returns an integer in [0, RAND_MAX], so it is first scaled to [0, 1];
// using it unscaled would produce values in the billions rather than the
// bounded ranges the 0.1 + 0.9*u and 1 + 9*u expressions describe.
//
// Draws continue the global rand() sequence, so successive calls yield
// different values unless the caller reseeds with srand().
void assignInit()
{
    const float invRandMax = 1.0f / (float)RAND_MAX;

    for (int i = 0; i < N; i++)
    {
        // Velocity is drawn before acceleration, one rand() call each.
        const float uVel = (float)rand() * invRandMax;
        const float uAcc = (float)rand() * invRandMax;

        x0[i] = 0.0f;
        v0[i] = 0.1f + 0.9f * uVel;
        a0[i] = 1.0f + 9.0f * uAcc;
    }
}


// Advances every particle through 1000 time steps on the host, updating the
// global x0 (position) and v0 (velocity) arrays in place with the
// constant-acceleration formulas
//     x <- x + v*dt + 0.5*a*dt^2
//     v <- v + a*dt
// The position update uses the velocity from before the step.
void operateCPU()
{
    const int numSteps = 1000;

    for (int step = 1; step <= numSteps; step++)
    {
        // Start at index 0 so the first particle is updated as well.
        for (int i = 0; i < N; i++)
        {
            // 0.5f (not 0.5) keeps the arithmetic in single precision, the
            // element type of the arrays.
            x0[i] = x0[i] + v0[i]*dt + 0.5f*a0[i]*dt*dt;
            v0[i] = v0[i] + a0[i]*dt;
        }
    }
}


// Kernel: each thread advances one particle through 1000 time steps with
//     x <- x + v*dt + 0.5*a*dt^2
//     v <- v + a*dt
// using a fixed dt of 1.
//
// Parameters (device pointers):
//   d_x  positions, updated in place
//   d_v  velocities, updated in place
//   d_a  accelerations, read only
//
// Launch layout: 1-D grid of 1-D blocks. A thread handles the element at its
// global index blockIdx.x * blockDim.x + threadIdx.x, so
// gridDim.x * blockDim.x must equal the number of elements in the arrays.
// The element count is not a parameter, so there is no bounds guard:
// launching more threads than elements accesses memory out of bounds, and
// launching fewer leaves the remaining elements untouched.
// No shared memory is used. The three arrays must not overlap, because each
// thread keeps its element in local variables for the whole loop.
__global__ void operateGPU(float* d_x, float* d_v, float* d_a)
{
    // Global index: combining the block and thread indices gives every
    // thread in the grid its own element.
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    const float dt = 1.0f;

    // Load once, iterate on local copies, store once: avoids 1000 round
    // trips to global memory per element.
    float x = d_x[i];
    float v = d_v[i];
    const float a = d_a[i];

    for (int step = 1; step <= 1000; step++)
    {
        // Float literals keep the update in single precision.
        x = x + v*dt + 0.5f*a*dt*dt;
        v = v + a*dt;
    }

    d_x[i] = x;
    d_v[i] = v;
}


// Prints a diagnostic to std::cerr and terminates the program when a CUDA
// runtime call (or a kernel launch) reports an error; does nothing on
// cudaSuccess.
static void checkCuda(cudaError_t status, const char* what)
{
    if (status != cudaSuccess)
    {
        cerr << "CUDA error in " << what << ": "
             << cudaGetErrorString(status) << "\n";
        exit(EXIT_FAILURE);
    }
}


// Benchmark driver: times particle initialisation and the host update, runs
// the same update on the GPU, and prints both timings and their ratio.
// Every CUDA call is checked, so a failed allocation, copy or launch aborts
// with a message instead of yielding a meaningless GPU timing.
// Returns 0 on success; exits with EXIT_FAILURE on any CUDA error.
int main()
{
    // Time the initialisation of the host arrays.
    double istart = cpuSec();
    assignInit();
    double iElaps1 = cpuSec() - istart;

    // Time the host update.
    istart = cpuSec();
    operateCPU();
    double iElaps2 = cpuSec() - istart;

    cout<<iElaps1<<"\n";          // initialisation time, printed without a label
    cout<<"Time taken to operate on CPU: "<<iElaps2<<"\n";

    // Re-initialise the host arrays so the GPU run starts from fresh state
    // rather than from the values left behind by the host update.
    assignInit();

    float *d_x, *d_v, *d_a;
    // Bytes per array: the elements are float, and size_t avoids narrowing
    // the byte count to int.
    const size_t size = sizeof(float) * N;

    checkCuda(cudaMalloc((void **)&d_x, size), "cudaMalloc(d_x)");
    checkCuda(cudaMalloc((void **)&d_v, size), "cudaMalloc(d_v)");
    checkCuda(cudaMalloc((void **)&d_a, size), "cudaMalloc(d_a)");

    checkCuda(cudaMemcpy(d_x, x0, size, cudaMemcpyHostToDevice), "cudaMemcpy(x0 -> d_x)");
    checkCuda(cudaMemcpy(d_v, v0, size, cudaMemcpyHostToDevice), "cudaMemcpy(v0 -> d_v)");
    checkCuda(cudaMemcpy(d_a, a0, size, cudaMemcpyHostToDevice), "cudaMemcpy(a0 -> d_a)");

    // Launch configuration: 1000 blocks of N/1000 threads each, i.e. exactly
    // one thread per particle in total.
    const int numBlocks = 1000;
    const int threadsPerBlock = N / numBlocks;
    if (numBlocks * threadsPerBlock != N)
    {
        cerr << "N must be a multiple of " << numBlocks
             << " so that the launch provides one thread per particle\n";
        exit(EXIT_FAILURE);
    }

    // The timed region covers the kernel only; host<->device copies are excluded.
    istart = cpuSec();
    operateGPU <<<numBlocks, threadsPerBlock>>>(d_x, d_v, d_a);
    // A launch does not return a status: configuration errors surface through
    // cudaGetLastError(), execution errors at the next synchronising call.
    checkCuda(cudaGetLastError(), "operateGPU launch");
    checkCuda(cudaDeviceSynchronize(), "operateGPU execution");
    double iElaps3 = cpuSec() - istart;
    cout<<"Time taken to operate on GPU: "<<iElaps3<<"\n";

    float s = iElaps2/iElaps3;
    cout<<"Speed Up = "<<s<<"\n";
    //cout<<"Efficiency = "<<s*100/N<<"%\n";

    // Copy the updated positions and velocities back to the host arrays.
    checkCuda(cudaMemcpy(x0, d_x, size, cudaMemcpyDeviceToHost), "cudaMemcpy(d_x -> x0)");
    checkCuda(cudaMemcpy(v0, d_v, size, cudaMemcpyDeviceToHost), "cudaMemcpy(d_v -> v0)");

    checkCuda(cudaFree(d_x), "cudaFree(d_x)");
    checkCuda(cudaFree(d_v), "cudaFree(d_v)");
    checkCuda(cudaFree(d_a), "cudaFree(d_a)");

    return 0;
}


0.0602989
Time taken to operate on CPU: 6.39877
Time taken to operate on GPU: 9.05991e-06
Speed Up = 706274

