<a href="https://colab.research.google.com/github/ravigitX/cuda-programs/blob/main/PDS_ASSIGNMENT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **PARALLEL REDUCTION**

In [None]:
%%writefile ParallelReduction.cu

#include<stdio.h>
#include<math.h>

/*
 * In-place tree reduction: when the kernel finishes, a[0] holds the sum of
 * a[0..n-1]. Other even-indexed slots are overwritten with partial sums.
 *
 * Launch layout: indexing uses threadIdx.x only, so a single block with at
 * least n/2 threads is expected.
 *
 * a : device array of n ints, reduced in place
 * n : number of elements in a
 */
__global__ void parallel(int *a, int n)
{
  int i = threadIdx.x;
  // Thread i accumulates into slot 2*i; only the first n/2 threads own a slot.
  bool owner = (i < n/2);

  // The stride doubles every round (1, 2, 4, ...) while it is below n, which
  // gives ceil(log2(n)) rounds using integer arithmetic only.
  for(int u = 1; u < n; u <<= 1)
  {
    int v = 2*i + u;
    if(owner && i%u == 0 && v < n)
    {
      a[2*i] += a[v];
    }
    // The barrier sits outside the condition so that every thread of the
    // block reaches it, including threads with no work in this round.
    __syncthreads();
  }
}
// Host driver: fills an 8-element array with 1..8, prints it, reduces it on
// the device with `parallel`, and prints the value read back from element 0.
int main()
{
  const int n = 8;
  const int size = n*sizeof(int);

  int *hostVals = (int *)malloc(size);
  int *devVals;

  // Populate and echo the input in a single pass.
  printf("Original: \n");
  for(int k=0; k<n; ++k)
  {
    hostVals[k] = k+1;
    printf("%d ",hostVals[k]);
  }
  printf("\n");

  cudaMalloc((void **)&devVals, size);
  cudaMemcpy(devVals, hostVals, size, cudaMemcpyHostToDevice);

  // One block with one thread per element.
  parallel<<<1,n>>>(devVals,n);

  cudaMemcpy(hostVals, devVals, size, cudaMemcpyDeviceToHost);

  printf("Sum: %d ",hostVals[0]);
  printf("\n");

  free(hostVals);
  cudaFree(devVals);

  return 0;
}


Overwriting ParallelReduction.cu


In [None]:
!nvcc -arch=sm_75 ParallelReduction.cu -o ParallelReduction
!./ParallelReduction

Original: 
1 2 3 4 5 6 7 8 
Sum: 36 


# **PREFIX SUM**

In [None]:
%%writefile Prefixsum.cu
#include <stdio.h>

// Inclusive prefix sum: thread i stores a[0] + a[1] + ... + a[i] into b[i].
// Only threadIdx.x is used for indexing, so a single block has to supply a
// thread for every element.
__global__ void prefixSum(int *a, int *b, int n) {
    const int tid = threadIdx.x;
    if (tid >= n)
        return;              // surplus threads have no output slot

    int running = 0;
    int k = 0;
    while (k <= tid) {
        running += a[k];
        ++k;
    }
    b[tid] = running;
}

// Host driver: computes the prefix sums of a fixed 5-element array on the
// device and prints them on one line.
int main() {
    int a[] = {1,2,3,4,5};
    const int n = sizeof(a)/sizeof(a[0]);
    const size_t bytes = n*sizeof(int);
    int b[sizeof(a)/sizeof(a[0])];

    int *da;
    int *db;
    cudaMalloc(&da, bytes);
    cudaMalloc(&db, bytes);
    cudaMemcpy(da, a, bytes, cudaMemcpyHostToDevice);

    // One block, one thread per element.
    prefixSum<<<1, n>>>(da, db, n);
    cudaDeviceSynchronize();

    cudaMemcpy(b, db, bytes, cudaMemcpyDeviceToHost);

    printf("Prefix Sum: ");
    int idx = 0;
    while (idx < n) {
        printf("%d ", b[idx]);
        ++idx;
    }
    printf("\n");

    cudaFree(da);
    cudaFree(db);
    return 0;
}


Overwriting Prefixsum.cu


In [None]:
!nvcc -arch=sm_75 Prefixsum.cu -o Prefixsum
!./Prefixsum

Prefix Sum: 1 3 6 10 15 


# **SUFFIX SUM**

In [None]:
%%writefile Suffixsum.cu

#include<stdio.h>

/*
 * Suffix sum (the kernel keeps the name `prefixSum`): thread i stores
 * a[i] + a[i+1] + ... + a[n-1] into b[i].
 *
 * Launch layout: indexing uses threadIdx.x only, so a single block with at
 * least n threads is expected. Threads with index >= n write nothing.
 *
 * a : device input array of n ints
 * b : device output array of n ints
 * n : number of elements
 */
__global__ void prefixSum(int *a, int *b, int n)
{
  int i = threadIdx.x, j, sum = 0;
  bool inRange = (i < n);

  if(inRange)
  {
    for(j=n-1; j>=i; j--)
      sum += (a[j]);
  }

  // Kept between the reads and the writes as in the original ordering, but
  // placed outside the condition so that every thread of the block reaches it.
  __syncthreads();

  if(inRange)
    b[i] = sum;
}

// Host driver: computes the suffix sums of a fixed 5-element array on the
// device and prints them on one line.
int main()
{
  int a[] = {1, 2, 3, 4, 5};
  const int n = sizeof(a)/sizeof(a[0]);
  const size_t bytes = n*sizeof(int);
  int b[sizeof(a)/sizeof(a[0])];

  int *da;
  int *db;
  cudaMalloc(&da, bytes);
  cudaMalloc(&db, bytes);
  cudaMemcpy(da, a, bytes, cudaMemcpyHostToDevice);

  // One block, one thread per element.
  prefixSum<<<1, n>>>(da,db,n);

  cudaMemcpy(b, db, bytes, cudaMemcpyDeviceToHost);

  int k = 0;
  while(k < n)
  {
    printf("%d ",b[k]);
    ++k;
  }
  printf("\n");

  cudaFree(da);
  cudaFree(db);
  return 0;
}


Overwriting Suffixsum.cu


In [None]:
!nvcc -arch=sm_75 Suffixsum.cu -o Suffixsum
!./Suffixsum


15 14 12 9 5 


# **PALINDROME**

In [None]:
%%writefile Palindrome.cu

#include <stdio.h>
#include <cuda.h>
#include <string.h>

// Palindrome test: each thread whose global index falls in the first half of
// the string compares its character with the mirrored one and stores 0 in
// *flag on a mismatch. The kernel only ever writes 0 to *flag.
__global__ void check(char *str, int n, int *flag) {
    const int pos = blockIdx.x * blockDim.x + threadIdx.x;
    if (pos >= n/2)
        return;

    const char front = str[pos];
    const char back  = str[n-1-pos];
    if (front != back)
        *flag = 0;
}

// Host driver: checks the fixed string "racecar" with the `check` kernel and
// reports whether it is a palindrome.
int main(){
    char hstr[] = "racecar";
    const int n = strlen(hstr);
    const size_t textBytes = n*sizeof(char);

    int hflag = 1;          // starts as "palindrome"; the kernel clears it on a mismatch
    int *dflag;
    char *dstr;

    cudaMalloc((void **)&dflag, sizeof(int));
    cudaMemcpy(dflag, &hflag, sizeof(int), cudaMemcpyHostToDevice);

    cudaMalloc((void **)&dstr, textBytes);
    cudaMemcpy(dstr, hstr, textBytes, cudaMemcpyHostToDevice);

    // One thread per character pair, and never a zero-thread launch.
    const int threads = (n/2 == 0) ? 1 : n/2;

    check<<<1, threads>>>(dstr, n, dflag);
    cudaDeviceSynchronize();

    cudaMemcpy(&hflag, dflag, sizeof(int), cudaMemcpyDeviceToHost);

    if(!hflag) {
        printf("The string \"%s\" is NOT a palindrome.\n", hstr);
    } else {
        printf("The string \"%s\" is a palindrome.\n", hstr);
    }

    cudaFree(dstr);
    cudaFree(dflag);

    return 0;
}



Overwriting Palindrome.cu


In [None]:
!nvcc -arch=sm_75 Palindrome.cu -o Palindrome
!./Palindrome


The string "racecar" is a palindrome.


# **ENUMERATION SORT**

In [None]:
%%writefile EnumerationSort.cu

#include <stdio.h>
#include <cuda.h>
#define n 9

// Enumeration (rank) sort: each thread counts how many elements of `a` must
// precede its own element and writes the element to that position in `b`.
// Equal values are ordered by their original index, so every thread gets a
// distinct position. The element count is the file-level macro n.
__global__ void enumeration(int *a, int *b)
{
    const int self = threadIdx.x;
    if (self >= n)
        return;

    const int value = a[self];
    int rank = 0;
    for (int other = 0; other < n; ++other)
    {
        const bool smaller    = a[other] < value;
        const bool earlierTie = (a[other] == value) && (other < self);
        if (smaller || earlierTie)
            ++rank;
    }
    b[rank] = value;
}

// Host driver: sorts a fixed 9-element array with the `enumeration` kernel
// and prints the result on one line.
int main()
{
    int a[] = {9,8,7,6,5,4,3,2,1};
    int b[n];
    const int size = n * sizeof(int);

    int *da;
    int *db;
    cudaMalloc((void **)&da, size);
    cudaMalloc((void **)&db, size);

    cudaMemcpy(da, a, size, cudaMemcpyHostToDevice);

    // One block, one thread per element.
    enumeration<<<1, n>>>(da, db);
    cudaDeviceSynchronize();

    cudaMemcpy(b, db, size, cudaMemcpyDeviceToHost);

    int k = 0;
    while (k < n)
    {
        printf("%d ", b[k]);
        ++k;
    }
    printf("\n");

    cudaFree(da);
    cudaFree(db);

    return 0;
}


Overwriting EnumerationSort.cu


In [None]:
!nvcc -arch=sm_75 EnumerationSort.cu -o EnumerationSort
!./EnumerationSort


1 2 3 4 5 6 7 8 9 


# **ODD-EVEN SORT**

In [None]:
%%writefile OddEven.cu

#include<stdio.h>
#include<cuda.h>
#define n 9
// Odd-even transposition sort on `a`, in place. Each round runs an odd phase
// (odd tid compares with its right neighbour) followed by an even phase, with
// a block barrier after each phase. The element count is the file-level
// macro n; indexing uses threadIdx.x only.
__global__ void oddeven(int *a)
{
  const int tid = threadIdx.x;
  const bool hasRight = (tid < n-1);

  for(int round=0; round < n/2 + 1; ++round)
  {
    // parity 1 is the odd phase, parity 0 the even phase.
    for(int parity=1; parity>=0; --parity)
    {
      if(hasRight && tid%2 == parity)
      {
        int left = a[tid];
        int right = a[tid+1];
        a[tid+1] = max(left,right);
        a[tid] = min(left,right);
      }
      // Outside the condition: reached by every thread in the block.
      __syncthreads();
    }
  }
}

// Host driver: sorts a fixed 9-element array in place on the device with the
// `oddeven` kernel and prints the sorted values on one line.
int main()
{
  int a[] = {9,8,7,6,5,4,3,2,1}, *da, size = n*sizeof(int),i;
  cudaMalloc((void **)&da, size);
  cudaMemcpy(da,a,size,cudaMemcpyHostToDevice);
  // One block, one thread per element.
  oddeven<<<1,n>>>(da);
  cudaMemcpy(a,da,size,cudaMemcpyDeviceToHost);
  for(i=0; i<n; i++)
    printf("%d ",a[i]);
  printf("\n");
  // Release the device buffer; the original never freed it.
  cudaFree(da);
  return 0;
}


Overwriting OddEven.cu


In [None]:
!nvcc -arch=sm_75 OddEven.cu -o OddEven
!./OddEven


1 2 3 4 5 6 7 8 9 


# **VECTOR**

In [None]:
%%writefile Vector.cu
#include <stdio.h>
#include <cuda.h>

// Greatest common divisor by the Euclidean algorithm, written as a loop:
// (a, b) is replaced by (b, a % b) until b becomes 0, then a is returned.
__device__ int gcdfunc(int a,int b){
  while(b != 0){
    int rem = a % b;
    a = b;
    b = rem;
  }
  return a;
}
// Least common multiple via gcdfunc. Returns 0 when either argument is 0.
// The division is done before the multiplication.
__device__ int lcmfunc(int a,int b){
  if(a != 0 && b != 0){
    int divisor = gcdfunc(a,b);
    int reduced = a/divisor;
    return reduced*b;
  }
  return 0;
}

// Element-wise max, min, gcd and lcm of two int vectors.
// Thread `id` (global index) reads a[id] and b[id] and writes one entry in
// each of the four output arrays; threads with id >= n do nothing.
__global__ void Vector(int *a,int *b,int *max,int *min,int *gcd,int *lcm,int n){
  const int id = threadIdx.x + blockDim.x*blockIdx.x;
  if(id < n){
    const int lhs = a[id];
    const int rhs = b[id];

    // Start from rhs and switch to lhs only on a strict comparison.
    int larger = rhs;
    int smaller = rhs;
    if(lhs > rhs) larger = lhs;
    if(lhs < rhs) smaller = lhs;

    max[id] = larger;
    min[id] = smaller;
    gcd[id] = gcdfunc(lhs,rhs);
    lcm[id] = lcmfunc(lhs,rhs);
  }
}

// Host driver: runs the `Vector` kernel on two fixed 6-element arrays and
// prints the inputs followed by the max, min, gcd and lcm rows.
int main(){
  const int n = 6;
  const size_t bytes = n*sizeof(int);

  int a[] = {12,18,25,40,7,15};
  int b[] = {8,24,30,10,21,5};

  int hmax[n],hmin[n],hgcd[n],hlcm[n];
  int *da,*db,*dmax,*dmin,*dgcd,*dlcm;

  cudaMalloc((void **)&da, bytes);
  cudaMalloc((void **)&db, bytes);
  cudaMalloc((void **)&dmax, bytes);
  cudaMalloc((void **)&dmin, bytes);
  cudaMalloc((void **)&dgcd, bytes);
  cudaMalloc((void **)&dlcm, bytes);

  cudaMemcpy(da,a,bytes,cudaMemcpyHostToDevice);
  cudaMemcpy(db,b,bytes,cudaMemcpyHostToDevice);

  // One block, one thread per element.
  Vector<<<1,n>>>(da,db,dmax,dmin,dgcd,dlcm,n);
  cudaDeviceSynchronize();

  cudaMemcpy(hmax,dmax,bytes,cudaMemcpyDeviceToHost);
  cudaMemcpy(hmin,dmin,bytes,cudaMemcpyDeviceToHost);
  cudaMemcpy(hgcd,dgcd,bytes,cudaMemcpyDeviceToHost);
  cudaMemcpy(hlcm,dlcm,bytes,cudaMemcpyDeviceToHost);

  // Each row is a label followed by its values, printed in this order.
  const char *labels[] = {"\nA:    ", "\nB:    ", "\nMax:  ", "\nMin:  ", "\nGCD:  ", "\nLCM:  "};
  const int *rows[] = {a, b, hmax, hmin, hgcd, hlcm};
  for(int r=0;r<6;r++){
    fputs(labels[r], stdout);
    for(int i=0;i<n;i++) printf("%d ",rows[r][i]);
  }

  printf("\n");

  cudaFree(da);
  cudaFree(db);
  cudaFree(dmax);
  cudaFree(dmin);
  cudaFree(dgcd);
  cudaFree(dlcm);

  return 0;
}


Overwriting Vector.cu


In [None]:
!nvcc -arch=sm_75 Vector.cu -o Vector
!./Vector



A:    12 18 25 40 7 15 
B:    8 24 30 10 21 5 
Max:  12 24 30 40 21 15 
Min:  8 18 25 10 7 5 
GCD:  4 6 5 10 7 5 
LCM:  24 72 150 40 21 15 


# **CONSTANT MEMORY**

In [None]:
%%writefile Constant.cu

#include<stdio.h>
#define n 10

// Input values live in constant memory; the array holds at most 100 floats.
__constant__ float arr[100];

// Averages adjacent pairs: thread i writes (arr[i] + arr[i+1]) / 2 to b[i]
// for i in [0, num-2]. Indexing uses threadIdx.x only.
__global__ void constantmem(float *b, int num)
{
  const int i = threadIdx.x;
  if(i >= num-1)
    return;          // the last element has no right-hand neighbour

  const float left = arr[i];
  const float right = arr[i+1];
  b[i] = (left + right)/2.0f;
}

/*
 * Host driver: reads an element count, fills the constant-memory array with
 * 1..num, launches `constantmem` to average adjacent pairs, and prints the
 * num-1 results on one line.
 *
 * Returns 0 on success, 1 when the input is rejected or a host allocation
 * fails.
 */
int main()
{
  float *a, *b, *db;
  int num , i;

  printf("No of elements? ");
  // The count comes from the user: it has to parse as an integer and fit the
  // 100-float constant array `arr` that the kernel reads from.
  if(scanf("%d", &num) != 1 || num < 1 || num > 100)
  {
    fprintf(stderr, "Number of elements must be an integer between 1 and 100\n");
    return 1;
  }

  int in = num * sizeof(float);
  int out = (num-1)*sizeof(float);

  a = (float *)malloc(in);
  b = (float *)malloc(out);
  // out is 0 when num == 1, in which case malloc is allowed to return NULL.
  if(a == NULL || (b == NULL && out > 0))
  {
    fprintf(stderr, "Host allocation failed\n");
    free(a);
    free(b);
    return 1;
  }

  for(i=0; i<num; i++)
    a[i] = i+1;
  for(i=0; i<num-1; i++)
    b[i] = 0.0f;

  cudaMalloc((void **)&db, out);
  cudaMemcpyToSymbol(arr,a,in);
  cudaMemcpy(db,b,out,cudaMemcpyHostToDevice);

  // One block, one thread per input element; the last thread has no pair.
  constantmem<<<1,num>>>(db,num);
  cudaDeviceSynchronize();

  cudaMemcpy(b,db,out,cudaMemcpyDeviceToHost);

  for(i=0; i<num-1; i++)
    printf("%f ",b[i]);
  printf("\n");

  free(a);
  free(b);
  cudaFree(db);
  return 0;

}

Overwriting Constant.cu


In [None]:
!nvcc -arch=sm_75 Constant.cu -o Constant
!./Constant


No of elements? 10
1.500000 2.500000 3.500000 4.500000 5.500000 6.500000 7.500000 8.500000 9.500000 


# **PRIME NUMBER GENERATION**

In [None]:
%%writefile PrimeNumberGeneration.cu

#include <stdio.h>
#include <cuda.h>
#include <math.h>

/*
 * Primality flags: thread `id` stores 1 in p[id] when id is prime and 0
 * otherwise (0 and 1 are flagged 0). Primality is tested by trial division
 * up to sqrt(id).
 *
 * Launch layout: indexing uses threadIdx.x only, so a single block is
 * expected. Threads with id >= n write nothing.
 *
 * p : device output array; the caller passes its length as n
 * n : number of flags to produce
 */
__global__ void genaratePrime(int *p,int n){
  int id = threadIdx.x;
  // Guard against launches with more threads than output slots.
  if(id >= n) return;

  if(id < 2){
    p[id] = 0;
    return ;
  }

  int isprime = 1;

  for(int i=2;i*i <= id;i++){
    if(id % i == 0){
      isprime = 0;
      break;
    }
  }

  p[id] = isprime;
}

// Host driver: asks the `genaratePrime` kernel for primality flags of the
// integers 0..size-1 and prints every index whose flag is 1.
int main(){
  int size = 100;

  int *h_prime;
  int *d_prime;

  h_prime = (int*)malloc(sizeof(int) * size);

  cudaMalloc((void**)&d_prime,sizeof(int) * size);

  // One block, one thread per candidate number.
  genaratePrime<<<1,size>>>(d_prime,size);

  cudaMemcpy(h_prime,d_prime,size * sizeof(int),cudaMemcpyDeviceToHost);

  printf("prime numbers [1-100] : \n");
  // The buffer holds flags for 0..size-1 only, so the loop stops before
  // index size (the original read one element past the allocation).
  for(int i = 2;i < size;i++){
    if(h_prime[i] == 1){
      printf("%d ",i);
    }
  }
  printf("\n");

  free(h_prime);
  cudaFree(d_prime);

  return 0;

}

Overwriting PrimeNumberGeneration.cu


In [None]:
!nvcc -arch=sm_75 PrimeNumberGeneration.cu -o PrimeNumberGeneration
!./PrimeNumberGeneration


prime numbers [1-100] : 
2 3 5 7 11 13 17 19 23 29 31 37 41 43 47 53 59 61 67 71 73 79 83 89 97 


# **ARMSTRONG NUMBER**

In [None]:
%%writefile Armstrong.cu

#include <stdio.h>
#include <cuda.h>
#include <math.h>

/*
 * Armstrong-number flags: thread `id` stores 1 in d[id] when id > 0 and id
 * equals the sum of its decimal digits each raised to the number of digits;
 * otherwise it stores 0.
 *
 * Launch layout: indexing uses threadIdx.x only, so a single block is
 * expected. Threads with id >= n write nothing.
 *
 * d : device output array; the caller passes its length as n
 * n : number of flags to produce
 */
__global__ void gernerateArmstrong(int *d,int n){
  int id = threadIdx.x;

  // Valid slots are 0..n-1, so id == n must be rejected as well.
  if(id >= n) return;

  // Count the decimal digits of id (0 digits for id == 0).
  int digit = 0;
  for(int temp = id; temp > 0; temp /= 10){
    digit++;
  }

  int sum = 0;
  for(int temp = id; temp > 0; temp /= 10){
    int r = temp % 10;
    // Integer power by repeated multiplication: exact, unlike casting the
    // result of a floating-point pow to int.
    int term = 1;
    for(int k = 0; k < digit; k++){
      term *= r;
    }
    sum += term;
  }

  if(id == sum && id > 0){
    d[id] = 1;
  }
  else{
    d[id] = 0;
  }

}

// Host driver: asks the `gernerateArmstrong` kernel for flags of the integers
// 0..n-1 and prints every index from 1 upward whose flag is 1.
int main(){
  int n = 500;

  int *host = (int*)malloc(n*sizeof(int));
  int *device;

  cudaMalloc((void**)&device,sizeof(int)*n);

  // One block, one thread per candidate number.
  gernerateArmstrong<<<1,n>>>(device,n);
  cudaDeviceSynchronize();

  cudaMemcpy(host,device,sizeof(int)*n,cudaMemcpyDeviceToHost);

  printf("\n Armstrong number are : ");

  // host holds n flags (indices 0..n-1); the original loop ran to i == n and
  // read one element past the allocation.
  for(int i=1;i<n;i++){
    if(host[i] == 1){
      printf("%d ",i);
    }
  }

  free(host);

  cudaFree(device);

  return 0;
}


Overwriting Armstrong.cu


In [None]:
!nvcc -arch=sm_75 Armstrong.cu -o Armstrong
!./Armstrong



 Armstrong number are : 1 2 3 4 5 6 7 8 9 153 370 371 407 

# **NEON NUMBER GENERATION**

In [None]:
%%writefile Neon.cu

#include <stdio.h>
#include <cuda.h>
#include <math.h>

/*
 * Neon-number flags: thread `id` stores 1 in d[id] when the decimal digits of
 * id*id add up to id, and 0 otherwise. Note that id == 0 is flagged 1 by this
 * rule (0*0 = 0 has digit sum 0).
 *
 * Launch layout: indexing uses threadIdx.x only, so a single block is
 * expected. Threads with id >= n write nothing.
 *
 * d : device output array; the caller passes its length as n
 * n : number of flags to produce
 */
__global__ void generateNeon(int *d,int n){
  int id = threadIdx.x;

  // Valid slots are 0..n-1, so id == n must be rejected as well.
  if(id >= n) return;

  int sq = id * id;

  // Sum the decimal digits of the square.
  int temp = sq;
  int sum = 0;

  while(temp > 0){
    int r = temp % 10;
    sum += r;
    temp /= 10;
  }

  if(sum == id){
    d[id] = 1;
  }
  else{
    d[id] = 0;
  }

}

// Host driver: asks the `generateNeon` kernel for flags of the integers
// 0..n-1 and prints every index from 1 upward whose flag is 1.
int main(){
  int n = 100;

  int *host = (int*)malloc(sizeof(int)*n);
  int *device;
  cudaMalloc((void**)&device,sizeof(int)*n);

  // One block, one thread per candidate number.
  generateNeon<<<1,n>>>(device,n);

  cudaDeviceSynchronize();

  cudaMemcpy(host,device,sizeof(int)*n,cudaMemcpyDeviceToHost);

  printf("\n Neon Number are : ");

  // host holds n flags (indices 0..n-1); the original loop ran to i == n and
  // read one element past the allocation.
  for(int i=1;i<n;i++){
    if(host[i] == 1)
    printf("%d ",i);
  }

  free(host);
  cudaFree(device);

  return 0;
}


Overwriting Neon.cu


In [None]:
!nvcc -arch=sm_75 Neon.cu -o Neon
!./Neon



 Neon Number are : 1 9 

# **PERFECT NUMBER GENERATION**

In [None]:
%%writefile PerfectNumber.cu

#include <stdio.h>
#include <cuda.h>
#include <math.h>

/*
 * Perfect-number flags: thread `id` stores 1 in d[id] when id >= 2 and id
 * equals the sum of its proper divisors (1 plus every divisor in
 * [2, id/2]); otherwise it stores 0.
 *
 * Launch layout: indexing uses threadIdx.x only, so a single block is
 * expected. Threads with id >= n write nothing.
 *
 * d : device output array; the caller passes its length as n
 * n : number of flags to produce
 */
__global__ void PerfectNumber(int *d,int n){
  int id = threadIdx.x;
  if(id >= n) return;

  // 0 and 1 are not perfect. Write the flag explicitly: the original returned
  // without storing anything, leaving d[0] and d[1] unwritten for the reader.
  if(id < 2){
    d[id] = 0;
    return;
  }

  int sum = 1;

  for(int i=2;i<=id/2;i++){
    if(id % i == 0){
      sum += i;
    }
  }

  if(sum == id){
    d[id] = 1;
  }
  else{
    d[id] = 0;
  }
}

// Host driver: asks the `PerfectNumber` kernel for flags of the integers
// 0..n-1 and prints every index from 1 upward whose flag is 1.
int main(){
  int n = 100;

  int *host;
  host = (int*)malloc(sizeof(int)*n);
  int *device;

  cudaMalloc((void**)&device,sizeof(int)*n);

  // One block, one thread per candidate number. The thread count follows n
  // instead of a separate literal so the two cannot drift apart.
  PerfectNumber<<<1,n>>>(device,n);

  cudaMemcpy(host,device,sizeof(int)*n,cudaMemcpyDeviceToHost);

  printf("\n Perfect Number are : ");
  // host holds n flags (indices 0..n-1); the original loop ran to i == n and
  // read one element past the allocation.
  for(int i=1;i<n;i++){
    if(host[i] == 1){
      printf("%d ",i);
    }
  }

  free(host);
  cudaFree(device);

  return 0;
}


Overwriting PerfectNumber.cu


In [None]:
!nvcc -arch=sm_75 PerfectNumber.cu -o PerfectNumber

!./PerfectNumber



 Perfect Number are : 6 28 

# **INTERNAL MARK**

In [None]:
%%writefile InternalMark.cu
#include <stdio.h>
#include <cuda.h>
#define n 20

/*
 * Internal mark per student: the two highest of the three marks a[id], b[id],
 * c[id] are each scaled by 0.4 and added to ass[id]; the result is stored in
 * d[id].
 *
 * The student count is the file-level macro n; threads with a global index
 * >= n do nothing.
 *
 * a, b, c : device arrays with one mark each per student
 * ass     : device array with the value added unscaled per student
 * d       : device output array of floats
 */
__global__ void calculate(int *a, int *b, int *c, int *ass, float *d) {
    int id = threadIdx.x + blockIdx.x * blockDim.x;
    if (id >= n) return;

    int x = a[id];
    int y = b[id];
    int z = c[id];

    int best1, best2;

    // Comparisons use >= so that a tie for the highest mark still selects one
    // of the tied marks. With strict >, the case x == y > z fell through to
    // the last branch and picked the lowest mark z as best1.
    if (x >= y && x >= z) {
        best1 = x;
        best2 = (y > z) ? y : z;
    }
    else if (y >= x && y >= z) {
        best1 = y;
        best2 = (x > z) ? x : z;
    }
    else {
        // z is strictly greater than both x and y here.
        best1 = z;
        best2 = (x > y) ? x : y;
    }

    float r1 = best1 * 0.4f;
    float r2 = best2 * 0.4f;

    d[id] = r1 + r2 + ass[id];
}

// Host driver: uploads three mark lists and one extra list for 20 students,
// runs the `calculate` kernel, and prints the resulting internal marks with
// two decimals on one line.
int main() {
    int c1[] = {32,46,33,50,34,12,44,47,21,25,43,33,45,11,14,16,49,48,41,31};
    int c2[] = {7,25,41,12,36,3,48,15,28,50,22,9,44,19,33,1,30,46,11,5};
    int c3[] = {8,27,42,16,35,4,49,23,12,31,18,2,46,39,25,7,44,14,21,50};
    int ass[] = {3,7,1,9,5,2,8,10,6,4,7,2,9,1,8,5,3,10,6,4};

    const size_t intBytes = sizeof(int)*n;
    const size_t floatBytes = sizeof(float)*n;

    int *dc1, *dc2, *dc3, *dass;
    float *dout;
    float host[n];

    // Device buffers: four int inputs and one float output.
    cudaMalloc((void**)&dc1, intBytes);
    cudaMalloc((void**)&dc2, intBytes);
    cudaMalloc((void**)&dc3, intBytes);
    cudaMalloc((void**)&dass, intBytes);
    cudaMalloc((void**)&dout, floatBytes);

    cudaMemcpy(dc1, c1, intBytes, cudaMemcpyHostToDevice);
    cudaMemcpy(dc2, c2, intBytes, cudaMemcpyHostToDevice);
    cudaMemcpy(dc3, c3, intBytes, cudaMemcpyHostToDevice);
    cudaMemcpy(dass, ass, intBytes, cudaMemcpyHostToDevice);

    // One block, one thread per student.
    calculate<<<1, n>>>(dc1, dc2, dc3, dass, dout);
    cudaDeviceSynchronize();

    cudaMemcpy(host, dout, floatBytes, cudaMemcpyDeviceToHost);

    printf("Internal Marks are:\n");
    int student = 0;
    while (student < n) {
        printf("%.2f ", host[student]);
        ++student;
    }
    printf("\n");

    cudaFree(dc1);
    cudaFree(dc2);
    cudaFree(dc3);
    cudaFree(dass);
    cudaFree(dout);

    return 0;
}


Overwriting InternalMark.cu


In [None]:
!nvcc -arch=sm_75 InternalMark.cu -o InternalMark
!./InternalMark


Internal Marks are:
19.00 36.20 34.20 35.40 33.40 8.40 46.80 38.00 25.60 36.40 33.00 18.80 45.40 24.20 31.20 14.20 40.20 47.60 30.80 36.40 
