In [None]:
%%writefile cudapi.cu
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include <cuda.h>
#include <cuda_runtime.h>

#define ITERATIONS 100000000L

/*
 * Computes per-thread partial sums of the Leibniz series
 *   sum_{k=0}^{ITERATIONS-1} (-1)^k / (2k + 1)
 *
 * The index range [0, ITERATIONS) is split into one contiguous slice per
 * thread. Slices differ in length by at most one term: the first
 * (ITERATIONS % total) threads each take one extra term, so the work is
 * balanced even when ITERATIONS is not a multiple of the thread count, and
 * threads simply get empty slices when there are more threads than terms.
 *
 * Launch layout: only the x dimension of the grid and block is used.
 *
 * partial_sums: output array; element [tid] receives the slice sum of thread
 *               tid, so it must hold at least gridDim.x * blockDim.x doubles.
 */
__global__ void leibniz_kernel(double* partial_sums)
{
  // Widen before multiplying so the flat index cannot wrap in 32-bit unsigned math.
  const long long tid = (long long)blockIdx.x * blockDim.x + threadIdx.x;
  const long long total = (long long)gridDim.x * blockDim.x;
  const long long iterations = ITERATIONS;

  // Terms per thread is iterations / total (not the inverse, which is 0 for
  // any realistic launch and would leave all the work to a single thread).
  const long long base = iterations / total;
  const long long extra = iterations % total;

  const long long start = tid * base + (tid < extra ? tid : extra);
  const long long end = start + base + (tid < extra ? 1 : 0);

  double sum = 0.0;
  for (long long k = start; k < end; k++)
  {
    // Even k contributes a positive term, odd k a negative one.
    const double sign = (k % 2 == 0) ? 1.0 : -1.0;
    sum += sign / (2.0 * (double)k + 1.0);
  }

  partial_sums[tid] = sum;
}
/* Prints a diagnostic and terminates the program if a CUDA runtime call failed. */
static void checkCuda(cudaError_t status, const char* what)
{
  if (status != cudaSuccess)
  {
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
    exit(EXIT_FAILURE);
  }
}

/*
 * Estimates pi with the Leibniz series: launches leibniz_kernel to produce one
 * partial sum per thread, copies the partial sums back to the host, adds them
 * up and prints 4 * sum.
 *
 * Returns 0 on success and a non-zero exit status if a host allocation or any
 * CUDA call fails.
 */
int main()
{
  const int blocks = 20;
  const int threadsPerBlock = 256;
  const int totalThreads = blocks * threadsPerBlock;
  // Byte count is an integral quantity; size_t matches malloc/cudaMalloc/cudaMemcpy.
  const size_t size = (size_t)totalThreads * sizeof(double);

  double* h_partial = (double*)malloc(size);
  if (h_partial == NULL)
  {
    fprintf(stderr, "Failed to allocate %zu bytes of host memory\n", size);
    return EXIT_FAILURE;
  }

  double* d_partial = NULL;
  checkCuda(cudaMalloc((void**)&d_partial, size), "cudaMalloc");

  leibniz_kernel<<<blocks, threadsPerBlock>>>(d_partial);
  // Launch-configuration errors are only visible through cudaGetLastError();
  // faults raised while the kernel runs surface at the synchronisation point.
  checkCuda(cudaGetLastError(), "kernel launch");
  checkCuda(cudaDeviceSynchronize(), "kernel execution");

  checkCuda(cudaMemcpy(h_partial, d_partial, size, cudaMemcpyDeviceToHost),
            "cudaMemcpy (device to host)");

  double total_sum = 0.0;
  for (int i = 0; i < totalThreads; i++)
  {
    total_sum += h_partial[i];
  }

  // The Leibniz series converges to pi / 4.
  double pi_est = 4.0 * total_sum;
  printf("The value of pi is %.12f\n", pi_est);

  checkCuda(cudaFree(d_partial), "cudaFree");
  free(h_partial);
  return 0;
}

Overwriting cudapi.cu


In [None]:
!nvcc cudapi.cu -o cudapi -arch=sm_70

In [None]:
!./cudapi

The value of pi is 3.141592643589
