<a href="https://colab.research.google.com/github/tonystz/cuda/blob/main/cudaTest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pycuda # install cuda

## CPU vs GPU monte_carlo_pi

In [93]:
%%writefile monte_carlo_pi.py
import time
import numpy as np

total = 10000000
#total = 100000
data=np.random.rand(total,2).astype(np.float64)

t=time.time()
def cal_pi_cpu():
  hits = 0
  for i in data:
      x,y = i
      if (x ** 2 + y ** 2) <= 1 :
          hits+=1
  return hits
hits=cal_pi_cpu()
print(f'CPU time:{time.time()-t} hits={hits} total={total} pi={hits * 4.0 / total}')

Overwriting monte_carlo_pi.py


In [None]:
!python -m profile -s cumtime  monte_carlo_pi.py 

In [98]:
%%writefile pi.py
import time
import numpy as np
#GPU
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda import gpuarray
from pycuda.compiler import SourceModule


total = 10000000
#total = 100000
data=np.random.rand(total,2).astype(np.float64)

t=time.time()
def cal_pi_cpu():
  hits = 0
  for i in data:
      x,y = i
      if (x ** 2 + y ** 2) <= 1 :
          hits+=1
  return hits
hits=cal_pi_cpu()
print(f'CPU time:{time.time()-t} hits={hits} total={total} pi={hits * 4.0 / total}')


t=time.time()
mod = SourceModule("""
    #include <stdio.h>

    __global__ void cal_pi_gpu(double *in, unsigned long long *out)
    { 
      unsigned long long idx = threadIdx.x + blockIdx.x * blockDim.x;
      const int colSize=2;
      double x = in[idx*colSize];
      double y = in[idx*colSize+1];
      if ((x*x + y*y) <= 1.0)
        out[idx]=1;
    }
    """)

cal_pi_gpu = mod.get_function("cal_pi_gpu")
data_gpu = gpuarray.to_gpu(data)
out_gpu = gpuarray.zeros((total,1),dtype=np.uint64)
cal_pi_gpu(data_gpu,out_gpu,block=(1000,1,1),grid=(int(total/1000),1,1))
hits=sum(out_gpu.get())[0]
print(f'GPU time:{time.time()-t} hits={hits} total={total} pi={hits * 4.0 / total}')

Overwriting pi.py


In [99]:
!python pi.py

CPU time:18.82800579071045 hits=7853437 total=10000000 pi=3.1413748
GPU time:7.032425403594971 hits=7853437 total=10000000 pi=3.1413748
