In [1]:
%%writefile cuda_montecarlo.cu

#include <stdio.h>
#include <curand_kernel.h>

/*
 * Monte Carlo sampling kernel for estimating pi.
 *
 * Every thread draws N points (x, y) with curand_uniform, counts how many
 * satisfy x*x + y*y <= 1, and the grand total over all threads is added
 * to *count.
 *
 * Parameters:
 *   count  device pointer to a single int accumulator. The kernel only adds
 *          to it, so the caller must initialise it (e.g. to 0) before launch.
 *   N      number of points sampled by each thread; N <= 0 contributes 0.
 *
 * Launch layout: the per-thread RNG subsequence is derived from threadIdx.x
 * and blockIdx.x only, so a 1D grid of 1D blocks is expected. No dynamic
 * shared memory is required (one statically allocated int per block).
 *
 * Limits: the per-thread tally and *count are 32-bit ints, so the total
 * number of hits across the launch must stay below INT_MAX.
 */
__global__ void monteCarloPi(int *count, int N) {
    // Block-local accumulator: threads combine their tallies here so that only
    // one global atomic per block is issued instead of one per thread.
    __shared__ int blockHits;

    const bool isBlockLeader =
        threadIdx.x == 0 && threadIdx.y == 0 && threadIdx.z == 0;
    if (isBlockLeader)
        blockHits = 0;
    // Make the zeroed accumulator visible before any thread adds to it.
    __syncthreads();

    int idx = threadIdx.x + blockIdx.x * blockDim.x;

    // Same seed for every thread, distinct subsequence per global thread index.
    curandState state;
    curand_init(1234, idx, 0, &state);

    int local = 0;
    for (int i = 0; i < N; i++) {
        float x = curand_uniform(&state);
        float y = curand_uniform(&state);
        if (x * x + y * y <= 1.0f)
            local++;
    }

    atomicAdd(&blockHits, local);
    // All threads reach this barrier (no early exits above), so the block total
    // is complete once it is passed.
    __syncthreads();

    if (isBlockLeader)
        atomicAdd(count, blockHits);
}

// Reports a failed CUDA runtime call on stderr; returns true when `status` is cudaSuccess.
static bool cudaSucceeded(cudaError_t status, const char *what) {
    if (status == cudaSuccess)
        return true;
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

/*
 * Host driver: zeroes a device counter, launches monteCarloPi, copies the hit
 * count back and prints 4 * hits / samples as the estimate of pi.
 *
 * Returns 0 on success and 1 if any CUDA call or the kernel launch fails
 * (in which case no estimate is printed).
 */
int main() {
    // Launch configuration, named once so the launch and the sample total
    // used in the estimate cannot drift apart.
    const int blocks = 128;
    const int threadsPerBlock = 128;
    const int N = 1000;  // points sampled by each thread

    int h_count = 0;
    int *d_count = NULL;

    if (!cudaSucceeded(cudaMalloc(&d_count, sizeof(int)), "cudaMalloc"))
        return 1;

    bool ok = cudaSucceeded(
        cudaMemcpy(d_count, &h_count, sizeof(int), cudaMemcpyHostToDevice),
        "cudaMemcpy (host to device)");

    if (ok) {
        monteCarloPi<<<blocks, threadsPerBlock>>>(d_count, N);
        // A launch does not return a status; configuration errors surface here.
        ok = cudaSucceeded(cudaGetLastError(), "monteCarloPi launch");
    }

    if (ok) {
        // Blocking copy on the default stream: waits for the kernel and
        // surfaces any error raised while it was executing.
        ok = cudaSucceeded(
            cudaMemcpy(&h_count, d_count, sizeof(int), cudaMemcpyDeviceToHost),
            "cudaMemcpy (device to host)");
    }

    if (ok) {
        // 64-bit product: blocks * threadsPerBlock * N overflows int for large N.
        const long long totalSamples = (long long)blocks * threadsPerBlock * N;
        double pi = 4.0 * h_count / totalSamples;
        printf("Estimated Pi = %f\n", pi);
    }

    cudaFree(d_count);
    return ok ? 0 : 1;
}


Writing cuda_montecarlo.cu


In [2]:
!nvcc cuda_montecarlo.cu -o cuda_montecarlo -arch=sm_70

In [3]:
!./cuda_montecarlo

Estimated Pi = 3.140885
