In [219]:
import timeit
from numba import cuda
import numpy as np
import pandas as pd

In [220]:
def cpu_points_count(points):
    count_res = 0
    for point in points:
        if point[0] ** 2 + point[1] ** 2 <= 1:
            count_res += 1
    return count_res

@cuda.jit
def gpu_points_count(points, count):
    index = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x
    while index < points.shape[0]:
        if points[index][0] ** 2 + points[index][1] ** 2 <= 1:
            cuda.atomic.add(count, 0, 1)
        index += cuda.blockDim.x * cuda.gridDim.x

def gpu_calc(points, count):
    # Перенос данных в глобальную память GPU
    d_points = cuda.to_device(points)
    d_count = cuda.device_array_like(count)
    threads_per_block = device.WARP_SIZE
    blocks_per_grid = 256
    gpu_points_count[blocks_per_grid, threads_per_block](d_points, d_count)
    return d_count.copy_to_host()

In [228]:
n = 1000000
tests = 10
points = np.random.rand(n, 2)
count = np.zeros(1)
device = cuda.get_current_device()
gpu_res = 4 * gpu_calc(points, count)[0] / n
cpu_res = 4 * cpu_points_count(points) / n
time_gpu = timeit.timeit(lambda: gpu_calc(points, count), number=tests) / tests
time_cpu = timeit.timeit(lambda: cpu_points_count(points), number=tests) / tests
print(f"Время CPU = {time_cpu}")
print(f"Время GPU = {time_gpu}")
print(f"Ускорение = {time_cpu / time_gpu}")
print(f"Результат CPU = {cpu_res}")
print(f"Результат GPU = {gpu_res}")

Время CPU = 0.5301465100143105
Время GPU = 0.01316682999022305
Ускорение = 40.26379245482531
Результат CPU = 3.141184
Результат GPU = 3.141185831793686
