In [47]:
import time
import cv2
import numba
from numba import cuda
import numpy as np

In [48]:
def cpu_salt_and_pepper(s_and_p):
    """Apply a 3x3 median filter on the CPU (reference implementation).

    Every full 3x3 neighbourhood of ``s_and_p`` is reduced to its median,
    i.e. the 5th smallest of its 9 values.

    Parameters
    ----------
    s_and_p : numpy.ndarray
        2-D image. Only positions where a complete 3x3 window fits are
        evaluated, so the output is two rows and two columns smaller than
        the input; pad the image by one pixel beforehand to get a
        same-size result.

    Returns
    -------
    numpy.ndarray
        float64 array of shape ``(rows - 2, cols - 2)`` with the medians.
    """
    out_rows = s_and_p.shape[0] - 2
    out_cols = s_and_p.shape[1] - 2
    res = np.zeros((out_rows, out_cols))
    for top, left in np.ndindex(out_rows, out_cols):
        patch = s_and_p[top:top + 3, left:left + 3]
        # axis=None sorts a flattened copy; index 4 is the middle of 9 values.
        res[top, left] = np.sort(patch, axis=None)[4]
    return res

@cuda.jit
def gpu_salt_and_pepper(s_and_p, res):
    """CUDA kernel: 3x3 median filter over ``s_and_p``, written into ``res``.

    Output pixels are numbered row-major over a
    ``(rows - 2) x (cols - 2)`` grid. Each thread starts at its global 1-D
    index and advances by the total number of launched threads
    (grid-stride loop), so any 1-D launch configuration covers all pixels.

    s_and_p : 2-D device array, the input image.
    res     : 2-D device array receiving the medians; it is indexed up to
              ``[rows - 3, cols - 3]``.
    """
    out_rows = s_and_p.shape[0] - 2
    out_cols = s_and_p.shape[1] - 2
    total = out_rows * out_cols

    # Per-thread scratch buffer for the 9 neighbourhood values; it is fully
    # overwritten for every pixel, so one declaration serves all iterations.
    neighbourhood = cuda.local.array(shape=9, dtype=numba.float64)

    for pixel in range(cuda.grid(1), total, cuda.gridsize(1)):
        r = pixel // out_cols
        c = pixel % out_cols

        # Gather the 3x3 window whose top-left corner is (r, c).
        for dr in range(3):
            for dc in range(3):
                neighbourhood[dr * 3 + dc] = s_and_p[r + dr, c + dc]

        # Bubble sort the 9 values in ascending order.
        for done in range(8):
            for pos in range(8 - done):
                if neighbourhood[pos] > neighbourhood[pos + 1]:
                    tmp = neighbourhood[pos]
                    neighbourhood[pos] = neighbourhood[pos + 1]
                    neighbourhood[pos + 1] = tmp

        # Middle element of the sorted window is the median.
        res[r, c] = neighbourhood[4]


def gpu_calc(s_and_p, res, threads_per_block=None, blocks_per_grid=512):
    """Run the 3x3 median filter kernel on the GPU and return the result.

    Parameters
    ----------
    s_and_p : numpy.ndarray
        2-D input image; it is copied to the device.
    res : numpy.ndarray
        Template for the output: only its shape and dtype are used to
        allocate the device buffer, its contents are never read or
        modified. Its shape must be ``(rows - 2, cols - 2)`` of ``s_and_p``.
    threads_per_block : int, optional
        Threads per block. Defaults to the warp size of the current device.
    blocks_per_grid : int, optional
        Number of blocks to launch (default 512). The kernel strides over
        the pixels, so the grid does not have to match the image size.

    Returns
    -------
    numpy.ndarray
        Host copy of the filtered image, same shape and dtype as ``res``.

    Raises
    ------
    ValueError
        If ``res`` does not have the shape the kernel writes to.
    """
    # The kernel performs unchecked writes to res[0..rows-3, 0..cols-3];
    # reject a mismatching buffer here instead of corrupting device memory.
    expected_shape = (s_and_p.shape[0] - 2, s_and_p.shape[1] - 2)
    if tuple(res.shape) != expected_shape:
        raise ValueError(
            f"res has shape {tuple(res.shape)}, expected {expected_shape}"
        )

    if threads_per_block is None:
        # Query the device here rather than relying on a module-level
        # `device` variable that may not have been defined yet.
        threads_per_block = cuda.get_current_device().WARP_SIZE

    d_salt_pep = cuda.to_device(s_and_p)
    d_res = cuda.device_array_like(res)
    gpu_salt_and_pepper[blocks_per_grid, threads_per_block](d_salt_pep, d_res)
    return d_res.copy_to_host()

In [50]:
# --- Build the noisy test image ---------------------------------------------
prob = 0.3  # probability that a pixel is replaced by a random value
original = cv2.imread('original.bmp', cv2.IMREAD_GRAYSCALE)
if original is None:
    # cv2.imread reports failure by returning None instead of raising.
    raise FileNotFoundError("could not read 'original.bmp'")

# Seeded generator so the noisy image (and the benchmark input) is reproducible.
rng = np.random.default_rng(42)
# Upper bound is exclusive: 256 is needed for 255 to be a possible value.
# uint8 noise keeps the noisy image in the same dtype as the source image.
noise = rng.integers(0, 256, size=original.shape, dtype=np.uint8)
salt_pepper = np.where(rng.random(original.shape) < prob, noise, original)
cv2.imwrite('salt_pepper.bmp', salt_pepper)

# Pad by one pixel so the 3x3 filter output has the original image size.
s_and_p = np.pad(salt_pepper, 1, mode="symmetric")
res = np.zeros(salt_pepper.shape)
# Module-level handle to the active CUDA device.
device = cuda.get_current_device()

# --- CPU timing -------------------------------------------------------------
start_time_cpu = time.perf_counter()
cpu_res = cpu_salt_and_pepper(s_and_p)
cpu_time = time.perf_counter() - start_time_cpu

# --- GPU timing -------------------------------------------------------------
# Untimed warm-up: the first kernel call triggers JIT compilation, which
# would otherwise be counted as GPU run time.
gpu_calc(s_and_p, res)

start_time_gpu = time.perf_counter()
gpu_res = gpu_calc(s_and_p, res)
gpu_time = time.perf_counter() - start_time_gpu

# --- Compare and report -----------------------------------------------------
print(np.allclose(cpu_res, gpu_res))
# Results are float64 arrays holding pixel values; store them as 8-bit images.
cv2.imwrite('cpu_res.bmp', cpu_res.astype(np.uint8))
cv2.imwrite('gpu_res.bmp', gpu_res.astype(np.uint8))
print(f"Время CPU = {cpu_time}")
print(f"Время GPU = {gpu_time}")
print(f"Ускорение = {cpu_time / gpu_time}")

True
Время CPU = 4.012222766876221
Время GPU = 0.054596900939941406
Ускорение = 73.48810459571347
