In [2]:
# установка pycuda
! pip install pycuda



In [31]:
# импорт библиотек, необходимых для работы

import numpy as np
np.random.seed(0) 

import pycuda.autoinit
import pycuda.driver as drv
from pycuda.compiler import SourceModule
import time
import cv2
import pycuda.driver as drv

# т.к. колаб не отображет cv2.imshow, то использую эту функцию для проверки
from google.colab.patches import cv2_imshow

time: 2.61 ms (started: 2020-12-22 21:04:17 +00:00)


In [32]:
start = drv.Event()
end = drv.Event()

time: 1.11 ms (started: 2020-12-22 21:04:27 +00:00)


In [33]:
# CPU функция
def bilateral_filtering_cpu(im, sigma_r, sigma_d):
    result = np.zeros((im.shape[0], im.shape[1]))
    
    for i in range(1, im.shape[0] - 1):
        for j in range(1, im.shape[1] - 1):
            c, s = 0, 0
            for k in range(i-1, i+1):
                for l in range(j-1, j+1):
                    g = np.exp(-((k - i) ** 2 + (l - j) ** 2) / sigma_d ** 2)
                    r = np.exp(-(im[k, l] - im[i, j]) ** 2 / sigma_r ** 2)
                    c += g*r 
                    s +=   g*r*im[k, l]
            result[i, j] = s / c
    return result


# GPU функция
mod = SourceModule("""
texture<unsigned int, 2, cudaReadModeElementType> tex;

__global__ void bilateral_filtering_gpu(unsigned int* result, const int M, const int N, const float sigma_d, const float sigma_r)
{   
    const int i = threadIdx.x + blockDim.x * blockIdx.x;
    const int j = threadIdx.y + blockDim.y * blockIdx.y;
    if ((i < M) && (j < N)) {
            float s = 0;
            float c = 0;
            for (int l = i - 1; l <= i + 1; l++){
                for (int k = j - 1; k <= j + 1; k++){
                    float img1 = tex2D(tex, k, l) / 255;
                    float img2 = tex2D(tex, i, j) / 255;
                    float g = exp(-(pow(k - i, 2) + pow(l - j, 2)) / pow(sigma_d, 2));
                    float r = exp(-pow((img1 - img2) * 255, 2) / pow(sigma_r, 2));
                    c += g * r;
                    s += g * r * tex2D(tex, k, l);
                }
            }
            result[i * N + j] = s / c;
        }
}
""")

time: 11.6 ms (started: 2020-12-22 21:04:29 +00:00)


In [73]:
# ввод данных

def read_data():
  image_url = '/content/input.jpg'
  print('Sigma_r = ')
  sigma_r = input()
  print('Sigma_d = ')
  sigma_d = input()
  return image_url, float(sigma_r), float(sigma_d)

image_url, sigma_r, sigma_d  = read_data()
image = cv2.imread(image_url, cv2.IMREAD_GRAYSCALE)

Sigma_r = 
10
Sigma_d = 
0.1
time: 5.84 s (started: 2020-12-22 21:13:47 +00:00)


In [None]:
!pip install ipython-autotime

In [74]:
%load_ext autotime
start_cpu = time.time()

start.record() 
start.synchronize()

result = bilateral_filtering_cpu(image, sigma_r, sigma_d)
cpu_time = time.time() - start_cpu

end.record()
end.synchronize()
secs = start.time_till(end)/1000
print(secs)

cv2.imwrite('res_cpu.bmp', result)

The autotime extension is already loaded. To reload it, use:
  %reload_ext autotime


  # This is added back by InteractiveShellApp.init_path()


23.66973046875


True

time: 23.7 s (started: 2020-12-22 21:13:56 +00:00)


In [80]:
N, M = image.shape[0], image.shape[1]

block_size = (1, 1, 1)
grid_size = (int(np.ceil(N/block_size[0])),int(np.ceil(M/block_size[1])))

time: 1.51 ms (started: 2020-12-22 21:14:45 +00:00)


In [81]:
import pycuda.driver as cuda
import pycuda.gpuarray as gpuarray

result = np.zeros((N, M), dtype = np.uint32)

bilateral_filtering_gpu = mod.get_function("bilateral_filtering_gpu")

time: 2.26 ms (started: 2020-12-22 21:14:47 +00:00)


In [82]:
%load_ext autotime
start_gpu = time.process_time()
start.record()
start.synchronize()

tex = mod.get_texref("tex")
tex.set_filter_mode(drv.filter_mode.LINEAR)
tex.set_address_mode(0, drv.address_mode.MIRROR)
tex.set_address_mode(1, drv.address_mode.MIRROR)
drv.matrix_to_texref(image.astype(np.uint32), tex, order="C")

bilateral_filtering_gpu(drv.Out(result), np.int32(N), np.int32(M), np.float32(sigma_r), np.float32(sigma_d), block=block_size, grid=grid_size, texrefs=[tex])
drv.Context.synchronize()

end.record()
end.synchronize()
secs = start.time_till(end)/1000
print(secs)
time_gpu = time.process_time() - start_gpu

cv2.imwrite('res_gpu.bmp', result.astype(np.uint8))

The autotime extension is already loaded. To reload it, use:
  %reload_ext autotime
0.04233305740356445


True

time: 53 ms (started: 2020-12-22 21:14:49 +00:00)


In [83]:
print('Время CPU: ', cpu_time)
print('Время GPU: ',time_gpu)
print('CPU/GPU:', cpu_time/time_gpu)

Время CPU:  23.66930627822876
Время GPU:  0.04314951799999278
CPU/GPU: 548.5416147228509
time: 1.34 ms (started: 2020-12-22 21:14:51 +00:00)
