# 卷积

In [4]:
import numpy as np
from scipy import misc
import time

from scipy import signal as sg

import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule

In [6]:

# DEVICE SETUP
# Edge length of the square thread block used for every kernel launch.
# Max 32: 32**2 = 1024 threads per block, the maximum noted for the GTX1060.
BLOCK_SIZE = 32

# Compile kernel. The source file is read inside a `with` block so the file
# handle is closed deterministically instead of being left to the garbage
# collector.
with open("../../datas/cuda/05-convolution.cu", "r") as kernel_file:
    mod = SourceModule(kernel_file.read())

# Get functions
conv = mod.get_function("conv")


def convolve(a, b):
    """Run the compiled ``conv`` CUDA kernel on matrix ``a`` with kernel ``b``.

    Parameters
    ----------
    a : array-like, 2-D
        Input matrix (e.g. a greyscale image). Converted to ``float32``.
    b : array-like, 2-D
        Kernel matrix. Converted to ``float32``. A warning is printed when
        its width or height is even.

    Returns
    -------
    numpy.ndarray
        ``float32`` array of shape ``(ah - (bh - 1), aw - (bw - 1))`` filled
        by copying the device result buffer back to the host.

    Notes
    -----
    Relies on the module-level ``conv`` kernel handle and ``BLOCK_SIZE``.
    The sum of absolute kernel values is passed to the device truncated to
    ``int32`` -- presumably it is used there as a normalisation divisor
    (TODO confirm against the .cu source); a kernel whose absolute sum is
    below 1 would therefore be passed as 0.
    """
    # The host buffers are copied to the device byte-for-byte, so force a
    # C-contiguous float32 layout regardless of what the caller passed in.
    # NOTE(review): assumes the kernel indexes row-major -- confirm in the .cu.
    a = np.ascontiguousarray(a, dtype=np.float32)
    b = np.ascontiguousarray(b, dtype=np.float32)

    # Matrix A
    height, width = int(a.shape[0]), int(a.shape[1])
    aw = np.int32(width)   # Width of input matrix
    ah = np.int32(height)  # Height of input matrix

    # Matrix B (kernel)
    bw = np.int32(b.shape[1])  # Width of kernel matrix
    if bw % 2 == 0:
        print("Kernel width is not an odd number! Strange things will happen...")
    bh = np.int32(b.shape[0])  # Height of kernel matrix
    if bh % 2 == 0:
        print("Kernel height is not an odd number! Strange things will happen...")
    b_sum = np.int32(np.absolute(b).sum())

    # Matrix C: the kernel overhang (bh-1 rows, bw-1 columns) is removed,
    # i.e. 2*padding because it is taken off both sides.
    c = np.empty([ah - (bh - 1), aw - (bw - 1)], dtype=np.float32)

    # Set grid size from A matrix: ceil(size / BLOCK_SIZE) blocks per axis.
    # The previous `x % BLOCK_SIZE is 0` test compared a NumPy scalar to the
    # int 0 by identity, which is never true, so an extra block was always
    # launched even when the size was an exact multiple of BLOCK_SIZE.
    grid = (
        (width + BLOCK_SIZE - 1) // BLOCK_SIZE,
        (height + BLOCK_SIZE - 1) // BLOCK_SIZE,
        1,
    )

    # Allocate memory on device
    a_gpu = cuda.mem_alloc(a.nbytes)
    b_gpu = cuda.mem_alloc(b.nbytes)
    c_gpu = cuda.mem_alloc(c.nbytes)
    try:
        # Copy matrices to device memory
        cuda.memcpy_htod(a_gpu, a)
        cuda.memcpy_htod(b_gpu, b)

        # Call gpu function
        conv(a_gpu, b_gpu, aw, ah, bw, bh, b_sum, c_gpu,
             block=(BLOCK_SIZE, BLOCK_SIZE, 1), grid=grid)

        # Copy back the result
        cuda.memcpy_dtoh(c, c_gpu)
    finally:
        # Release device memory eagerly, also when the launch or a copy
        # fails, rather than waiting for garbage collection.
        a_gpu.free()
        b_gpu.free()
        c_gpu.free()

    # Return the result
    return c

kernel.cu

  


In [7]:
# Load an image file into a float32 numpy array
def from_img(fname):
    """Read the image at ``fname`` and return its pixels as a ``float32`` array.

    The file is read with ``misc.imread(..., flatten=True)``; per the SciPy
    documentation ``flatten`` collapses the colour layers into a single
    grey-scale layer.
    """
    pixels = misc.imread(fname, flatten=True)
    return np.asarray(pixels, dtype=np.float32)


# Map a result matrix into the 0..255 range expected for image output
def to_img(m):
    """Return ``|m|`` with every value limited to the interval ``[0, 255]``.

    Nothing is written to disk here; the caller passes the returned array
    to an image writer.
    """
    magnitude = np.absolute(m)
    return np.clip(magnitude, 0, 255)


# Create a box blur kernel of size r (rounded up to the next odd number)
def k_boxblur(r):
    """Return a square all-ones kernel for a box blur.

    Parameters
    ----------
    r : int
        Requested side length. An even value is bumped to ``r + 1`` so the
        kernel always has an odd side.

    Returns
    -------
    numpy.ndarray
        Array of ones with shape ``(side, side)``. It is not normalised.
    """
    # Compare by value (`==`), not identity (`is`): `r % 2 is 1` is False for
    # NumPy integers, which turned odd sizes into even ones.
    side = r if r % 2 == 1 else r + 1
    return np.ones([side, side])


# Scale a kernel so its absolute values sum to one
def nrm(m):
    """Return ``m`` as an array divided by the sum of its absolute values."""
    kernel = np.array(m)
    abs_total = np.abs(kernel).sum()
    return kernel / abs_total


# CREATE KERNELS
# Two 3x3 Sobel-style gradient kernels (one is the transpose of the other)
# and a 5x5 all-ones box kernel.
k_sv = [
    [-1.0, 0.0, 1.0],
    [-2.0, 0.0, 2.0],
    [-1.0, 0.0, 1.0],
]
k_sh = [
    [-1.0, -2.0, -1.0],
    [0.0, 0.0, 0.0],
    [1.0, 2.0, 1.0],
]
k_b5 = k_boxblur(5)

# LOAD IMAGE
# from_img already yields float32; the extra astype only makes a copy.
a = from_img('../../datas/f2.jpg').astype(np.float32)


# GPU
# Time the three GPU convolutions. The measured interval also includes
# writing each result image to disk.
start = time.time()
for kernel, out_path in (
    (k_sv, '../../temp/g_result_sv.png'),
    (k_sh, '../../temp/g_result_sh.png'),
    (k_b5, '../../temp/g_result_b5.png'),
):
    c = convolve(a, kernel)
    misc.imsave(out_path, to_img(c))
end = time.time()
print("GPU time: %.5f s" % (end - start))

# SCIPY
# Time the same three kernels with scipy.signal.convolve for comparison.
# The measured interval also includes writing each result image to disk.
# NOTE(review): sg.convolve is called with its default mode ('full' per the
# SciPy docs), so these outputs are larger than the GPU results, whose shape
# is (ah-(bh-1), aw-(bw-1)). The kernels are also normalised with nrm() here
# but passed raw to convolve() above -- confirm the two paths are comparable.
start = time.time()
for kernel, out_path in (
    (k_sv, '../../temp/s_result_sv.png'),
    (k_sh, '../../temp/s_result_sh.png'),
    (k_b5, '../../temp/s_result_b5.png'),
):
    c = sg.convolve(a, nrm(kernel))
    misc.imsave(out_path, to_img(c))
end = time.time()
print("Scipy time: %.5f s" % (end - start))

`imread` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imread`` instead.
  This is separate from the ipykernel package so we can avoid doing imports until
`imsave` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imwrite`` instead.
`imsave` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imwrite`` instead.
`imsave` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imwrite`` instead.


GPU time: 0.67976 s


`imsave` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imwrite`` instead.
`imsave` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imwrite`` instead.
`imsave` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imwrite`` instead.


Scipy time: 0.60438 s
