In [1]:
using BenchmarkTools

In [2]:
function Convolution_2d_v1(input, kernel, bias; padding=false)
    input_height, input_width = size(input)
    kernel_height, kernel_width = size(kernel)

    if padding
        tmp = zeros(input_height+2*kernel_height-2, input_width+2*kernel_width-2)
        for i in 1:input_height
            for j in 1:input_width
                tmp[i+kernel_height-1, j+kernel_width-1] = input[i,j]
            end
        end
        input = tmp
        input_height, input_width = size(input)
    end


    output_height = input_height - kernel_height + 1
    output_width = input_width - kernel_width + 1    
    output = zeros(output_height, output_width)

    for i in 1:output_height
        for j in 1:output_width
            patch = input[i:i+kernel_height-1, j:j+kernel_width-1]
            output[i, j] = sum(patch .* kernel) .+ bias
        end
    end
    return output
end

Convolution_2d_v1 (generic function with 1 method)

In [3]:
function Convolution_2d_v2(input, kernel; bias=0., padding=false)
    input_rows, input_columns = size(input)
    kernel_height, kernel_width = size(kernel)

    if padding
        padded_input = zeros(Float32, input_rows + 2*kernel_height - 2, input_columns + 2*kernel_width - 2)
        padded_input[kernel_height:end-kernel_height+1, kernel_width:end-kernel_width+1] .= input
        input_rows, input_columns = size(padded_input)
        input = padded_input
    end

    output_rows = input_rows - kernel_height + 1
    output_columns = input_columns - kernel_width + 1
    output = zeros(Float32, output_rows, output_columns)

    for c in 1:output_columns
        for r in 1:output_rows
            patch = @view input[r:r+kernel_height-1, c:c+kernel_width-1]
            output[r, c] = sum(patch .* kernel) + bias
        end
    end
    return output
end

Convolution_2d_v2 (generic function with 1 method)

In [13]:
function Convolution_2d_v3(input, kernel; bias=0., padding=false)
    input_rows, input_columns = size(input)
    kernel_height, kernel_width = size(kernel)

    if padding
        padded_input = zeros(Float32, input_rows + 2*kernel_height - 2, input_columns + 2*kernel_width - 2)
        padded_input[kernel_height:end-kernel_height+1, kernel_width:end-kernel_width+1] .= input
        input_rows, input_columns = size(padded_input)
        input = padded_input
    end

    output_rows = input_rows - kernel_height + 1
    output_columns = input_columns - kernel_width + 1
    output = zeros(Float32, output_rows, output_columns)
    sumret = zeros(size(kernel))
    for c in 1:output_columns
        for r in 1:output_rows
            patch = @view input[r:r+kernel_height-1, c:c+kernel_width-1]
            sumret .= patch .* kernel
            output[r, c] = sum(sumret) + bias
            sumret .= 0.0
        end
    end
    return output
end

Convolution_2d_v3 (generic function with 1 method)

In [5]:
test_input = rand(Float32, 28,28);
test_kernel = rand(Float32, 3, 3);
test_bias = Float32(1);

In [6]:
@benchmark Convolution_2d_v1(test_input, test_kernel, 0; padding=true)

BenchmarkTools.Trial: 10000 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m81.700 μs[22m[39m … [35m 1.456 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 92.96%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m84.800 μs              [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m88.345 μs[22m[39m ± [32m29.586 μs[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m1.90% ±  5.27%

  [39m▃[39m▇[39m█[34m▆[39m[39m▆[39m▆[39m▅[32m▄[39m[39m▄[39m▄[39m▂[39m▂[39m▁[39m▁[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▂
  [39m█[39m█[39m█[34m█[39m[39m█[

In [7]:
@benchmark Convolution_2d_v2(test_input, test_kernel; bias=test_bias, padding=true)

BenchmarkTools.Trial: 10000 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m56.400 μs[22m[39m … [35m 1.011 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 88.41%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m57.800 μs              [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m60.823 μs[22m[39m ± [32m27.628 μs[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m1.78% ±  3.84%

  [39m▄[39m█[34m▇[39m[39m▅[39m▄[39m▆[39m▅[32m▃[39m[39m▃[39m▂[39m▁[39m▁[39m▁[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▂
  [39m█[39m█[34m█[39m[39m█[39m█[

In [14]:
@benchmark Convolution_2d_v3(test_input, test_kernel; bias=test_bias, padding=true)

BenchmarkTools.Trial: 10000 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m38.900 μs[22m[39m … [35m 5.699 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 98.90%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m39.200 μs              [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m43.296 μs[22m[39m ± [32m58.881 μs[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m1.30% ±  0.99%

  [34m█[39m[39m▆[39m▄[32m▃[39m[39m▂[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▁
  [34m█[39m[39m█[39m█[32m█[39m[3

In [9]:
Convolution_2d_v2(test_input, test_kernel) - Convolution_2d_v3(test_input, test_kernel)

26×26 Matrix{Float32}:
 -1.19209f-7   0.0          1.19209f-7  …   0.0          0.0
  0.0          0.0          0.0            -2.38419f-7   0.0
  0.0         -2.38419f-7   0.0            -2.38419f-7   2.38419f-7
  0.0         -1.19209f-7   0.0            -1.19209f-7   2.38419f-7
 -1.19209f-7   0.0          1.19209f-7      0.0         -1.19209f-7
 -1.19209f-7   0.0         -1.19209f-7  …   0.0          1.19209f-7
  0.0          1.19209f-7   1.19209f-7      1.19209f-7   0.0
  1.19209f-7   0.0          1.19209f-7      0.0         -1.19209f-7
  0.0          1.19209f-7   0.0            -1.19209f-7   0.0
  2.38419f-7  -2.38419f-7   0.0             0.0          0.0
  2.38419f-7   0.0         -2.38419f-7  …   0.0          0.0
  0.0         -2.38419f-7  -1.19209f-7      0.0         -1.19209f-7
  1.19209f-7   0.0          1.19209f-7      0.0          0.0
  0.0          0.0          1.19209f-7      1.19209f-7   2.38419f-7
  1.19209f-7   0.0          0.0             0.0          0.0
  0.0        

In [10]:
Convolution_2d_v3(test_input, test_kernel)

26×26 Matrix{Float32}:
 1.76645  1.36211  1.55191  1.29746  …  1.17375  1.28229   2.23633  1.8189
 2.09727  1.77395  1.5053   1.61822     1.9628   1.82236   2.23152  1.84141
 2.11709  1.85688  1.44995  2.01385     2.27879  2.36763   2.16847  2.30949
 1.569    1.71549  1.60224  1.57454     1.87915  2.23717   1.81629  2.01961
 1.91606  1.64284  1.65504  1.98727     1.67616  2.09825   2.41312  1.80872
 1.72417  1.49152  1.97741  1.79861  …  2.28245  2.14855   2.26908  1.58633
 1.48458  1.45509  1.3628   1.89359     1.63495  2.26014   1.80828  1.54945
 1.28559  1.19869  1.25129  1.59704     1.31621  1.93928   1.78909  1.31748
 1.53371  1.59954  1.64732  1.68164     1.17879  2.12346   1.70956  1.48585
 2.23411  2.09072  2.20866  2.15413     1.93206  2.16502   2.31437  1.40659
 2.49597  2.14292  2.3174   1.61315  …  2.14232  2.11454   1.7653   1.70648
 2.10362  1.91548  1.81574  1.66439     2.29814  1.89878   1.89519  1.63371
 1.6774   2.09418  1.99284  1.60511     1.73573  1.79949   1.78212