In [1]:
using BenchmarkTools

In [2]:
function Convolution_2d_v1(input, kernel, bias; padding=false)
    input_height, input_width = size(input)
    kernel_height, kernel_width = size(kernel)

    if padding
        tmp = zeros(input_height+2*kernel_height-2, input_width+2*kernel_width-2)
        for i in 1:input_height
            for j in 1:input_width
                tmp[i+kernel_height-1, j+kernel_width-1] = input[i,j]
            end
        end
        input = tmp
        input_height, input_width = size(input)
    end


    output_height = input_height - kernel_height + 1
    output_width = input_width - kernel_width + 1    
    output = zeros(output_height, output_width)

    for i in 1:output_height
        for j in 1:output_width
            patch = input[i:i+kernel_height-1, j:j+kernel_width-1]
            output[i, j] = sum(patch .* kernel) .+ bias
        end
    end
    return output
end

Convolution_2d_v1 (generic function with 1 method)

In [3]:
function Convolution_2d_v2(input, kernel; bias=0., padding=false)
    input_rows, input_columns = size(input)
    kernel_height, kernel_width = size(kernel)

    if padding
        padded_input = zeros(Float32, input_rows + 2*kernel_height - 2, input_columns + 2*kernel_width - 2)
        padded_input[kernel_height:end-kernel_height+1, kernel_width:end-kernel_width+1] .= input
        input_rows, input_columns = size(padded_input)
        input = padded_input
    end

    output_rows = input_rows - kernel_height + 1
    output_columns = input_columns - kernel_width + 1
    output = zeros(Float32, output_rows, output_columns)


    @fastmath for c in 1:output_columns
        @fastmath for r in 1:output_rows
            patch = @view input[r:r+kernel_height-1, c:c+kernel_width-1]
            output[r, c] = sum(patch .* kernel) + bias
        end
    end
    return output
end

Convolution_2d_v2 (generic function with 1 method)

In [4]:
test_input = rand(Float32, 28,28);
test_kernel = rand(Float32, 3, 3);
test_bias = Float32(1);

In [5]:
@benchmark Convolution_2d_v1(test_input, test_kernel, 0; padding=true)

BenchmarkTools.Trial: 10000 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m68.584 μs[22m[39m … [35m 1.871 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 94.97%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m69.833 μs              [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m72.009 μs[22m[39m ± [32m26.149 μs[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m1.37% ±  4.47%

  [39m [39m█[34m▇[39m[39m [39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m▃[39m█[34m█[39m[39m▆[39m▃[

In [6]:
@benchmark Convolution_2d_v2(test_input, test_kernel; bias=test_bias, padding=true)

BenchmarkTools.Trial: 10000 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m39.333 μs[22m[39m … [35m836.750 μs[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 90.25%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m40.250 μs               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m42.043 μs[22m[39m ± [32m 19.098 μs[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m1.77% ±  3.81%

  [39m [39m▄[39m█[39m█[34m▆[39m[39m▄[39m▃[39m▂[39m▃[39m▃[39m▃[32m▃[39m[39m▂[39m▂[39m [39m▁[39m▁[39m [39m [39m [39m [39m [39m▁[39m [39m▁[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▂
  [39m█[39m█[39m█[39m█[34

In [7]:
Convolution_2d_v1(test_input, test_kernel, 0) == Convolution_2d_v2(test_input, test_kernel)

true