In [1]:
using BenchmarkTools

In [2]:
"""
    MaxPool_v1(input, pool_size)

Baseline max pooling over non-overlapping windows of a 3-D array.

`input` is indexed as `input[row, column, channel]` (1-based) and `pool_size`
is a `(pool_height, pool_width)` pair. The result has size
`(div(rows, pool_height), div(columns, pool_width), channels)`; trailing rows
or columns that do not fill a complete window are ignored.

The result uses the element type of `input`, so `Float64` or integer inputs are
not rounded to `Float32`.

This is the reference implementation used for benchmarking: each window is
extracted with plain indexing (which copies it) and the loops run row-outer,
column-inner.
"""
function MaxPool_v1(input, pool_size)
    input_height, input_width, channels = size(input)
    pool_height, pool_width = pool_size

    # Integer division drops any partial window at the bottom/right edge.
    output_height = div(input_height, pool_height)
    output_width = div(input_width, pool_width)

    # Allocate with the input's element type instead of a fixed Float32 so the
    # maxima are stored exactly.
    output = zeros(eltype(input), output_height, output_width, channels)

    for c in 1:channels
        for i in 1:output_height
            for j in 1:output_width
                row_start = 1 + (i - 1) * pool_height
                row_end = row_start + pool_height - 1
                col_start = 1 + (j - 1) * pool_width
                col_end = col_start + pool_width - 1

                # Plain range indexing copies the window; kept as the baseline.
                pool = input[row_start:row_end, col_start:col_end, c]
                output[i, j, c] = maximum(pool)
            end
        end
    end
    return output
end


MaxPool_v1 (generic function with 1 method)

In [3]:
"""
    MaxPool_v2(input, pool_size)

Max pooling over non-overlapping windows of a 3-D array, without copying the
windows.

`input` is indexed as `input[row, column, channel]` (1-based) and `pool_size`
is a `(pool_height, pool_width)` pair. The result has size
`(div(rows, pool_height), div(columns, pool_width), channels)`; trailing rows
or columns that do not fill a complete window are ignored.

The result uses the element type of `input`, so `Float64` or integer inputs are
not rounded to `Float32`.

Each window is taken as a view rather than a copy, and the innermost loop runs
over rows, the first array dimension.
"""
function MaxPool_v2(input, pool_size)
    input_rows, input_columns, channels = size(input)
    pool_height, pool_width = pool_size

    # Integer division drops any partial window at the bottom/right edge.
    output_rows = div(input_rows, pool_height)
    output_columns = div(input_columns, pool_width)

    # Allocate with the input's element type instead of a fixed Float32 so the
    # maxima are stored exactly.
    output = zeros(eltype(input), output_rows, output_columns, channels)

    for c in 1:channels
        for col in 1:output_columns
            # Rows innermost: consecutive iterations touch adjacent memory in
            # Julia's column-major arrays.
            for row in 1:output_rows
                row_start = 1 + (row - 1) * pool_height
                row_end = row_start + pool_height - 1
                col_start = 1 + (col - 1) * pool_width
                col_end = col_start + pool_width - 1

                # A view references the window in place instead of copying it.
                pool = @view input[row_start:row_end, col_start:col_end, c]
                output[row, col, c] = maximum(pool)
            end
        end
    end
    return output
end

MaxPool_v2 (generic function with 1 method)

In [5]:
# Random Float32 test input indexed as (rows, columns, channels).
input = rand(Float32, (28, 28, 6));

In [6]:
# Interpolate the non-const global `input` with `$` so the benchmark times the
# pooling call itself rather than the overhead of accessing an untyped global.
@benchmark MaxPool_v1($input, (2, 2))

BenchmarkTools.Trial: 10000 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m40.166 μs[22m[39m … [35m 2.750 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 97.10%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m41.667 μs              [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m43.912 μs[22m[39m ± [32m45.268 μs[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m3.61% ±  4.16%

  [39m [39m▁[39m▅[39m▂[39m▃[39m█[34m▅[39m[39m▁[39m [39m [39m [39m [39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m▂[39m█[39m█[39m█[39m█[39m█

In [7]:
# Interpolate the non-const global `input` with `$` so the benchmark times the
# pooling call itself rather than the overhead of accessing an untyped global.
@benchmark MaxPool_v2($input, (2, 2))

BenchmarkTools.Trial: 10000 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m13.458 μs[22m[39m … [35m53.958 μs[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 0.00%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m16.417 μs              [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m16.053 μs[22m[39m ± [32m 2.030 μs[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m0.00% ± 0.00%

  [39m [39m [39m [39m [39m [39m▅[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [32m [39m[39m [34m [39m[39m█[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m█[39m▆[39m▁[39m▂[39m▃[39m█[3

In [9]:
# Sanity check: both implementations must agree on the benchmark input.
let window = (2, 2)
    MaxPool_v1(input, window) == MaxPool_v2(input, window)
end

true