In [1]:
using BenchmarkTools

In [2]:
"""
    MaxPool_v1(input, pool_size, gradient)

Route `gradient` back through a non-overlapping max-pooling step (baseline variant).

`input` is indexed as `(height, width, channel)` and is partitioned into windows of
`pool_size = (pool_height, pool_width)`. For every window, the entry of `gradient` at
the window's grid position is written to the location of that window's maximum (the
first maximum in column-major order on ties); every other entry of the result is zero.

When a spatial dimension of `input` is not a multiple of the pool size, only the first
`pool * size(gradient, dim)` rows/columns are visited and the remainder stays zero.

Returns a 1-tuple `(J,)`, where `J` is a `Float32` array with the shape of `input`.

This variant deliberately copies every window and keeps the row loop outside the column
loop; it is the reference point the other variants in this file are benchmarked against.
"""
function MaxPool_v1(input, pool_size, gradient)
    input_height, input_width, channels = size(input)
    pool_height, pool_width = pool_size
    gradient_height, gradient_width = size(gradient)

    # Extent actually covered by pooling windows along each spatial dimension.
    covered_height =
        input_height % pool_height != 0 ? pool_height * gradient_height : input_height
    covered_width =
        input_width % pool_width != 0 ? pool_width * gradient_width : input_width

    J = zeros(Float32, input_height, input_width, channels)

    for c in 1:channels
        # Rows (dimension 1) step by pool_height, columns (dimension 2) by pool_width.
        for row in 1:pool_height:covered_height
            for col in 1:pool_width:covered_width
                row_end = min(row + pool_height - 1, input_height)
                col_end = min(col + pool_width - 1, input_width)

                # Copying slice on purpose: this is the unoptimized baseline.
                _, max_idx = findmax(input[row:row_end, col:col_end, c])

                J[row + max_idx[1] - 1, col + max_idx[2] - 1, c] =
                    gradient[div(row - 1, pool_height) + 1, div(col - 1, pool_width) + 1, c]
            end
        end
    end

    return tuple(J)
end


MaxPool_v1 (generic function with 1 method)

In [3]:
"""
    MaxPool_v2(input, pool_size, gradient)

Route `gradient` back through a non-overlapping max-pooling step, reading each window
through a view and iterating in column-major-friendly order.

`input` is indexed as `(height, width, channel)` and is partitioned into windows of
`pool_size = (pool_height, pool_width)`. For every window, the entry of `gradient` at
the window's grid position is written to the location of that window's maximum (the
first maximum in column-major order on ties); every other entry of the result is zero.

When a spatial dimension of `input` is not a multiple of the pool size, only the first
`pool * size(gradient, dim)` rows/columns are visited and the remainder stays zero.

Returns a 1-tuple `(J,)`, where `J` is a `Float32` array with the shape of `input`.
"""
function MaxPool_v2(input, pool_size, gradient)
    input_height, input_width, channels = size(input)
    pool_height, pool_width = pool_size
    gradient_height, gradient_width = size(gradient)

    # Extent actually covered by pooling windows along each spatial dimension.
    covered_height =
        input_height % pool_height != 0 ? pool_height * gradient_height : input_height
    covered_width =
        input_width % pool_width != 0 ? pool_width * gradient_width : input_width

    J = zeros(Float32, input_height, input_width, channels)

    for c in 1:channels
        # Columns (dimension 2) in the outer loop so the row index varies fastest.
        for col in 1:pool_width:covered_width
            col_end = min(col + pool_width - 1, input_width)
            for row in 1:pool_height:covered_height
                row_end = min(row + pool_height - 1, input_height)

                # A view avoids allocating a copy of the window.
                _, max_idx = findmax(view(input, row:row_end, col:col_end, c))

                J[row + max_idx[1] - 1, col + max_idx[2] - 1, c] =
                    gradient[div(row - 1, pool_height) + 1, div(col - 1, pool_width) + 1, c]
            end
        end
    end

    return tuple(J)
end

MaxPool_v2 (generic function with 1 method)

In [28]:
"""
    MaxPool_v3(input, pool_size, gradient)

Route `gradient` back through a non-overlapping max-pooling step, using a
statement-level `@views` for the window lookup and column-major-friendly iteration.

`input` is indexed as `(height, width, channel)` and is partitioned into windows of
`pool_size = (pool_height, pool_width)`. For every window, the entry of `gradient` at
the window's grid position is written to the location of that window's maximum (the
first maximum in column-major order on ties); every other entry of the result is zero.

When a spatial dimension of `input` is not a multiple of the pool size, only the first
`pool * size(gradient, dim)` rows/columns are visited and the remainder stays zero.

Returns a 1-tuple `(J,)`, where `J` is a `Float32` array with the shape of `input`.
"""
function MaxPool_v3(input, pool_size, gradient)
    input_height, input_width, channels = size(input)
    pool_height, pool_width = pool_size
    gradient_height, gradient_width = size(gradient)

    J = zeros(Float32, input_height, input_width, channels)

    # Extent actually covered by pooling windows along each spatial dimension.
    covered_height =
        input_height % pool_height != 0 ? pool_height * gradient_height : input_height
    covered_width =
        input_width % pool_width != 0 ? pool_width * gradient_width : input_width

    for c in 1:channels
        # Columns (dimension 2) in the outer loop so the row index varies fastest.
        for col in 1:pool_width:covered_width
            for row in 1:pool_height:covered_height
                row_end = min(row + pool_height - 1, input_height)
                col_end = min(col + pool_width - 1, input_width)

                # `@views` turns the slice on the right-hand side into a non-copying view.
                @views _, max_idx = findmax(input[row:row_end, col:col_end, c])

                J[row + max_idx[1] - 1, col + max_idx[2] - 1, c] =
                    gradient[div(row - 1, pool_height) + 1, div(col - 1, pool_width) + 1, c]
            end
        end
    end

    return tuple(J)
end

MaxPool_v3 (generic function with 1 method)

In [30]:
# Benchmark fixtures: an 11×11×16 activation tensor and a 5×5×16 upstream gradient.
# With 2×2 pooling the windows cover rows/columns 1–10; the 11th is left untouched.
input    = rand(Float32, (11, 11, 16));
gradient = rand(Float32, (5, 5, 16));

In [31]:
# `$` interpolates the non-const globals so the benchmark measures the call itself
# rather than untyped global-variable access.
@benchmark MaxPool_v1($input, (2, 2), $gradient)

BenchmarkTools.Trial: 10000 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m18.041 μs[22m[39m … [35m 2.523 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 98.20%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m19.667 μs              [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m21.711 μs[22m[39m ± [32m55.216 μs[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m7.05% ±  2.77%

  [39m [39m▂[39m▅[39m▆[39m█[39m█[39m█[34m█[39m[39m▇[39m▇[39m▆[39m▅[39m▅[39m▄[39m▃[39m▂[32m▁[39m[39m▁[39m [39m▁[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▃
  [39m█[39m█[39m█[39m█[39m█[39m█

In [32]:
# `$` interpolates the non-const globals so the benchmark measures the call itself
# rather than untyped global-variable access.
@benchmark MaxPool_v2($input, (2, 2), $gradient)

BenchmarkTools.Trial: 10000 samples with 4 evaluations.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m7.489 μs[22m[39m … [35m960.844 μs[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 98.48%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m8.396 μs               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m8.878 μs[22m[39m ± [32m 16.893 μs[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m4.88% ±  2.59%

  [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▄[39m▃[39m▇[39m▆[39m█[39m▅[34m▇[39m[39m▄[39m▇[39m▂[39m▃[39m▁[39m▁[39m [39m [39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m▄[39m█[39m▅[39m▅[39m▄[39m▄

In [33]:
# `$` interpolates the non-const globals so the benchmark measures the call itself
# rather than untyped global-variable access.
@benchmark MaxPool_v3($input, (2, 2), $gradient)

BenchmarkTools.Trial: 10000 samples with 4 evaluations.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m7.417 μs[22m[39m … [35m958.719 μs[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 98.39%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m8.427 μs               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m8.878 μs[22m[39m ± [32m 16.266 μs[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m4.65% ±  2.59%

  [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▂[39m▂[39m▃[39m▅[39m▇[34m█[39m[39m▇[39m▆[39m▅[39m▃[39m▁[39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m▄[39m▆[39m▆[39m▅[39m▃[39m▃

In [7]:
# All three variants must produce identical gradients on the benchmark fixtures.
MaxPool_v1(input, (2, 2), gradient) ==
    MaxPool_v2(input, (2, 2), gradient) ==
    MaxPool_v3(input, (2, 2), gradient)

true