In [1]:
using BenchmarkTools

In [2]:
function ReLu_backward_1d_v1(input::Array{Float64,1}, g)
    height = length(input)
    J = zeros(height)
    for i in 1:height
        if input[i] > 0
            J[i] = 1
        end
    end
    tuple(J.* g)
end

ReLu_backward_1d_v1 (generic function with 1 method)

In [3]:
function ReLu_backward_3d_v1(input::Array{Float64,3}, g)
    height, width, channels = size(input)
    J = zeros(height, width, channels)
    for i in 1:height
        for j in 1:width
            for c in 1:channels
                if input[i,j,c] > 0
                    J[i,j,c] = 1
                end
            end
        end
    end
    tuple(J.* g)
end


ReLu_backward_3d_v1 (generic function with 1 method)

In [4]:
function ReLu_backward_1d_v2(input::Array{Float64,1}, g)
    J = input .> 0
    tuple(J .* g)
end

ReLu_backward_1d_v2 (generic function with 1 method)

In [5]:
function ReLu_backward_3d_v2(input::Array{Float64,3}, g)
    J = input .> 0
    tuple(J .* g)
end

ReLu_backward_3d_v2 (generic function with 1 method)

In [6]:
function ReLu_backward_3d_v3(input::Array{Float64,3}, g)
    tuple(g .* (input .>= 0))
end

ReLu_backward_3d_v3 (generic function with 1 method)

In [7]:
function ReLu_backward_1d_v3(input::Array{Float64,1}, g)
    tuple(g .* (input .>= 0))
end

ReLu_backward_1d_v3 (generic function with 1 method)

In [8]:
input_1d = randn(4000);
gradient_1d = randn(4000);
input_3d = randn(26, 26, 100);
gradient_3d = randn(26, 26, 100);

In [9]:
@benchmark ReLu_backward_1d_v1(input_1d, gradient_1d)

BenchmarkTools.Trial: 10000 samples with 5 evaluations.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m 4.883 μs[22m[39m … [35m779.292 μs[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m 0.00% … 98.10%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m10.700 μs               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m 0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m12.956 μs[22m[39m ± [32m 24.177 μs[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m15.02% ±  7.92%

  [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▁[39m▆[39m▇[39m█[34m▇[39m[39m▄[39m [39m [39m [39m [39m [39m [39m [39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m▃[39m▃[39m▂[39m▂

In [10]:
@benchmark ReLu_backward_1d_v2(input_1d, gradient_1d)

BenchmarkTools.Trial: 10000 samples with 7 evaluations.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m4.179 μs[22m[39m … [35m433.220 μs[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m 0.00% … 97.58%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m7.030 μs               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m 0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m8.419 μs[22m[39m ± [32m 19.548 μs[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m17.19% ±  7.23%

  [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▁[39m▄[39m▅[39m▆[39m▇[34m█[39m[39m▆[39m▂[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m▇[39m▇[39m▃[39m▂[39m▁[3

In [11]:
@benchmark ReLu_backward_1d_v3(input_1d, gradient_1d)

BenchmarkTools.Trial: 10000 samples with 10 evaluations.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m1.133 μs[22m[39m … [35m194.113 μs[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m 0.00% … 96.64%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m3.825 μs               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m 0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m4.564 μs[22m[39m ± [32m  8.767 μs[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m15.67% ±  7.92%

  [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▃[39m▇[39m█[39m▆[39m▆[34m▄[39m[39m▂[39m▂[39m [39m [39m [39m [39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m▂[39m▅[39m▅[39m▂[39m▂[

In [12]:
ReLu_backward_1d_v2(input_1d, gradient_1d) == ReLu_backward_1d_v1(input_1d, gradient_1d) == ReLu_backward_1d_v3(input_1d, gradient_1d)

true

In [13]:
@benchmark ReLu_backward_3d_v1(input_3d, gradient_3d)

BenchmarkTools.Trial: 9319 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m409.459 μs[22m[39m … [35m  3.571 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 81.08%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m504.916 μs               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m535.109 μs[22m[39m ± [32m198.945 μs[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m5.43% ± 10.58%

  [39m [39m [39m [39m▇[34m█[39m[39m▂[32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m▂[39m▃[39m▃[

In [14]:
@benchmark ReLu_backward_3d_v2(input_3d, gradient_3d)

BenchmarkTools.Trial: 10000 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m110.916 μs[22m[39m … [35m  3.849 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 94.48%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m146.208 μs               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m159.010 μs[22m[39m ± [32m114.862 μs[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m7.21% ±  9.21%

  [39m▄[39m▁[39m█[34m▇[39m[32m▅[39m[39m▃[39m▁[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▂
  [39m█[39m█[39m█

In [15]:
@benchmark ReLu_backward_3d_v3(input_3d, gradient_3d)

BenchmarkTools.Trial: 10000 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m26.583 μs[22m[39m … [35m 2.114 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 94.69%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m77.541 μs              [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m80.661 μs[22m[39m ± [32m67.928 μs[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m8.28% ±  9.25%

  [39m▅[39m [39m [39m▂[39m [39m█[34m▅[39m[39m▄[39m▂[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▁
  [39m█[39m▆[39m▁[39m█[39m█[39m█[34m█

In [17]:
ReLu_backward_3d_v2(input_3d, gradient_3d) == ReLu_backward_3d_v1(input_3d, gradient_3d) == ReLu_backward_3d_v3(input_3d, gradient_3d)

true