In [1]:
using BenchmarkTools

In [2]:
function ReLu_backward_1d_v1(input::Array{Float64,1}, g)
    height = length(input)
    J = zeros(height)
    for i in 1:height
        if input[i] > 0
            J[i] = 1
        end
    end
    tuple(J.* g)
end

ReLu_backward_1d_v1 (generic function with 1 method)

In [3]:
function ReLu_backward_3d_v1(input::Array{Float64,3}, g)
    height, width, channels = size(input)
    J = zeros(height, width, channels)
    for i in 1:height
        for j in 1:width
            for c in 1:channels
                if input[i,j,c] > 0
                    J[i,j,c] = 1
                end
            end
        end
    end
    tuple(J.* g)
end


ReLu_backward_3d_v1 (generic function with 1 method)

In [4]:
function ReLu_backward_1d_v2(input::Array{Float64,1}, g)
    J = input .> 0
    tuple(J .* g)
end

ReLu_backward_1d_v2 (generic function with 1 method)

In [5]:
function ReLu_backward_3d_v2(input::Array{Float64,3}, g)
    J = input .> 0
    tuple(J .* g)
end

ReLu_backward_3d_v2 (generic function with 1 method)

In [6]:
function ReLu_backward_1d_v3(input::Array{Float64,1}, g)
    tuple(g .* (input .>= 0))
end

ReLu_backward_1d_v3 (generic function with 1 method)

In [7]:
input_1d = randn(4000);
gradient_1d = randn(4000);
input_3d = randn(26, 26, 100);
gradient_3d = randn(26, 26, 100);

In [8]:
@benchmark ReLu_backward_1d_v1(input_1d, gradient_1d)

BenchmarkTools.Trial: 10000 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m13.500 μs[22m[39m … [35m  5.326 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m 0.00% … 98.94%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m36.400 μs               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m 0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m43.859 μs[22m[39m ± [32m160.104 μs[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m13.47% ±  3.68%

  [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▁[39m▅[39m▃[39m▇[34m█[39m[39m [39m [39m [39m [39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m▂[39m▂[39m▂[39m▂

In [9]:
@benchmark ReLu_backward_1d_v2(input_1d, gradient_1d)

BenchmarkTools.Trial: 10000 samples with 6 evaluations.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m 5.283 μs[22m[39m … [35m 1.128 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m 0.00% … 96.88%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m15.183 μs              [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m 0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m15.141 μs[22m[39m ± [32m35.160 μs[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m13.97% ±  6.58%

  [39m█[39m▆[39m▅[39m▃[39m▃[39m▂[39m▂[39m▁[39m [39m [39m [39m [39m [39m [39m [39m [39m [32m▄[39m[34m▅[39m[39m▆[39m▆[39m▅[39m▄[39m▃[39m▃[39m▃[39m▂[39m▂[39m▁[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▁[39m▁[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▂
  [39m█[39m█[39m█[39m█[39m█[3

In [10]:
@benchmark ReLu_backward_1d_v3(input_1d, gradient_1d)

BenchmarkTools.Trial: 10000 samples with 9 evaluations.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m 2.267 μs[22m[39m … [35m501.956 μs[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m 0.00% … 96.79%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m 6.144 μs               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m 0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m10.081 μs[22m[39m ± [32m 22.877 μs[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m15.19% ±  7.41%

  [39m [39m█[39m [39m [39m [39m [39m [39m [39m [34m [39m[39m [39m [39m [39m [39m [39m [39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m▄[39m█[39m▆[39m▃

In [15]:
ReLu_backward_1d_v2(input_1d, gradient_1d) == ReLu_backward_1d_v1(input_1d, gradient_1d) == ReLu_backward_1d_v3(input_1d, gradient_1d)

true

In [12]:
@benchmark ReLu_backward_3d_v1(input_3d, gradient_3d)

BenchmarkTools.Trial: 7693 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m428.800 μs[22m[39m … [35m  4.849 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 72.75%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m509.300 μs               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m644.181 μs[22m[39m ± [32m315.958 μs[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m4.98% ±  9.51%

  [39m█[39m▅[39m▃[34m▃[39m[39m▃[39m▃[39m▂[39m▂[32m [39m[39m [39m [39m [39m▆[39m▅[39m▄[39m▃[39m▃[39m▃[39m▂[39m▂[39m▁[39m▁[39m▁[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▂
  [39m█[39m█[39m█[

In [13]:
@benchmark ReLu_backward_3d_v2(input_3d, gradient_3d)

BenchmarkTools.Trial: 8697 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m480.000 μs[22m[39m … [35m  4.606 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 85.02%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m499.400 μs               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m570.537 μs[22m[39m ± [32m163.063 μs[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m2.14% ±  6.40%

  [39m█[34m▅[39m[39m▄[39m▄[39m▃[32m▁[39m[39m▁[39m▁[39m▄[39m▅[39m▄[39m▃[39m▃[39m▂[39m▁[39m▁[39m▁[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▁
  [39m█[34m█[39m[3

In [14]:
ReLu_backward_3d_v2(input_3d, gradient_3d) == ReLu_backward_3d_v1(input_3d, gradient_3d)

true