### Global and Const Variable

In [23]:
p = 2
function pow_array(x::Vector{Float64})
    s = 0.0
    for y in x
        s = s + y^p
    end
    return s
end

const q = 2
function pow_array2(x::Vector{Float64})
    s = 0.0
    for y in x
        s = s + y^q
    end
    return s
end

function pow_array3(x::Vector{Float64})
    s = 0.0
    for y in x
        s = s + y^2
    end
    return s
end

pow_array3 (generic function with 1 method)

In [18]:
t = rand(10000)

@benchmark pow_array(t)

BenchmarkTools.Trial: 
  memory estimate:  468.75 KiB
  allocs estimate:  30000
  --------------
  minimum time:     401.932 μs (0.00% GC)
  median time:      414.468 μs (0.00% GC)
  mean time:        463.168 μs (4.21% GC)
  maximum time:     39.515 ms (98.79% GC)
  --------------
  samples:          10000
  evals/sample:     1

In [21]:
@benchmark pow_array2(t)

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     12.956 μs (0.00% GC)
  median time:      12.978 μs (0.00% GC)
  mean time:        12.994 μs (0.00% GC)
  maximum time:     23.874 μs (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     1

In [25]:
@code_warntype pow_array2(t)

Variables
  #self#[36m::Core.Compiler.Const(pow_array2, false)[39m
  x[36m::Array{Float64,1}[39m
  s[36m::Float64[39m
  @_4[33m[1m::Union{Nothing, Tuple{Float64,Int64}}[22m[39m
  y[36m::Float64[39m

Body[36m::Float64[39m
[90m1 ─[39m       (s = 0.0)
[90m│  [39m %2  = x[36m::Array{Float64,1}[39m
[90m│  [39m       (@_4 = Base.iterate(%2))
[90m│  [39m %4  = (@_4 === nothing)[36m::Bool[39m
[90m│  [39m %5  = Base.not_int(%4)[36m::Bool[39m
[90m└──[39m       goto #4 if not %5
[90m2 ┄[39m %7  = @_4::Tuple{Float64,Int64}[36m::Tuple{Float64,Int64}[39m
[90m│  [39m       (y = Core.getfield(%7, 1))
[90m│  [39m %9  = Core.getfield(%7, 2)[36m::Int64[39m
[90m│  [39m %10 = s[36m::Float64[39m
[90m│  [39m %11 = (y ^ Main.q)[36m::Float64[39m
[90m│  [39m       (s = %10 + %11)
[90m│  [39m       (@_4 = Base.iterate(%2, %9))
[90m│  [39m %14 = (@_4 === nothing)[36m::Bool[39m
[90m│  [39m %15 = Base.not_int(%14)[36m::Bool[39m
[90m└──[39m       goto #

#### With @Fastmath macro

In [39]:
function sum_diff(x)
n = length(x); d = 1/(n-1)
s = zero(eltype(x))
s = s + (x[2] - x[1]) / d
for i = 2:length(x)-1
s = s + (x[i+1] - x[i+1]) / (2*d)
end
s = s + (x[n] - x[n-1])/d
end

sum_diff (generic function with 1 method)

In [40]:
function sum_diff_fast(x)
n=length(x); d = 1/(n-1)
s = zero(eltype(x))
@fastmath s = s + (x[2] - x[1]) / d
@fastmath for i = 2:n-1
s = s + (x[i+1] - x[i+1]) / (2*d)
end
@fastmath s = s + (x[n] - x[n-1])/d
end

sum_diff_fast (generic function with 1 method)

In [41]:
t=rand(2000)
@benchmark sum_diff(t)

BenchmarkTools.Trial: 
  memory estimate:  16 bytes
  allocs estimate:  1
  --------------
  minimum time:     2.601 μs (0.00% GC)
  median time:      2.605 μs (0.00% GC)
  mean time:        2.623 μs (0.00% GC)
  maximum time:     3.680 μs (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     9

In [42]:
@benchmark sum_diff_fast(t)

BenchmarkTools.Trial: 
  memory estimate:  16 bytes
  allocs estimate:  1
  --------------
  minimum time:     683.675 ns (0.00% GC)
  median time:      695.682 ns (0.00% GC)
  mean time:        711.369 ns (0.00% GC)
  maximum time:     1.689 μs (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     151

### Column vs row iteration

In [52]:
function col_iter(x)
    s=zero(eltype(x))
    for i = 1:size(x, 2)
        for j = 1:size(x, 1)
            s = s + x[j, i] ^ 2
            x[j, i] = s
        end
    end
end

function row_iter(x)
    s=zero(eltype(x))
    for i = 1:size(x, 1)
        for j = 1:size(x, 2)
            s = s + x[i, j] ^ 2
            x[i, j] = s
        end
    end
end

row_iter (generic function with 1 method)

In [54]:
a = rand(1000, 1000);

In [55]:
@benchmark col_iter(a)

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     1.312 ms (0.00% GC)
  median time:      1.318 ms (0.00% GC)
  mean time:        1.323 ms (0.00% GC)
  maximum time:     1.822 ms (0.00% GC)
  --------------
  samples:          3765
  evals/sample:     1

In [56]:
@benchmark row_iter(a)

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     2.347 ms (0.00% GC)
  median time:      2.388 ms (0.00% GC)
  mean time:        2.401 ms (0.00% GC)
  maximum time:     3.602 ms (0.00% GC)
  --------------
  samples:          2077
  evals/sample:     1

#### @bounds checking

In [57]:
function prefix_bounds(a, b)
for i = 2:size(a, 1)
a[i] = b[i-1] + b[i]
end
end
function prefix_inbounds(a, b)
@inbounds for i = 2:size(a, 1)
a[i] = b[i-1] + b[i]
end
end

prefix_inbounds (generic function with 1 method)

In [59]:
a = rand(100)
b = rand(100);

In [61]:
@benchmark prefix_bounds(a,b)

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     101.335 ns (0.00% GC)
  median time:      101.816 ns (0.00% GC)
  mean time:        107.424 ns (0.00% GC)
  maximum time:     216.734 ns (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     940

In [63]:
@benchmark prefix_inbounds(a,b)

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     29.146 ns (0.00% GC)
  median time:      29.409 ns (0.00% GC)
  mean time:        29.654 ns (0.00% GC)
  maximum time:     74.519 ns (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     995

### Memory Allocation

In [64]:
function xpow(x)
return [x x^2 x^3 x^4]
end
function xpow_loop(n)
s = 0
for i = 1:n
s = s + xpow(i)[2]
end
return s
end

xpow_loop (generic function with 1 method)

In [65]:
@benchmark xpow_loop(1000000)

BenchmarkTools.Trial: 
  memory estimate:  106.81 MiB
  allocs estimate:  1000000
  --------------
  minimum time:     35.859 ms (10.72% GC)
  median time:      37.446 ms (14.17% GC)
  mean time:        39.024 ms (15.02% GC)
  maximum time:     106.332 ms (56.11% GC)
  --------------
  samples:          129
  evals/sample:     1

In [66]:
function xpow!(result::Array{Int, 1}, x)
@assert length(result) == 4
result[1] = x
result[2] = x^2
result[3] = x^3
result[4] = x^4
end
function xpow_loop_noalloc(n)
r = [0, 0, 0, 0]
s = 0
for i = 1:n
xpow!(r, i)
s = s + r[2]
end
s
end

xpow_loop_noalloc (generic function with 1 method)

In [67]:
@benchmark xpow_loop_noalloc(1000000)

BenchmarkTools.Trial: 
  memory estimate:  112 bytes
  allocs estimate:  1
  --------------
  minimum time:     6.642 ms (0.00% GC)
  median time:      6.729 ms (0.00% GC)
  mean time:        6.788 ms (0.00% GC)
  maximum time:     8.696 ms (0.00% GC)
  --------------
  samples:          737
  evals/sample:     1

### Memory Allocation 2

In [68]:
function sum_vector(x::Array{Float64, 1})
s = 0.0
for i = 1:length(x)
s = s + x[i]
end
return s
end
function sum_cols_matrix(x::Array{Float64, 2})
num_cols = size(x, 2)
s = zeros(num_cols)
for i = 1:num_cols
s[i] = sum_vector(x[:, i])
end
return s
end

sum_cols_matrix (generic function with 1 method)

In [74]:
@benchmark sum_cols_matrix(rand(10000, 10000))

BenchmarkTools.Trial: 
  memory estimate:  1.49 GiB
  allocs estimate:  20004
  --------------
  minimum time:     712.681 ms (21.20% GC)
  median time:      714.630 ms (21.32% GC)
  mean time:        729.088 ms (21.90% GC)
  maximum time:     753.459 ms (22.58% GC)
  --------------
  samples:          7
  evals/sample:     1

In [72]:
function sum_vector(x::AbstractArray)
s = 0.0
for i = 1:length(x)
s = s + x[i]
end
    return s
end
function sum_cols_matrix_views(x::Array{Float64, 2})
num_cols = size(x, 2); num_rows = size(x, 1)
s = zeros(num_cols)
for i = 1:num_cols
s[i] = sum_vector(view(x, 1:num_rows, i))
end
return s
end

sum_cols_matrix_views (generic function with 1 method)

In [75]:
@benchmark sum_cols_matrix_views(rand(10000, 10000))

BenchmarkTools.Trial: 
  memory estimate:  763.02 MiB
  allocs estimate:  4
  --------------
  minimum time:     471.349 ms (1.89% GC)
  median time:      490.381 ms (5.84% GC)
  mean time:        497.052 ms (7.08% GC)
  maximum time:     540.961 ms (14.58% GC)
  --------------
  samples:          11
  evals/sample:     1