# A few more notes on performance 

Here are a few more things to keep in mind: 

## Avoid non-constant globals

In [1]:
using BenchmarkTools

In [2]:
# Non-constant global holding 10^7 random numbers; mysum1 below reads it directly
# instead of receiving it as an argument.
a = rand(10^7)
"""
    mysum1()

Sum the elements of the global variable `a` and return the total.

Takes no arguments: the data is read from the global binding `a`, which is the
pattern this section warns against.
"""
function mysum1()
    total = zero(eltype(a))
    for value in a
        total += value
    end
    return total
end
"""
    mysum2(a::Vector{T}) where T

Sum the elements of the vector `a` and return the total.

The accumulator starts at `zero(T)`, so an empty vector yields the zero of its
element type.
"""
function mysum2(a::Vector{T}) where T
    total = zero(T)
    for value in a
        total += value
    end
    return total
end

mysum2 (generic function with 1 method)

In [3]:
# mysum1 takes no arguments; it picks up the global `a` by itself.
@benchmark mysum1()

BenchmarkTools.Trial: 
  memory estimate:  762.93 MiB
  allocs estimate:  39999490
  --------------
  minimum time:     969.874 ms (4.76% GC)
  median time:      989.879 ms (4.67% GC)
  mean time:        986.692 ms (4.72% GC)
  maximum time:     1.005 s (4.81% GC)
  --------------
  samples:          6
  evals/sample:     1

In [4]:
# `$a` interpolates the value of `a` into the benchmark expression, so mysum2
# receives it as an ordinary argument.
@benchmark mysum2($a)

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     9.740 ms (0.00% GC)
  median time:      10.162 ms (0.00% GC)
  mean time:        10.223 ms (0.00% GC)
  maximum time:     12.127 ms (0.00% GC)
  --------------
  samples:          489
  evals/sample:     1

## Avoid fields with abstract types 

In [13]:
# Wrapper around a single number. The field is annotated with the abstract type
# `Real`, so a `Typ1` can hold a value of any `Real` subtype.
struct Typ1
    a::Real
end

In [15]:
"""
    +(a::Typ1, b::Typ1)

Add two `Typ1` values by adding their `a` fields and wrapping the result in a
new `Typ1`.
"""
function Base.:+(a::Typ1, b::Typ1)
    return Typ1(a.a + b.a)
end

In [19]:
# Build a vector of 10^6 `Typ1` values, each wrapping the literal `1.`;
# `@time` reports the elapsed time and allocations of the construction.
@time x = [Typ1(1.) for i = 1:10^6];

  0.065380 seconds (1.06 M allocations: 25.622 MiB)


In [21]:
# Summing `x` relies on the `+` method defined for `Typ1`; `$x` interpolates
# the vector into the benchmark expression.
@benchmark sum($x)

BenchmarkTools.Trial: 
  memory estimate:  45.78 MiB
  allocs estimate:  2999997
  --------------
  minimum time:     188.380 ms (0.74% GC)
  median time:      189.472 ms (0.76% GC)
  mean time:        191.168 ms (0.78% GC)
  maximum time:     204.946 ms (0.74% GC)
  --------------
  samples:          27
  evals/sample:     1

In [23]:
# Parametric wrapper around a single number: the field's type is the type
# parameter `T`, which is restricted to subtypes of `Real`.
struct Typ2{T <: Real}
    a::T
end

In [24]:
"""
    +(a::Typ2, b::Typ2)

Add two `Typ2` values by adding their `a` fields and wrapping the result in a
new `Typ2`.
"""
function Base.:+(a::Typ2, b::Typ2)
    return Typ2(a.a + b.a)
end

In [27]:
# Build a vector of 10^6 `Typ2` values, each wrapping the literal `1.`;
# `@time` reports the elapsed time and allocations of the construction.
@time y = [Typ2(1.) for i = 1:10^6];

  0.032684 seconds (56.27 k allocations: 10.418 MiB)


In [28]:
# Summing `y` relies on the `+` method defined for `Typ2`; `$y` interpolates
# the vector into the benchmark expression.
@benchmark sum($y)

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     439.833 μs (0.00% GC)
  median time:      535.813 μs (0.00% GC)
  mean time:        629.596 μs (0.00% GC)
  maximum time:     28.757 ms (0.00% GC)
  --------------
  samples:          7758
  evals/sample:     1

## Pre-allocate instead of appending

In [33]:
# Input vector of 10^7 random numbers, passed to myinc1 and myinc2 in the
# benchmarks below.
a = rand(10^7)
"""
    myinc1(a::Vector{T}) where T

Return a new `Vector{T}` containing every element of `a` incremented by one.

The result starts empty and grows by one `push!` per element of `a`; `a` itself
is not modified.
"""
function myinc1(a::Vector{T}) where T
    out = T[]
    for value in a
        push!(out, value + 1)
    end
    return out
end
"""
    myinc2(a::Vector{T}) where T

Return a new `Vector{T}` containing every element of `a` incremented by one.

The output is allocated once at its final length and then filled in place, so
no resizing happens inside the loop. `a` itself is not modified.
"""
function myinc2(a::Vector{T}) where T
    n = length(a)
    # Allocate uninitialized storage: every slot is overwritten below, so there
    # is no need to copy the contents of `a` first.
    b = Vector{T}(undef, n)
    for i in eachindex(a)
        b[i] = a[i] + 1
    end
    return b
end

myinc2 (generic function with 1 method)

In [34]:
# Benchmark the `push!`-based version; `$a` interpolates the input vector into
# the benchmark expression.
@benchmark myinc1($a)

BenchmarkTools.Trial: 
  memory estimate:  129.00 MiB
  allocs estimate:  24
  --------------
  minimum time:     147.597 ms (0.84% GC)
  median time:      221.470 ms (32.23% GC)
  mean time:        221.158 ms (32.60% GC)
  maximum time:     297.263 ms (49.38% GC)
  --------------
  samples:          22
  evals/sample:     1

In [35]:
# Benchmark the version that allocates its output up front; `$a` interpolates
# the input vector into the benchmark expression.
@benchmark myinc2($a)

BenchmarkTools.Trial: 
  memory estimate:  76.29 MiB
  allocs estimate:  2
  --------------
  minimum time:     32.978 ms (1.93% GC)
  median time:      34.622 ms (18.94% GC)
  mean time:        36.670 ms (20.75% GC)
  maximum time:     163.291 ms (82.76% GC)
  --------------
  samples:          137
  evals/sample:     1