http://web.mit.edu/julia_v0.6.2/julia/share/doc/julia/html/en/manual/performance-tips.html

https://www.cs.purdue.edu/homes/hnassar/JPUG/performance.html

https://discourse.julialang.org/t/when-shouldnt-we-use-simd/18276/2

In [1]:
using BenchmarkTools

In [2]:
"""
    convolve_simple(g, u)

Return the causal discrete convolution of `g` and `u`, truncated to `length(u)`:

    y[t] = sum(g[t - τ + 1] * u[τ] for τ in 1:t)

Baseline implementation with bounds checking left on, so a `g` that is shorter
than `u` raises a `BoundsError` when the first missing element is accessed.
"""
function convolve_simple(g::Vector{T}, u::Vector{T})::Vector{T} where T <: Number
    y = similar(u)
    for t in eachindex(u)
        # Seed the accumulator from the element type: `y` is still uninitialised
        # here, and reading `y[t]` throws `UndefRefError` for non-isbits element
        # types such as `BigInt`.
        val = zero(T)
        for τ in 1:t
            val += g[t - τ + 1] * u[τ]
        end
        y[t] = val
    end
    return y
end

convolve_simple (generic function with 1 method)

In [8]:
"""
    convolve_inbounds(g, u)

Return the causal discrete convolution of `g` and `u`, truncated to `length(u)`:

    y[t] = sum(g[t - τ + 1] * u[τ] for τ in 1:t)

Same loop nest as a plain double loop, but with bounds checks disabled via
`@inbounds`.

# Throws
- `DimensionMismatch` if `g` is shorter than `u`.
"""
function convolve_inbounds(g::Vector{T}, u::Vector{T})::Vector{T} where T <: Number
    # `@inbounds` below removes the checks that would otherwise catch a short `g`,
    # so the index range `g[1:length(u)]` has to be validated up front.
    length(g) >= length(u) || throw(DimensionMismatch(
        "g must be at least as long as u, got lengths $(length(g)) and $(length(u))"))
    y = similar(u)
    @inbounds for t in eachindex(u)
        # Seed the accumulator from the element type: `y` is still uninitialised
        # here, and reading `y[t]` throws `UndefRefError` for non-isbits element
        # types such as `BigInt`.
        val = zero(T)
        for τ in 1:t
            val += g[t - τ + 1] * u[τ]
        end
        y[t] = val
    end
    return y
end

convolve_inbounds (generic function with 1 method)

In [20]:
"""
    convolve_simd(g, u)

Return the causal discrete convolution of `g` and `u`, truncated to `length(u)`:

    y[t] = sum(g[t - τ + 1] * u[τ] for τ in 1:t)

Bounds checks are disabled with `@inbounds` and the inner reduction is annotated
with `@simd`, which allows the compiler to reorder the floating-point
accumulation; results for floating-point inputs may therefore differ in rounding
from a strictly sequential sum.

# Throws
- `DimensionMismatch` if `g` is shorter than `u`.
"""
function convolve_simd(g::Vector{T}, u::Vector{T})::Vector{T} where T <: Number
    # `@inbounds` below removes the checks that would otherwise catch a short `g`,
    # so the index range `g[1:length(u)]` has to be validated up front.
    length(g) >= length(u) || throw(DimensionMismatch(
        "g must be at least as long as u, got lengths $(length(g)) and $(length(u))"))
    y = similar(u)
    # `@simd` is only meaningful on an innermost loop, so it is applied to the
    # reduction over τ and not to the outer loop over t.
    @inbounds for t in eachindex(u)
        # Seed the accumulator from the element type: `y` is still uninitialised
        # here, and reading `y[t]` throws `UndefRefError` for non-isbits element
        # types such as `BigInt`.
        val = zero(T)
        @simd for τ in 1:t
            val += g[t - τ + 1] * u[τ]
        end
        y[t] = val
    end
    return y
end

convolve_simd (generic function with 1 method)

In [28]:
"""
    convolve_fastmath(g, u)

Return the causal discrete convolution of `g` and `u`, truncated to `length(u)`:

    y[t] = sum(g[t - τ + 1] * u[τ] for τ in 1:t)

Bounds checks are disabled with `@inbounds`, the inner reduction is annotated
with `@simd`, and the arithmetic is wrapped in `@fastmath`, which relaxes strict
IEEE semantics. Results for floating-point inputs may therefore differ in
rounding from a strictly sequential sum.

# Throws
- `DimensionMismatch` if `g` is shorter than `u`.
"""
function convolve_fastmath(g::Vector{T}, u::Vector{T})::Vector{T} where T <: Number
    # `@inbounds` below removes the checks that would otherwise catch a short `g`,
    # so the index range `g[1:length(u)]` has to be validated up front.
    length(g) >= length(u) || throw(DimensionMismatch(
        "g must be at least as long as u, got lengths $(length(g)) and $(length(u))"))
    y = similar(u)
    # `@simd` is only meaningful on an innermost loop, so it is applied to the
    # reduction over τ and not to the outer loop over t.
    @fastmath @inbounds for t in eachindex(u)
        # Seed the accumulator from the element type: `y` is still uninitialised
        # here, and reading `y[t]` throws `UndefRefError` for non-isbits element
        # types such as `BigInt`.
        val = zero(T)
        @simd for τ in 1:t
            val += g[t - τ + 1] * u[τ]
        end
        y[t] = val
    end
    return y
end

convolve_fastmath (generic function with 1 method)

In [29]:
# Length of the random input vectors `g` and `u` used for the benchmarks.
N = 1000

1000

In [38]:
# Benchmark inputs: two random `Float64` vectors of length `N`
# (`rand(N)` draws `Float64` values by default).
g = rand(N);
u = rand(N);

In [31]:
# Interpolate the global inputs with `$` so the benchmark measures the call
# itself rather than access to non-constant global variables.
@btime convolve_simple($g, $u);

  338.700 μs (1 allocation: 7.94 KiB)


In [32]:
# Interpolate the global inputs with `$` so the benchmark measures the call
# itself rather than access to non-constant global variables.
@btime convolve_inbounds($g, $u);

  336.300 μs (1 allocation: 7.94 KiB)


In [40]:
# Interpolate the global inputs with `$` so the benchmark measures the call
# itself rather than access to non-constant global variables.
@btime convolve_simd($g, $u);

  67.100 μs (1 allocation: 7.94 KiB)


In [39]:
# Interpolate the global inputs with `$` so the benchmark measures the call
# itself rather than access to non-constant global variables.
@btime convolve_fastmath($g, $u);

  68.300 μs (1 allocation: 7.94 KiB)
