*An intro to high performance custom arrays by Matt Bauman*

https://youtu.be/jS9eouMJf_Y

In [1]:
using BenchmarkTools

In [2]:
# Benchmark input: a 1000×1000 matrix of random Float64 values (see the output below).
A = rand(1000,1000)

1000×1000 Array{Float64,2}:
 0.213189   0.584026    0.72996    …  0.200082   0.493674    0.648122 
 0.106868   0.773863    0.0840145     0.154692   0.647825    0.379732 
 0.0872056  0.441514    0.410738      0.602895   0.958855    0.171831 
 0.286881   0.154135    0.984402      0.0451315  0.447923    0.244536 
 0.228076   0.250626    0.478572      0.706206   0.602721    0.9089   
 0.11067    0.275993    0.939109   …  0.652209   0.425357    0.375174 
 0.465856   0.550967    0.814546      0.530511   0.28204     0.137086 
 0.900581   0.449723    0.613809      0.513894   0.593943    0.426416 
 0.499612   0.68982     0.0160085     0.829173   0.397067    0.0388353
 0.594068   0.320573    0.779716      0.361506   0.957168    0.323776 
 0.179284   0.11089     0.0120636  …  0.0648209  0.554395    0.109612 
 0.394442   0.190889    0.788738      0.798599   0.0825402   0.864797 
 0.154888   0.289707    0.0533554     0.329779   0.390892    0.187324 
 ⋮                                 ⋱             

In [5]:
"""
    weighted_sum(A, weights=ones(size(A)))

Return the sum of `A[i] * weights[i]` over the shared indices of `A` and `weights`.

`weights` defaults to a freshly allocated array of ones with the same size as `A`.
For empty input the result is a zero of the element-product type instead of an
indexing error. `eachindex(A, weights)` throws if the two arrays do not have
matching indices.
"""
function weighted_sum(A, weights=ones(size(A)))
    inds = eachindex(A, weights)
    # Empty input: there is no first element to derive a zero from.
    isempty(inds) && return zero(eltype(A)) * zero(eltype(weights))
    # Seed the accumulator with the type of one product so `r` never changes type.
    r = zero(first(A) * first(weights))
    for i in inds
        r += A[i] * weights[i]
    end
    return r
end
# Time the version whose default weights are a dense `ones` array.
# NOTE(review): `A` is a global referenced without `$` interpolation, so the
# reported time/allocations may include global-access overhead.
@btime weighted_sum(A)

  2.474 ms (3 allocations: 7.63 MiB)


500108.9329179399

In [6]:
# Baseline for comparison: the built-in `sum` over the same matrix.
@btime sum(A)

  258.922 μs (1 allocation: 16 bytes)


500108.9329179303

In [7]:
module V1

"""
    OnesMatrix(m, n)

An `m`×`n` integer matrix whose every entry is `1`. Only the two dimensions are
stored; no element storage is kept.
"""
struct OnesMatrix <: AbstractMatrix{Int}
    m::Int
    n::Int
end

# The dimensions come straight from the stored fields.
function Base.size(mat::OnesMatrix)
    return (mat.m, mat.n)
end

# Every entry is 1. The indices are never inspected, so this method performs no
# bounds check.
function Base.getindex(::OnesMatrix, ::Int, ::Int)
    return 1
end

end

Main.V1

In [8]:
# Build a 1000×1000 OnesMatrix; the display below works with only the `size` and
# `getindex` methods defined in V1.
x = V1.OnesMatrix(1000,1000)

1000×1000 Main.V1.OnesMatrix:
 1  1  1  1  1  1  1  1  1  1  1  1  1  …  1  1  1  1  1  1  1  1  1  1  1  1
 1  1  1  1  1  1  1  1  1  1  1  1  1     1  1  1  1  1  1  1  1  1  1  1  1
 1  1  1  1  1  1  1  1  1  1  1  1  1     1  1  1  1  1  1  1  1  1  1  1  1
 1  1  1  1  1  1  1  1  1  1  1  1  1     1  1  1  1  1  1  1  1  1  1  1  1
 1  1  1  1  1  1  1  1  1  1  1  1  1     1  1  1  1  1  1  1  1  1  1  1  1
 1  1  1  1  1  1  1  1  1  1  1  1  1  …  1  1  1  1  1  1  1  1  1  1  1  1
 1  1  1  1  1  1  1  1  1  1  1  1  1     1  1  1  1  1  1  1  1  1  1  1  1
 1  1  1  1  1  1  1  1  1  1  1  1  1     1  1  1  1  1  1  1  1  1  1  1  1
 1  1  1  1  1  1  1  1  1  1  1  1  1     1  1  1  1  1  1  1  1  1  1  1  1
 1  1  1  1  1  1  1  1  1  1  1  1  1     1  1  1  1  1  1  1  1  1  1  1  1
 1  1  1  1  1  1  1  1  1  1  1  1  1  …  1  1  1  1  1  1  1  1  1  1  1  1
 1  1  1  1  1  1  1  1  1  1  1  1  1     1  1  1  1  1  1  1  1  1  1  1  1
 1  1  1  1  1  1  1  1  1  1  1  

In [10]:
"""
    weighted_sum(A, weights=V1.OnesMatrix(size(A)...))

Return the sum of `A[i] * weights[i]` over the shared indices of `A` and `weights`.

`weights` defaults to a `V1.OnesMatrix` with the same dimensions as `A`.
For empty input the result is a zero of the element-product type instead of an
indexing error. `eachindex(A, weights)` throws if the two arrays do not have
matching indices.
"""
function weighted_sum(A, weights=V1.OnesMatrix(size(A)...))
    inds = eachindex(A, weights)
    # Empty input: there is no first element to derive a zero from.
    isempty(inds) && return zero(eltype(A)) * zero(eltype(weights))
    # Seed the accumulator with the type of one product so `r` never changes type.
    r = zero(first(A) * first(weights))
    for i in inds
        r += A[i] * weights[i]
    end
    return r
end
# Time the version whose default weights are a V1.OnesMatrix.
# NOTE(review): `A` is a global referenced without `$` interpolation, so the
# reported time/allocations may include global-access overhead.
@btime weighted_sum(A)

  1.733 ms (1 allocation: 16 bytes)


500108.9329179399

Add bounds checking

In [11]:
module V2

"""
    OnesMatrix(m, n)

An `m`×`n` integer matrix whose every entry is `1`. Only the two dimensions are
stored. Indexing validates the indices on every access.
"""
struct OnesMatrix <: AbstractMatrix{Int}
    m::Int
    n::Int
end

# The dimensions come straight from the stored fields.
function Base.size(mat::OnesMatrix)
    return (mat.m, mat.n)
end

# Validate the indices unconditionally, then return the constant entry.
function Base.getindex(mat::OnesMatrix, row::Int, col::Int)
    checkbounds(mat, row, col)
    return 1
end

end

Main.V2

In [12]:
"""
    weighted_sum(A, weights=V2.OnesMatrix(size(A)...))

Return the sum of `A[i] * weights[i]` over the shared indices of `A` and `weights`.

`weights` defaults to a `V2.OnesMatrix` with the same dimensions as `A`.
For empty input the result is a zero of the element-product type instead of an
indexing error. `eachindex(A, weights)` throws if the two arrays do not have
matching indices.
"""
function weighted_sum(A, weights=V2.OnesMatrix(size(A)...))
    inds = eachindex(A, weights)
    # Empty input: there is no first element to derive a zero from.
    isempty(inds) && return zero(eltype(A)) * zero(eltype(weights))
    # Seed the accumulator with the type of one product so `r` never changes type.
    r = zero(first(A) * first(weights))
    # `inds` comes from `eachindex(A, weights)`, so every index is valid for both arrays.
    @inbounds for i in inds
        r += A[i] * weights[i]
    end
    return r
end
# Time the V2-backed version; `$A` interpolates the global into the benchmark
# expression.
@btime weighted_sum($A)

  1.952 ms (0 allocations: 0 bytes)


500108.9329179399

In [13]:
module V3

"""
    OnesMatrix(m, n)

An `m`×`n` integer matrix whose every entry is `1`. Only the two dimensions are
stored. The index check in `getindex` is wrapped in `@boundscheck`, so a caller
using `@inbounds` may elide it.
"""
struct OnesMatrix <: AbstractMatrix{Int}
    m::Int
    n::Int
end

# The dimensions come straight from the stored fields.
function Base.size(mat::OnesMatrix)
    return (mat.m, mat.n)
end

# `@inline` lets the caller's `@inbounds` reach the `@boundscheck` block below.
@inline function Base.getindex(mat::OnesMatrix, row::Int, col::Int)
    @boundscheck checkbounds(mat, row, col)
    return 1
end

end

Main.V3

In [54]:
"""
    weighted_sum(A, weights=V3.OnesMatrix(size(A)...))

Return the sum of `A[i] * weights[i]` over the shared indices of `A` and `weights`.

`weights` defaults to a `V3.OnesMatrix` with the same dimensions as `A`.
For empty input the result is a zero of the element-product type instead of an
indexing error. `eachindex(A, weights)` throws if the two arrays do not have
matching indices.
"""
function weighted_sum(A, weights=V3.OnesMatrix(size(A)...))
    inds = eachindex(A, weights)
    # Empty input: there is no first element to derive a zero from.
    isempty(inds) && return zero(eltype(A)) * zero(eltype(weights))
    # Seed the accumulator with the type of one product so `r` never changes type.
    r = zero(first(A) * first(weights))
    # `inds` comes from `eachindex(A, weights)`, so every index is valid for both arrays.
    @inbounds for i in inds
        r += A[i] * weights[i]
    end
    return r
end
# Time the V3-backed version (`@inbounds` loop plus `@boundscheck` in getindex).
# NOTE(review): `A` is a global referenced without `$` interpolation, so the
# reported time/allocations may include global-access overhead.
@btime weighted_sum(A)

  1.679 ms (0 allocations: 0 bytes)


500215.87431490654

In [14]:
module V4

"""
    OnesMatrix(m, n)

An `m`×`n` integer matrix whose every entry is `1`. Only the two dimensions are
stored. The type declares `IndexLinear()` and implements single-integer indexing.
"""
struct OnesMatrix <: AbstractMatrix{Int}
    m::Int
    n::Int
end

# The dimensions come straight from the stored fields.
function Base.size(mat::OnesMatrix)
    return (mat.m, mat.n)
end

# Declare linear indexing as the native index style for this type.
function Base.IndexStyle(::Type{OnesMatrix})
    return IndexLinear()
end

# `@inline` lets the caller's `@inbounds` reach the `@boundscheck` block below.
@inline function Base.getindex(mat::OnesMatrix, idx::Int)
    @boundscheck checkbounds(mat, idx)
    return 1
end

end

Main.V4

In [15]:
"""
    weighted_sum(A, weights=V4.OnesMatrix(size(A)...))

Return the sum of `A[i] * weights[i]` over the shared indices of `A` and `weights`.

`weights` defaults to a `V4.OnesMatrix` with the same dimensions as `A`.
For empty input the result is a zero of the element-product type instead of an
indexing error. `eachindex(A, weights)` throws if the two arrays do not have
matching indices.
"""
function weighted_sum(A, weights=V4.OnesMatrix(size(A)...))
    inds = eachindex(A, weights)
    # Empty input: there is no first element to derive a zero from.
    isempty(inds) && return zero(eltype(A)) * zero(eltype(weights))
    # Seed the accumulator with the type of one product so `r` never changes type.
    r = zero(first(A) * first(weights))
    for i in inds
        r += A[i] * weights[i]
    end
    return r
end
# Time the V4-backed version (linear-indexed OnesMatrix).
# NOTE(review): `A` is a global referenced without `$` interpolation, so the
# reported time/allocations may include global-access overhead.
@btime weighted_sum(A)

  776.087 μs (1 allocation: 16 bytes)


500108.9329179399