In [1]:
using BenchmarkTools, DataFrames, Plots, StatsPlots, Statistics, Measurements, CSV

In [2]:
"""
    sum_by_row(A)

Return the sum of all elements of `A`, visiting them row by row: the outer loop runs
over the first index and the inner loop over the second.

The accumulator starts at `zero(eltype(A))`, so an empty `A` yields that zero and the
result has the type produced by adding elements to it.

Index ranges come from `axes(A, 1)` and `axes(A, 2)` rather than `1:n` / `1:m`, so the
function does not assume that indices start at 1.
"""
function sum_by_row(A)
    s = zero(eltype(A))
    # First index outermost, second index innermost. This traversal order is the
    # purpose of the function and must not be swapped.
    for i in axes(A, 1)
        for j in axes(A, 2)
            s += A[i, j]
        end
    end
    return s
end

sum_by_row (generic function with 1 method)

In [3]:
"""
    sum_by_col(A)

Return the sum of all elements of `A`, visiting them column by column: the outer loop
runs over the second index and the inner loop over the first. For a standard `Array`
this follows Julia's column-major memory layout.

The accumulator starts at `zero(eltype(A))`, so an empty `A` yields that zero and the
result has the type produced by adding elements to it.

Index ranges come from `axes(A, 1)` and `axes(A, 2)` rather than `1:n` / `1:m`, so the
function does not assume that indices start at 1.
"""
function sum_by_col(A)
    s = zero(eltype(A))
    # Second index outermost, first index innermost. This traversal order is the
    # purpose of the function and must not be swapped.
    for j in axes(A, 2)
        for i in axes(A, 1)
            s += A[i, j]
        end
    end
    return s
end

sum_by_col (generic function with 1 method)

In [4]:
"""
    bench(N)

Benchmark `sum_by_row` and `sum_by_col` on one random `N`×`N` matrix and return a
`DataFrame` with the columns `Size` (always `N`), `Method` (`"row"` or `"col"`) and
`Time` (one entry per recorded sample of the corresponding trial). The `"row"` samples
come first, followed by the `"col"` samples.
"""
function bench(N)
    A = rand(N, N)
    # Both trials use the same matrix; the row-wise trial runs first.
    row_trial = @benchmark sum_by_row($A)
    col_trial = @benchmark sum_by_col($A)
    frames = map(("row" => row_trial, "col" => col_trial)) do (label, trial)
        DataFrame(Size = N, Method = label, Time = trial.times)
    end
    return vcat(frames...)
end

bench (generic function with 1 method)

In [5]:
# 51 evenly spaced matrix sizes from 512 to 8192 (endpoints included).
sizes = range(512; stop = 8192, length = 51);

In [6]:
# Data generation, left commented out (presumably so that re-running the notebook
# does not repeat the benchmark run): for every entry of `sizes` it rounds the value to
# an Int64, calls `bench` on it, stacks the per-size results with `vcat` and writes
# the combined table to "results.csv".
# df = vcat(map(sizes) do size
#     N = convert(Int64, round(size, digits=0))
#     bench(N)
# end...)
# CSV.write("results.csv", df)

In [7]:
# Read "results.csv" into a DataFrame (presumably the file written by the
# commented-out benchmark run; verify it exists in the working directory).
df = CSV.read("results.csv", DataFrame);

In [8]:
# Collapse the raw samples to one row per (Method, Size) pair; Mtime holds
# mean ± standard deviation of that group's Time values.
df_agg = combine(
    groupby(df, [:Method, :Size]),
    :Time => (samples -> mean(samples) ± std(samples)) => :Mtime,
);
# Scale every Mtime entry by 1e-6 (nanoseconds -> milliseconds, assuming Time holds
# nanoseconds as recorded by BenchmarkTools).
transform!(df_agg, :Mtime => ByRow(t -> t / 1e6) => :Mtime);

In [9]:
# Split the aggregated table into one table per traversal method.
df_row = df_agg[df_agg.Method .== "row", :];
df_col = df_agg[df_agg.Method .== "col", :];

In [16]:
# Draw both timing curves on one figure; the axis labels are passed as plot
# attributes of the first call. The figure is then saved as a PNG.
@df df_row plot(
    :Size,
    :Mtime,
    label="sum_by_row",
    legend=:topleft,
    xlabel="Matrix A NxN size",
    ylabel="Time [ms]",
)
@df df_col plot!(:Size, :Mtime, label="sum_by_col")
savefig("../../static/posts/speed-of-traversing-matrix/benchmark.png")

In [11]:
A = rand(3000, 3000)
# `A` is a non-constant global, so it is interpolated with `$`: the benchmark then
# times the call itself rather than the lookup of an untyped global, matching how
# `bench` invokes `@benchmark`.
@benchmark sum_by_col($A)

BenchmarkTools.Trial: 306 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m15.548 ms[22m[39m … [35m22.414 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 0.00%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m15.669 ms              [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m16.351 ms[22m[39m ± [32m 1.327 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m0.00% ± 0.00%

  [39m█[34m▆[39m[39m▂[39m [39m▂[39m [39m [39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m█[34m█[39m[39m█[39m█[39m█[39m█

In [12]:
# `A` is a non-constant global, so it is interpolated with `$`: the benchmark then
# times the call itself rather than the lookup of an untyped global, matching how
# `bench` invokes `@benchmark`.
@benchmark sum_by_row($A)

BenchmarkTools.Trial: 65 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m72.929 ms[22m[39m … [35m175.824 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 0.00%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m75.294 ms               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m77.286 ms[22m[39m ± [32m 12.711 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m0.00% ± 0.00%

  [39m█[39m▅[39m [39m▂[39m [39m [39m [39m [39m [39m [34m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m█[39m█[39m█[39m█[39m▁[3