In [1]:
using BenchmarkTools
using Unrolled

using SIMD

using ChipSort

In [2]:
"""
    run_bench_mine(T, chunks, Val(N), Val(L))

Benchmark `chipsort_medium` on a freshly generated random vector.

The input holds `chunks * N * L` elements of type `T`. `N` and `L` are taken
from the `Val` type parameters and forwarded to `chipsort_medium` as `Val(N)`
and `Val(L)`. Returns the result of `@benchmark`.
"""
function run_bench_mine(T, chunks, ::Val{N}, ::Val{L}) where {N, L}
    input = rand(T, chunks * N * L)
    # `$` interpolation hands the benchmark pre-evaluated values instead of
    # having it look up variables while timing.
    return @benchmark chipsort_medium($input, Val($N), Val($L))
end

"""
    run_bench_base(T, chunks, Val(N), Val(L))

Benchmark the built-in `sort` on a freshly generated random vector.

`N` and `L` are used only to size the input (`chunks * N * L` elements of
type `T`), the same length formula `run_bench_mine` uses. Returns the result
of `@benchmark`.
"""
function run_bench_base(T, chunks, ::Val{N}, ::Val{L}) where {N, L}
    input = rand(T, chunks * N * L)
    # `sort` returns a sorted copy, so `input` is not modified between samples.
    return @benchmark sort($input)
end

run_bench_base (generic function with 1 method)

In [None]:
"""
    ExpRun(code, eltype, data_size, vec_size, vec_count, bench)

Record of a single benchmark run.

# Fields
- `code`: label identifying which routine was measured (`run_tests` uses
  `:chip` and `:juli`).
- `eltype`: element type of the benchmarked data.
- `data_size`: chunk multiplier used when sizing the input.
- `vec_size`, `vec_count`: the two size parameters of the run.
- `bench`: benchmark result; `show` only requires that it has a `times`
  field that can be indexed.
"""
struct ExpRun
    code
    eltype
    data_size
    vec_size
    vec_count
    bench
end

"""
    show(io::IO, ee::ExpRun)

Write `ee` to `io` as one space-separated line fragment:
`code eltype data_size vec_size vec_count time`, where `time` is the entry at
the middle position (`div(end, 2)`) of `ee.bench.times`.

With a single recorded sample the first (only) entry is used. An empty
`times` collection still raises a `BoundsError`.
"""
function Base.show(io::IO, ee::ExpRun)
    times = ee.bench.times
    # `div(lastindex, 2)` is 0 when only one sample exists; clamp it to the
    # first valid index so a single-sample run can still be displayed.
    mid = max(firstindex(times), div(lastindex(times), 2))
    # Everything goes to the stream we were handed, not to stdout, so that
    # `sprint`, `repr`, string interpolation and file output all work.
    print(io, ee.code, " ", ee.eltype, " ", ee.data_size, " ",
          ee.vec_size, " ", ee.vec_count, " ", times[mid])
    return nothing
end

# Run the full benchmark sweep over every combination of the entries of
# `data_size`, `nn` and `ll`.
#
# For each combination both `run_bench_mine` (recorded with code `:chip`) and
# `run_bench_base` (recorded with code `:juli`) are executed on `Float32`
# data. Each result is wrapped in an `ExpRun`, printed, and appended to the
# returned vector, in that order.
#
# NOTE(review): `@unroll` presumably comes from the Unrolled package and needs
# the three arguments to be tuples so the loops can be expanded at compile
# time, which would make `Val(n)` / `Val(l)` constants - confirm.
@unroll function run_tests(data_size, nn, ll)
    T = Float32
    results = ExpRun[]

    @unroll for chunks in data_size
        @unroll for n in nn
            @unroll for l in ll
                # Implementation under test.
                chip_stat = run_bench_mine(T, chunks, Val(n), Val(l))
                chip_run = ExpRun(:chip, T, chunks, n, l, chip_stat)
                println(chip_run)
                push!(results, chip_run)

                # Baseline using the built-in sort on an equally sized input.
                base_stat = run_bench_base(T, chunks, Val(n), Val(l))
                base_run = ExpRun(:juli, T, chunks, n, l, base_stat)
                println(base_run)
                push!(results, base_run)
            end
        end
    end
    return results
end

# Parameter grid for the benchmark sweep. Each tuple is iterated by `run_tests`.
# Passed as `data_size`: the `chunks` multiplier in the input length `chunks * N * L`.
chunk_sizes = (16,8,4,2)
# Passed as `nn`: each entry is wrapped in `Val` and recorded as `vec_size`.
N = (8,4,2)
# Passed as `ll`: each entry is wrapped in `Val` and recorded as `vec_count`.
L = (8,4,2)

# Run the whole sweep; `data` holds one `ExpRun` per (routine, chunk, N, L) combination.
data = run_tests(chunk_sizes, N, L)

chip Float32 16 8 8 76357.0
juli Float32 16 8 8 22693.0
chip Float32 16 8 4 59674.0
juli Float32 16 8 4 4786.571428571428
chip Float32 16 8 2 50635.0
juli Float32 16 8 2 2089.3
chip Float32 16 4 8 57263.0
juli Float32 16 4 8 4233.857142857143
chip Float32 16 4 4 56887.0
juli Float32 16 4 4 2259.4
chip Float32 16 4 2 48469.0
juli Float32 16 4 2 837.671052631579
chip Float32 16 2 8 56035.0
juli Float32 16 2 8 2187.4
chip Float32 16 2 4 49446.0
juli Float32 16 2 4 834.746835443038
chip Float32 16 2 2 47857.0
juli Float32 16 2 2 383.1527093596059
chip Float32 8 8 8 30589.0
juli Float32 8 8 8 4234.571428571428
chip Float32 8 8 4 25303.0
juli Float32 8 8 4 2137.6
chip Float32 8 8 2 23791.0
juli Float32 8 8 2 820.547619047619
chip Float32 8 4 8 23868.0
juli Float32 8 4 8 2107.7
chip Float32 8 4 4 23026.0
juli Float32 8 4 4 856.7727272727273
chip Float32 8 4 2 26935.0
juli Float32 8 4 2 377.4166666666667
chip Float32 8 2 8 23763.0
juli Float32 8 2 8 857.955223880597
chip Float32 8 2 4 26923.0
