In [28]:
using Plots
using GLM
using DataFrames
using Statistics

In [29]:
# Thread counts covered by the scaling experiment.
threadsUsed = [1, 2, 4, 8, 12, 16, 20, 24, 32, 40]

# Reference line for perfect scaling: one unit of speedup per thread,
# from 1 up to the largest thread count.
ideal = UnitRange(1, maximum(threadsUsed))

# Number of thread configurations; used as the row count of the runtime matrix.
T = length(threadsUsed)

# NOTE(review): presumably the number of repetitions per configuration — confirm.
N = 10

10

In [30]:
# Fill `data` row by row from the log stream `io`; see `extract_data` for the format.
function _read_timings!(data::AbstractMatrix{Float64}, io::IO)
    nconfigs, nruns = size(data)
    row = 0  # index of the current "Run with " section
    col = 0  # index of the last "Time: " entry stored in that section

    for line in eachline(io)
        if occursin("Run with ", line)
            # A new section starts: move to the next row and restart the column count.
            row += 1
            col = 0
        elseif occursin("Time: ", line)
            row >= 1 || throw(ArgumentError(
                "found a \"Time: \" line before any \"Run with \" header"))
            row <= nconfigs || throw(ArgumentError(
                "log contains more than $nconfigs \"Run with \" sections"))
            col += 1
            col <= nruns || throw(ArgumentError(
                "section $row contains more than $nruns \"Time: \" entries"))
            # The measurement is whatever follows the last "Time: " marker.
            data[row, col] = parse(Float64, split(line, "Time: ")[end])
        end
    end

    return data
end

"""
    extract_data(filename::AbstractString; nconfigs::Integer=T, nruns::Integer=10)

Read the benchmark log `filename` and return an `nconfigs × nruns` matrix of timings.

Every line containing `"Run with "` starts a new row. Every subsequent line
containing `"Time: "` stores the number following that marker in the next free
column of the current row. All other lines are ignored, and cells for which the
log has no entry stay `0.0`.

# Keywords
- `nconfigs`: number of rows; defaults to the global `T`.
- `nruns`: number of columns; defaults to `10`.

# Throws
- `ArgumentError` if a `"Time: "` line appears before the first `"Run with "`
  line, or if the log holds more sections or more timings per section than the
  matrix can store.
"""
function extract_data(filename::AbstractString; nconfigs::Integer=T, nruns::Integer=10)
    data = zeros(Float64, nconfigs, nruns)
    # `open` with a function argument closes the file even if parsing throws.
    open(io -> _read_timings!(data, io), filename, "r")
    return data
end

extract_data (generic function with 1 method)

In [31]:
# Hard-coded runtimes of the sequential baseline (ten measurements).
# NOTE(review): the last entry (1.5710) looks truncated compared with the
# 1.57109 displayed in the first row of the parsed matrix — confirm.
sequential_runtime = [
    1.58545, 1.56401, 1.56131, 1.55413, 1.5892,
    1.58396, 1.56357, 1.57103, 1.57567, 1.5710,
]

# Timing matrix parsed from the benchmark log (one row per "Run with " section).
runtime = extract_data("scaling_hist_seq-52011955.out")

10×10 Matrix{Float64}:
 1.58545   1.56401   1.56131   1.55413   …  1.57103   1.57567   1.57109
 1.324     1.32507   1.31283   1.31325      1.31968   1.31992   1.31563
 0.749351  0.881637  0.919764  0.643804     0.742255  0.740157  0.666159
 0.397573  0.39132   0.394323  0.397593     0.443237  0.436229  0.39271
 0.336424  0.272824  0.272744  0.271919     0.330494  0.277349  0.27396
 0.268915  0.223636  0.526868  0.291438  …  0.216737  0.217964  0.257837
 0.24648   0.219278  0.258487  0.236196     0.249663  0.246731  0.238456
 0.238462  0.234861  0.224432  0.246913     0.242638  0.265582  0.261329
 0.295376  0.263041  0.256353  0.285986     0.261576  0.2431    0.176828
 0.31109   0.263555  0.283234  0.354019     0.24409   0.177405  0.282348

In [32]:
# Only the first six thread configurations (threadsUsed[1:6]) are analysed.
# Reducing along dims=2 yields one statistic per configuration (a 6×1 matrix).
median_runtime = median(runtime[1:6, :], dims=2)
average_runtime = mean(runtime[1:6, :], dims=2)

# Speedup = mean sequential runtime divided by the median runtime of each
# configuration. This is the last expression so the cell displays `speedup`;
# the stray `average` that used to follow referenced an undefined name.
speedup = mean(sequential_runtime) ./ median_runtime

6×1 Matrix{Float64}:
 1.0005556757857752
 1.19103879375663
 2.1077053860067605
 3.982546480857247
 5.76255572174321
 6.695900715835927

In [33]:
# Least-squares line speedup ≈ intercept + slope * threads over the first six
# thread configurations. `speedup[1:6]` flattens the 6×1 matrix into a vector
# column; the response is used directly because wrapping it in `transpose`
# inside the formula is an element-wise no-op on scalars.
model = lm(
    @formula(speedup ~ threadsUsed),
    DataFrame(speedup=speedup[1:6], threadsUsed=threadsUsed[1:6]),
)

StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}}}, Matrix{Float64}}

:(transpose(speedup)) ~ 1 + threadsUsed

Coefficients:
────────────────────────────────────────────────────────────────────────
                Coef.  Std. Error      t  Pr(>|t|)  Lower 95%  Upper 95%
────────────────────────────────────────────────────────────────────────
(Intercept)  0.566271   0.187272    3.02    0.0390  0.0463198    1.08622
threadsUsed  0.403318   0.0208295  19.36    <1e-04  0.345486     0.46115
────────────────────────────────────────────────────────────────────────

In [34]:
# coef(model) holds the intercept first and the threadsUsed slope second.
intercept, slope = coef(model)

# Evaluate the fitted line at every integer thread count from 1 up to the
# largest thread count used in the regression.
fit = map(t -> slope * t + intercept, 1:maximum(threadsUsed[1:6]))

16-element Vector{Float64}:
 0.9695893267369924
 1.3729073487251977
 1.776225370713403
 2.1795433927016084
 2.582861414689814
 2.986179436678019
 3.389497458666224
 3.7928154806544296
 4.196133502642635
 4.59945152463084
 5.002769546619046
 5.406087568607251
 5.809405590595457
 6.212723612583661
 6.616041634571866
 7.019359656560072

In [36]:
# Measured speedup of the first six thread configurations.
scatter(threadsUsed[1:6], speedup, xlabel="Number of Threads", ylabel="Speedup", label="Measured")
# Ideal reference line, cut to the thread range covered by the fitted line.
plot!(ideal[eachindex(fit)], label="Ideal (1)")
xticks!(threadsUsed)
# The label shows the fitted slope (rounded to one decimal) instead of a
# hard-coded number, so it stays correct if the data changes.
plot!(fit, label="Fit ($(round(slope, digits=1)))")
savefig("strongHistScaling.pdf")

"/home/timo/ETH/FS24/HPCL24/Project02/project02_skeleton_codes/hist/strongHistScaling.pdf"