This notebook is intended for visualizing statistics associated with the comparison of multiple models. The input is summary statistics in the form of a JSON file, containing a dictionary with stats for each model.

In [None]:
using JSON, DataFrames, StatsPlots, Measures, CSV

# Select the GR backend for all plots in this notebook.
gr()
# Apply the ggplot2 look to every plot created afterwards.
theme(:ggplot2)

In [None]:
# Load the per-model summary statistics; the path is relative to the working directory.
# The trailing semicolon suppresses the cell output.
stats_list = JSON.parsefile("article_runs/stats.json");

In [None]:
# Display the parsed statistics for inspection.
stats_list

In [None]:
"""
    get_model_features_from_path(path)

Extract model metadata from the components of an evaluation directory path.

The path is split on `'/'` and read from the right:

- the last component becomes `"evaluation_set"`;
- the third-from-last component becomes `"model_name"`;
- the fourth-from-last component yields `"training_set"`: its first character is
  dropped and the text before the first `_` is parsed as an integer.

Trailing slashes are ignored, so `"a/b/c/d/"` is read like `"a/b/c/d"`.

# Returns
A `Dict` with the keys `"training_set"` (an `Int64`), `"model_name"` and
`"evaluation_set"` (both `String`).

# Throws
- `ArgumentError` if `path` has fewer than four components, or if the training-set
  component does not contain a parseable integer.
"""
function get_model_features_from_path(path)
    # Drop trailing separators first; otherwise the last component would be empty
    # and every index below would be shifted by one.
    pathlist = split(rstrip(path, '/'), '/')
    length(pathlist) >= 4 || throw(
        ArgumentError("expected at least four path components, got \"$path\""),
    )

    # `chop` removes the leading character by character, not by byte index, so it
    # cannot land inside a multi-byte character.
    training_dir = chop(pathlist[end-3]; head=1, tail=0)

    return Dict(
        "training_set" => parse(Int64, first(split(training_dir, "_"))),
        "model_name" => String(pathlist[end-2]),
        "evaluation_set" => String(pathlist[end]),
    )
end

# Directory that the CSV export and the saved figure further down are written to.
# NOTE(review): this is an absolute path into one user's home directory — adjust it
# when running the notebook on another machine.
out_dir = "/home/ebr/projects/tsunami-inundation-emulator/article_runs/figures"

In [None]:
# Attach a "features" entry to every model record, derived from its "eval_dir" path.
foreach(stats_list) do record
    eval_dir = record["eval_dir"]
    record["features"] = get_model_features_from_path(eval_dir)
end

In [None]:
# Inspect the first record, e.g. to check that the "features" entry was added.
stats_list[1]

In [None]:
# Table with one row per record in `stats_list`: the three path-derived features
# plus a hand-picked set of statistics.
df = let
    # Column of one feature / one statistic across all records.
    feature(key) = [record["features"][key] for record in stats_list]
    stat(key) = [record["stats"][key] for record in stats_list]

    DataFrame(
        evaluation_set = feature("evaluation_set"),
        training_set = feature("training_set"),
        model = feature("model_name"),
        q95_l2 = stat("q95_l2"),
        mean_l2 = stat("mean_l2"),
        aida_K_q95 = stat("aida_K_q95"),
        std_res_q95 = stat("std_res_q95"),
    )
end

In [None]:
# Feature columns (evaluation set, training-set size, model name), one row per record.
df_features = let
    feature(key) = [record["features"][key] for record in stats_list]
    DataFrame(
        evaluation_set = feature("evaluation_set"),
        training_set = feature("training_set"),
        model = feature("model_name"),
    )
end;

# One column per statistic. The column set is taken from the first record, so every
# record is expected to provide the same "stats" keys.
df_stats = DataFrame(
    map(collect(keys(stats_list[1]["stats"]))) do stat_name
        Symbol(stat_name) => [record["stats"][stat_name] for record in stats_list]
    end
);

# Features and statistics side by side; this replaces any earlier `df`.
df = hcat(df_features, df_stats)

In [None]:
# Test-set rows only, reduced to the l2 columns and ordered by increasing q95_l2.
df_test = let is_test = df.evaluation_set .== "test"
    l2_columns = [:model, :training_set, :mean_l2, :q95_l2]
    sort(df[is_test, l2_columns], :q95_l2)
end

In [None]:
# Split the test-set table into one group per model.
gdf = groupby(df_test, :model)

In [None]:
# Pick the third group for interactive exploration in the following cells.
tdf = gdf[3] 
#tdf[!, :id] = 1:size(tdf, 1)

In [None]:
# Columns of interest while exploring.
# NOTE(review): this value is not read anywhere in the visible notebook, and the loops
# further down assign `colnames` again.
colnames = [:model, :mean_l2, :q95_l2]

In [None]:
# Display the selected group.
tdf

In [None]:
# Model name in the first row of the selected group.
tdf[1,:model] 

In [None]:
# Reshape to long format: mean_l2 and q95_l2 are stacked into variable/value columns,
# the remaining columns are kept as identifiers.
sdf = stack(tdf,  [:mean_l2, :q95_l2])

In [None]:
# Spread back to wide format: one column per training_set value, filled from :value.
unstack(sdf, :training_set, :value)

In [None]:
# Display the first model group.
gdf[1]

In [None]:
# For a hand-picked subset of models, collect a wide table with one row per
# (model, variable) pair and one column per training-set size.
# NOTE(review): the original list named "mc32_l16_rel_reg" twice. The duplicate is
# dropped here, which selects the same groups; if a third model was intended, add it.
res_dfs = DataFrame[]
for tdf in gdf
    # Every row of a group carries the same model name, so the first row identifies it.
    # Skipping early avoids reshaping groups that are not kept.
    tdf[1, :model] in ("mc32_l16_rel_reg", "mc8_l8_rel") || continue
    long = stack(tdf, [:mean_l2, :q95_l2])
    push!(res_dfs, unstack(long, :training_set, :value))
end

In [None]:
# Build one wide table per model: one row per variable (training_set, mean_l2, q95_l2)
# and one column per row position `id` within the model's group.
res_dfs = DataFrame[]
for tdf in gdf
    # Work on a copy. `tdf` is a view into `df_test`, so adding `:id` to it directly
    # would also add that column to `df_test` and change what other cells see.
    ranked = DataFrame(tdf)
    ranked[!, :id] = 1:size(ranked, 1)
    long = stack(ranked, [:training_set, :mean_l2, :q95_l2])
    push!(res_dfs, unstack(long, :id, :value))
end

In [None]:
# Concatenate the per-model tables; `cols=:union` keeps every column that occurs in any
# of them.
scores = vcat(res_dfs..., cols=:union)

In [None]:
# Print the complete table without truncating rows or columns.
show(scores, allrows=true, allcols=true)

In [None]:
# Export the score table as CSV into `out_dir`.
CSV.write(joinpath(out_dir, "df_score_stats.csv"), scores)

In [None]:
# Test-set rows for training-set size 295, ordered by increasing q95_l2.
let picked = (df.evaluation_set .== "test") .& (df.training_set .== 295)
    sort(df[picked, :], :q95_l2)
end

In [None]:
# Test-set rows for training-set size 591, ordered by increasing q95_l2.
let picked = (df.evaluation_set .== "test") .& (df.training_set .== 591)
    sort(df[picked, :], :q95_l2)
end

In [None]:
# Test-set rows for training-set size 1831, ordered by increasing q95_l2.
let picked = (df.evaluation_set .== "test") .& (df.training_set .== 1831)
    sort(df[picked, :], :q95_l2)
end

In [None]:
# Order the rows by training-set size before drawing the line plots.
sort!(df, [:training_set])

# Left panel: mean_l2 on the test set for the four compared models.
p1 = let
    wanted = ("mc8_l8", "mc32_l16", "mc32_l16_rel", "mc8_l8_rel")
    rows = (df.evaluation_set .== "test") .& in.(df.model, Ref(wanted))
    @df df[rows, :] plot(
        :training_set,
        :mean_l2,
        group = :model,
        xlabel = "Size of training set",
        ylabel = "Mean \$\\ell^2\$-error",
        xscale = :log10,
        xlim = (100, 5000),
        ylim = (0.015, 0.045),
        m = (0.5, [:sq :h :d :star7 :c :star5], 10),
        margin = 5mm
    )
end

# Right panel: the same quantity on the training set. It has no y-axis label of its own.
p2 = let
    wanted = ("mc8_l8", "mc32_l16", "mc32_l16_rel", "mc8_l8_rel")
    rows = (df.evaluation_set .== "train") .& in.(df.model, Ref(wanted))
    @df df[rows, :] plot(
        :training_set,
        :mean_l2,
        group = :model,
        xlabel = "Size of training set",
        xscale = :log10,
        xlim = (100, 5000),
        ylim = (0.015, 0.045),
        m = (0.5, [:sq :h :d :star7 :c :star5], 10),
        margin = 5mm
    )
end

# Combine both panels side by side, save the figure to `out_dir`, then show it.
p = plot(p1, p2, layout = (1,2), size=(900,400), title=["Test" "Train"])
savefig(p, joinpath(out_dir, "compare_mean_l2.svg"))
display(p)

In [None]:
# aida_K_q50 on the test set against training-set size, one line per compared model.
# This reuses the name `p`.
p = let
    wanted = ("mc8_l8", "mc32_l16", "mc8_l8_rel", "mc32_l16_rel")
    rows = (df.evaluation_set .== "test") .& in.(df.model, Ref(wanted))
    @df df[rows, :] plot(
        :training_set,
        :aida_K_q50,
        group = :model,
        xscale = :log10,
        xlim = (100, 5000),
        m = (0.5, [:sq :h :d :star7 :c :star5], 10),
    )
end

In [None]:
# q95_l2 on the test set against training-set size, one marker series per model.
let test_rows = df[df.evaluation_set .== "test", :]
    @df test_rows scatter(
        :training_set,
        :q95_l2,
        group = :model,
        m = (0.5, [:sq :h :d :star7 :c :star5], 10),
        xscale = :log10,
        xlim = (100, 5000),
    )
end

In [None]:
# q95_l2 on the training set against training-set size, one marker series per model.
let train_rows = df[df.evaluation_set .== "train", :]
    @df train_rows scatter(
        :training_set,
        :q95_l2,
        group = :model,
        m = (0.5, [:sq :h :d :star7 :c :star5], 10),
        xscale = :log10,
        xlim = (100, 5000),
    )
end

In [None]:
# std_res_q95 on the test set against training-set size, one marker series per model.
let test_rows = df[df.evaluation_set .== "test", :]
    @df test_rows scatter(
        :training_set,
        :std_res_q95,
        group = :model,
        m = (0.5, [:sq :h :d :star7 :c :star5], 10),
        xscale = :log10,
        xlim = (100, 5000),
    )
end

In [None]:
# Test-set rows ordered by increasing mean_l2.
# `df[rows, :]` already returns a fresh copy, so the non-mutating `sort` gives the same
# result and makes it clear that `df` itself is left untouched.
sort(df[df.evaluation_set .== "test", :], :mean_l2)

In [None]:
# Scratch check of string slicing: drops the first character, giving "ald".
"hald"[2:end]