# Create size error plot with selected scenarios.

In [None]:
# Activate the project environment located in the current working directory.
using Pkg
Pkg.activate(".")

In [None]:
# NOTE(review): later cells call `fit(Histogram, ...)` and `sample`, which are
# not visibly provided by the packages loaded here — presumably they arrive via
# the included datareader.jl; confirm, or load the providing package explicitly.
using Plots, DelimitedFiles, DataFrames, CSV
import YAML

# Select the GR backend for the plots created below.
gr()

In [None]:
# Bring the DataReader helpers into scope.
include("datareader.jl")

# Run directory of the trained model, and the evaluation folders derived from it.
rundir = "/home/ebr/projects/tsunami-inundation-emulator/article_runs/t591/mc8_l8_rel/"
eval_dir = joinpath(rundir, "evaluation/test")
preds_dir = joinpath(rundir, "evaluation/preds")
# Alternative evaluation directory:
#eval_dir = joinpath(rundir, "evaluation", "test_40000")

# Parse the run configuration stored next to the results.
config = DataReader.parse_config(joinpath(rundir, "config.yml"))
#evaluation_scenarios = config["test_data"]
#reader = DataReader.Reader(config)

# Make sure the output directory for predictions exists.
isdir(preds_dir) || mkpath(preds_dir)

In [None]:
# Read the tab-separated evaluation summary into a DataFrame and keep only the
# first row for each scenario.
df = DataFrame(CSV.File(joinpath(eval_dir, "summary_results.txt"); delim='\t'));
unique!(df, :scenario)
# sort!(test_df, [:error]);

In [None]:
# Show the column names of the loaded results table.
names(df)

In [None]:
# Select a small subset of scenarios spread over the range of l2-norms:
# bin the scenarios by log2 of their l2-norm and draw at most
# `max_samples_per_bin` rows from every non-empty bin.
sort!(df, :l2_norm);

df.log2_l2norm = log2.(df.l2_norm);

nbins = 20
max_samples_per_bin = 1

h = fit(Histogram, df.log2_l2norm, nbins=nbins)

# Selection flag, initially false for every row.
df[!, :selected] = falses(nrow(df));
#df = df[shuffle(1:size(df, 1)),:] # Random selection

# `df` is sorted by l2_norm and log2 is monotone, so the rows falling in bin i
# form the contiguous range (last_rows[i] - h.weights[i] + 1):last_rows[i].
# Using exact per-bin ranges keeps a draw from picking the last row of the
# previous bin, and empty bins are skipped instead of borrowing a neighbour's row.
last_rows = cumsum(h.weights)
for (i, bin_count) in enumerate(h.weights)
    bin_count == 0 && continue
    first_row = last_rows[i] - bin_count + 1
    nr_of_samples = min(max_samples_per_bin, bin_count)
    rows = sample(first_row:last_rows[i], nr_of_samples; replace = false, ordered = true)
    df.selected[rows] .= true
end

In [None]:
# Histogram of all scenarios (log-scaled counts) with the selected ones overlaid.
p = plot(h; label="Total", ylabel="Nr. of scenarios", yaxis=(:log10, (1, Inf)), alpha=0.2)
p = histogram!(p, df[df.selected, :log2_l2norm]; label="Selected", nbins=nbins)

In [None]:
# Show the `scenario` entries of the selected rows.
df[df.selected, :scenario]

In [None]:
# Write the `scenario` column of the selected rows to a text file in preds_dir.
writedlm(joinpath(preds_dir, "selected_scenarios.txt"), df[df.selected, :scenario])

## Scatter plot with scenario labels.

In [None]:
# Scatter the l2-error against the l2-norm on log-log axes. Unselected
# scenarios are drawn as small faint markers; selected scenarios are
# highlighted with large orange markers and numbered in row order.
# Plain column access is used instead of the `@df` macro, so this cell only
# needs Plots and DataFrames.
selected_rows = df[df.selected, :]

p = scatter(
    df[.!df.selected, :l2_norm],
    df[.!df.selected, :l2_err],
    scale = :log10,
    xlabel="\$\\ell^2\$-norm",
    ylabel="\$\\ell^2\$-error",
    label = false,
    markershape=:circle,
    markersize = 1.5,
    minorgrid = true,
    alpha=0.2,
    legend=:topleft,
    # NOTE(review): an upper limit of 0 lies below the lower limit and cannot be
    # shown on a log10 axis — presumably 1 (or Inf) was intended; confirm.
    ylims=(1e-3,0),
    xlims=(1e-3,0),
    ticks=[1e-2,1e-1,1],
    dpi=300,
    aspect_ratio = 1.,
)

# Highlight the selected scenarios.
p = scatter!(
    p,
    selected_rows.l2_norm,
    selected_rows.l2_err;
    alpha=0.5,
    label=false,
    marker = (14, 0.3, :orange),
)

# Label each selected scenario with its running index among the selected rows.
annotate!(
    p,
    selected_rows.l2_norm,
    selected_rows.l2_err,
    [text("$i", 8, :center) for i in 1:nrow(selected_rows)],
)

# Dashed diagonal where the error equals the norm.
plot!(p, [1e-3,1],[1e-3,1], linestyle=:dash, linewidth=3, linecolor=:grey, label="")

# Save this figure explicitly rather than whatever plot happens to be current.
savefig(p, joinpath(preds_dir, "size-error-scatter-selected.png"))
display(p)

In [None]:
# Run predict.jl on selected_scenarios.txt

In [None]:
# Show the current working directory.
pwd()

In [None]:
# Print the shell command that runs predict.jl on the selected scenarios.
print(
    "julia --project predict.jl ", rundir, " ",
    joinpath(preds_dir, "selected_scenarios.txt"),
    " --output-dir ", preds_dir,
)