In [None]:
import MLJ
import DataFrames as DF
import CSV
using DataFramesMeta
using StatsPlots
using LaTeXStrings

In [None]:
# Evaluate the project-local helper scripts in the current module.
# Nothing in this notebook defines `read_aerosol_dataset` or
# `preprocess_aerosol_data`, so they presumably come from these files —
# TODO confirm which file provides which.
include("PreprocessAerosolData.jl")
include("ActivationEmulatorModels.jl")

In [None]:
# Read the CSV named `..._test.csv`; the helper's return value is destructured
# into two parts, bound here as `X_test` and `Y_test`.
# NOTE(review): presumably features and targets respectively; the column
# schema is decided by `read_aerosol_dataset`, which is not visible here.
X_test, Y_test = read_aerosol_dataset("datasets/2modal_dataset2_test.csv")

In [None]:
# Base name of the serialized emulator; it is also interpolated into the
# file name of the comparison figure saved later in this notebook.
mach_name = "2modal_nn_machine1"
# Build the MLJ machine from the `.jls` file under `emulators/`.
mach = MLJ.machine("emulators/$(mach_name).jls")

In [None]:
# Each quantity is exponentiated element-wise, i.e. the machine's predictions,
# the `log_ARG_S_max` field of the preprocessed inputs, and `Y_test` are all
# treated as log-space values here.
# NOTE(review): "ARG" presumably denotes the analytic parameterization and
# "PySDM" the reference simulation output — confirm against the helper files.
pred_S_max = exp.(MLJ.predict(mach, X_test))
ARG_S_max = exp.(preprocess_aerosol_data(X_test).log_ARG_S_max)
PySDM_S_max = exp.(Y_test)

In [None]:
# Smallest and largest value found across the three S_max series; these bound
# the reference lines drawn on the comparison plots.
minpt = minimum(minimum, (PySDM_S_max, ARG_S_max, pred_S_max))
maxpt = maximum(maximum, (PySDM_S_max, ARG_S_max, pred_S_max))

"""
    plot_ref_lines!(plt=current(); lo=minpt, hi=maxpt, factor=2)

Overlay three straight reference lines on `plt`, each drawn from `x = lo` to
`x = hi`: the 1:1 line `y = x` in red, plus `y = factor * x` and
`y = x / factor` in blue. None of the lines gets a legend entry.

Called with no arguments it draws on the current plot, spans the global
`minpt`/`maxpt` range and uses a factor of 2.

# Arguments
- `plt`: plot to modify. Defaults to the current plot, so a plot must already
  exist when the argument is omitted.

# Keywords
- `lo`, `hi`: x-coordinates of the line endpoints (default: globals `minpt`, `maxpt`).
- `factor`: multiplicative offset of the two blue lines from the 1:1 line.

# Returns
The modified plot `plt`.
"""
function plot_ref_lines!(plt=current(); lo=minpt, hi=maxpt, factor=2)
    xs = [lo, hi]
    # Passing `plt` explicitly avoids depending on whichever plot is "current"
    # at the time each individual `plot!` call runs.
    plot!(plt, xs, [lo, hi]; color=:red, label=nothing)
    plot!(plt, xs, [lo * factor, hi * factor]; color=:blue, label=nothing)
    plot!(plt, xs, [lo / factor, hi / factor]; color=:blue, label=nothing)
    return plt
end

# Styling shared by both predicted-vs-PySDM scatter panels (log-log axes,
# tiny borderless black markers, no legend entry).
scatter_style = (
    color=:black,
    markersize=1,
    markerstrokewidth=0,
    label=nothing,
    xscale=:log10,
    yscale=:log10,
    xlabel=L"PySDM $S_{max}$",
    ylabel=L"Predicted $S_{max}$",
)

# Left panel: ARG values against the PySDM values.
# `plot_ref_lines!()` draws onto the most recently created plot, so it must be
# called right after each `scatter`.
p1 = scatter(PySDM_S_max, ARG_S_max; title="ARG", scatter_style...)
plot_ref_lines!()

# Right panel: emulator predictions against the PySDM values.
p2 = scatter(PySDM_S_max, pred_S_max; title="Emulator", scatter_style...)
plot_ref_lines!()

combined_plot = plot(p1, p2)

# Make sure the output directory exists before writing (the save may otherwise
# fail on a fresh checkout), and save the combined figure explicitly rather
# than relying on it being the current plot.
mkpath("plots")
savefig(combined_plot, "plots/model_comparison_$(mach_name).pdf")

# Last expression of the cell, so the notebook displays the figure.
combined_plot

In [None]:
"""
    loghist(x; nbins=30, kwargs...)

Plot a histogram of `x` using logarithmically spaced bins on a log10 x-axis.

The bin edges are spaced evenly in `log10` between the minimum and maximum of
`x`, and the x-limits are set to that same range.

# Arguments
- `x`: collection of strictly positive values.

# Keywords
- `nbins`: number of bins (`nbins + 1` edges are generated).
- `kwargs...`: forwarded to `histogram`; because they are passed last they
  override the `bins`, `xlims` and `xscale` chosen here.

# Returns
Whatever `histogram` returns.

# Throws
- `ArgumentError` if `nbins < 1`.
- `DomainError` if the smallest value of `x` is not strictly positive.
"""
function loghist(x; nbins=30, kwargs...)
    nbins >= 1 || throw(ArgumentError("nbins must be at least 1, got $nbins"))

    # Single pass over the data instead of repeated minimum/maximum calls.
    lo, hi = extrema(x)
    lo > 0 || throw(DomainError(lo, "loghist requires strictly positive data"))

    # `nbins` bins need `nbins + 1` edges; the keyword form of `range` also
    # works on Julia versions without the three-positional-argument method.
    edges = 10.0 .^ range(log10(lo), log10(hi); length=nbins + 1)

    return histogram(x; bins=edges, xlims=(lo, hi), xscale=:log10, kwargs...)
end

# Histograms of the predicted-to-PySDM ratio for ARG (left) and the emulator
# (right); both panels share the same x-axis label.
ratio_label = L"predicted / true $S_{max}$"

p1 = loghist(ARG_S_max ./ PySDM_S_max; title="ARG", xlabel=ratio_label, label=nothing)
p2 = loghist(
    pred_S_max ./ PySDM_S_max; title="Emulator", xlabel=ratio_label, label=nothing
)

# Last expression of the cell, so the notebook displays the combined figure.
plot(p1, p2)