In [None]:
using Revise
import MLJ
import DataFrames as DF
import CSV
import StatsBase
import EvoTrees
import SymbolicRegression
import CloudMicrophysics.PreprocessAerosolData as PAD
using DataFramesMeta
using CairoMakie

In [None]:
# Load helper definitions from a sibling source file.
# NOTE(review): presumably this is where `read_aerosol_dataset` (called below) is
# defined — confirm against ReadAerosolDataset.jl.
include("ReadAerosolDataset.jl")

In [None]:
"""
    plot_accuracy_scatterplot(dataset_filename, mach_filename = nothing, use_S_max = false)

Compare predicted activation fractions against the reference values read from
`dataset_filename`: print the prediction time per entry and the RMSE, and return a
scatter plot of predicted versus reference values with a 1:1 line.

# Arguments
- `dataset_filename`: passed to `read_aerosol_dataset` to obtain the test inputs and
  targets.
- `mach_filename`: file handed to `MLJ.machine` to restore a saved machine. When
  `nothing`, the prediction is `PAD.get_ARG_act_frac(X_test)[:, 1]` instead.
- `use_S_max`: when `true`, the dataset is read with `:S_max` as the extra argument to
  `read_aerosol_dataset`, and both the targets and the machine output are passed through
  `PAD.get_ARG_act_frac(X_test, ·)[:, 1]` before being compared.

# Returns
The `Figure` containing the scatter plot.
"""
function plot_accuracy_scatterplot(dataset_filename, mach_filename = nothing, use_S_max = false)
    # The third value returned by `read_aerosol_dataset` is not needed here.
    if use_S_max
        X_test, Y_test, _ = read_aerosol_dataset(dataset_filename, :S_max)
    else
        X_test, Y_test, _ = read_aerosol_dataset(dataset_filename)
    end

    # Every prediction path is evaluated *inside* the timed block so that the reported
    # time reflects the actual computation and the paths can be compared.
    # Note that `@elapsed` on a first call also includes compilation time.
    if mach_filename === nothing
        predict_time = @elapsed begin
            pred_act_frac = PAD.get_ARG_act_frac(X_test)[:, 1]
        end
    else
        mach = MLJ.machine(mach_filename)
        predict_time = @elapsed begin
            if use_S_max
                # The machine output is fed to `get_ARG_act_frac` as its second argument
                # and the first column of the result is used as the prediction.
                pred_act_frac = PAD.get_ARG_act_frac(X_test, MLJ.predict(mach, X_test))[:, 1]
            else
                pred_act_frac = MLJ.predict(mach, X_test)
            end
        end
    end
    predict_time_per_entry = predict_time / length(Y_test)
    println("Time to predict per entry: $(predict_time_per_entry) seconds")

    # Bring the reference targets to the same quantity as the predictions.
    if use_S_max
        PySDM_act_frac = PAD.get_ARG_act_frac(X_test, Y_test)[:, 1]
    else
        PySDM_act_frac = Y_test
    end
    println("RMSE: ", sqrt(StatsBase.msd(PySDM_act_frac, pred_act_frac)))

    fig = Figure(resolution=(500, 500))
    ax = Axis(fig[1, 1], xlabel="PySDM act frac", ylabel="Predicted act frac")
    scatter!(ax, PySDM_act_frac, pred_act_frac, color=:blue, markersize=10, alpha=0.2)
    # 1:1 reference line: points on it are perfect predictions.
    lines!(ax, [0, 1], [0, 1], color=:red, label=nothing)
    return fig
end

In [None]:
# Evaluate the machine stored in emulators/1modal_nn_machine_naive.jls on the test set
# datasets/1modal_dataset1_test.csv; `use_S_max` keeps its default (`false`).
plot_accuracy_scatterplot("datasets/1modal_dataset1_test.csv", "emulators/1modal_nn_machine_naive.jls")