In [None]:
using DataFrames
using Distributions
using CSV
using HDF5
using Plots
using StatsPlots
using Statistics

include("ActiveDomainAdaptation.jl")
using .ActiveDomainAdaptation
include("DataSets.jl")
using .DataSets

In [None]:
# Pass the result of `get_mnist("data/mnist")` through `prepare_mnist` and unpack the
# four values it returns into the train/test inputs and labels.
X_mnist_train, y_mnist_train, X_mnist_test, y_mnist_test =
    get_mnist("data/mnist") |> prepare_mnist
# Cell output: the sizes of the train and test inputs, as a tuple.
map(size, (X_mnist_train, X_mnist_test))

In [None]:
# Per-round mean and standard deviation of accuracy for the random-sampling runs.
random_df = DataFrame(CSV.File("data/random-sampling.csv"))
# `sort=true` pins the group order to ascending `round`. With the default setting the
# order of groups is unspecified, yet the vectors below are later plotted by position
# against 0:30, so they must be ordered by round.
random_gdf = groupby(random_df, :round; sort=true)
# A single `combine` produces both statistics (columns `accuracy_mean`, `accuracy_std`)
# instead of aggregating the groups twice.
random_stats = combine(random_gdf, :accuracy => mean, :accuracy => std)
accuracies_mean_random = random_stats.accuracy_mean
accuracies_std_random = random_stats.accuracy_std
# Cell output: both vectors should have one entry per round.
size(accuracies_mean_random), size(accuracies_std_random)

In [None]:
# Per-round mean and standard deviation of accuracy for the entropy-sampling runs.
entropy_df = DataFrame(CSV.File("data/entropy-sampling.csv"))
# `sort=true` pins the group order to ascending `round`. With the default setting the
# order of groups is unspecified, yet the vectors below are later plotted by position
# against 0:30, so they must be ordered by round.
entropy_gdf = groupby(entropy_df, :round; sort=true)
# A single `combine` produces both statistics (columns `accuracy_mean`, `accuracy_std`)
# instead of aggregating the groups twice.
entropy_stats = combine(entropy_gdf, :accuracy => mean, :accuracy => std)
accuracies_mean_entropy = entropy_stats.accuracy_mean
accuracies_std_entropy = entropy_stats.accuracy_std
# Cell output: both vectors should have one entry per round.
size(accuracies_mean_entropy), size(accuracies_std_entropy)

In [None]:
# Critical value of Student's t distribution used to scale the standard errors below.
# NOTE(review): `n` is presumably the number of repeated runs behind each round's
# statistics -- confirm against the CSV files.
n, confidence_level = 30, 0.99
α = 1 - confidence_level
# Complementary quantile at α / 2 of a t distribution with n - 1 degrees of freedom.
t = let tdist = TDist(n - 1)
    cquantile(tdist, α / 2)
end

In [None]:
# Per-round half-width of the interval for random sampling: t * (std / sqrt(n)).
# This is the ± margin that is passed as `yerror` when plotting.
confidence_interval_random = t .* (accuracies_std_random ./ sqrt(n))

In [None]:
# Per-round half-width of the interval for entropy sampling: t * (std / sqrt(n)).
# This is the ± margin that is passed as `yerror` when plotting.
confidence_interval_entropy = t .* (accuracies_std_entropy ./ sqrt(n))

In [None]:
# Mean accuracy per round for both strategies, with the confidence half-widths drawn as
# vertical error bars. The x-axis is the fixed range 0:30, shared by both series.
let rounds = 0:30
    plt = scatter(rounds, accuracies_mean_random;
        yerror=confidence_interval_random,
        label="Random Sampling",
        legend_position=:bottomright, xlabel="Round", ylabel="Accuracy")
    # Overlay the entropy-sampling series on the same plot; the plot is the cell output.
    scatter!(plt, rounds, accuracies_mean_entropy;
        yerror=confidence_interval_entropy,
        label="Entropy Sampling")
end

In [None]:
# Box plots of the accuracy values grouped by round, drawn from the raw (un-aggregated)
# data frames. `@df` lets the `:round` and `:accuracy` symbols refer to columns of the
# given data frame.
@df random_df boxplot(:round, :accuracy, label="Random Sampling",
    legend_position=:bottomright, xlabel="Round", ylabel="Accuracy")
# Add the entropy-sampling boxes to the plot created above.
@df entropy_df boxplot!(:round, :accuracy, label="Entropy Sampling")

In [None]:
# Path of the HDF5 file. It is also read as a global by `human_labeller_wrap`.
file = "data/human_labeller.hdf5"
# Store the MNIST training inputs and labels in that file under the names "X" and "y".
# The do-block form closes the file when the block finishes.
h5open(file, "w") do h5
    for (name, data) in (("X", X_mnist_train), ("y", y_mnist_train))
        write(h5, name, data)
    end
end

"""
    human_labeller_wrap(index_query, logit_query, round)

Forward the three arguments to `human_labeller`, appending the globals
`y_mnist_train` and `file` as its fourth and fifth arguments, and return its result.
"""
human_labeller_wrap(index_query, logit_query, round) =
    human_labeller(index_query, logit_query, round, y_mnist_train, file)

# Run `simulate_al` with `entropy_sampling` and `human_labeller_wrap` on the MNIST
# train/test split, using a model built from "lenet.bson" and `n_query=100`.
# The two returned values are kept as `rounds_human` and `accuracies_human`.
rounds_human, accuracies_human = let model = LeNetVariant("lenet.bson")
    simulate_al(
        entropy_sampling, human_labeller_wrap, model,
        X_mnist_train, y_mnist_train,
        X_mnist_test, y_mnist_test;
        n_query=100)
end