In [1]:
using Plots
using LaTeXStrings
using CSV
using DataFrames
using Statistics
using JSON
using StatsPlots

In [2]:
# Experiment registry: dataset key => (results directory name, number of runs).
# The directory name is looked up under "exps/" by the helper functions below.
paths = Dict{String,Tuple{String,Int}}(
    "4_100"   => ("exp1_goal_4_100_2025-05-24_10-58-48", 50),
    "4_1000"  => ("exp1_goal_4_1000_2025-05-24_11-01-42", 50),
    "10_100"  => ("exp1_goal_10_100_2025-05-24_11-00-59", 50),
    "10_1000" => ("exp1_goal_10_1000_2025-05-24_11-01-22", 50),
    "30_100"  => ("exp1_goal_30_100_2025-05-24_11-01-05", 50),
    "30_1000" => ("exp1_goal_30_1000_2025-05-24_11-01-15", 50),
    "darwin"  => ("exp_darwin_goal_2025-05-27_20-23-34", 15),
)

Dict{String, Tuple{String, Int64}} with 7 entries:
  "10_100"  => ("exp1_goal_10_100_2025-05-24_11-00-59", 50)
  "4_100"   => ("exp1_goal_4_100_2025-05-24_10-58-48", 50)
  "30_100"  => ("exp1_goal_30_100_2025-05-24_11-01-05", 50)
  "30_1000" => ("exp1_goal_30_1000_2025-05-24_11-01-15", 50)
  "darwin"  => ("exp_darwin_goal_2025-05-27_20-23-34", 15)
  "10_1000" => ("exp1_goal_10_1000_2025-05-24_11-01-22", 50)
  "4_1000"  => ("exp1_goal_4_1000_2025-05-24_11-01-42", 50)

In [5]:
"""
    count_iterations(dataset)

Return one value per run of experiment `dataset`: the entry in the last row of
column `"i"` of that run's `opt_times_<run>.csv` file.

`dataset` must be a key of the global `paths` table, which provides the results
directory (read from under `exps/`) and the number of runs. The result is a
`Vector{Float64}` with one element per run.
"""
function count_iterations(dataset)
    folder, nruns = paths[dataset]
    root = "exps/" * folder
    # The typed comprehension converts every entry to Float64.
    return Float64[
        CSV.read("$(root)/opt_times_$(run).csv", DataFrame)[end, "i"] for run in 1:nruns
    ]
end

count_iterations (generic function with 1 method)

In [12]:
# For every experiment in `paths`, print the mean of the per-run final iteration
# values and how many runs (absolute count and fraction) ended at exactly 100.
# NOTE(review): 100 is presumably the optimiser's iteration cap — confirm.
for (k, entry) in paths
    println(k)
    iters = count_iterations(k)
    it = entry[2]  # number of runs recorded for this experiment
    iter_100 = count(==(100.0), iters)
    frac_iter_100 = iter_100 / it
    println("Mean iterations: $(mean(iters))")
    println("Number of 100 iterations: $(iter_100)")
    println("Fraction of 100 iterations: $(frac_iter_100)")
end

10_100
Mean iterations: 49.68
Number of 100 iterations: 20
Fraction of 100 iterations: 0.4
4_100
Mean iterations: 10.04
Number of 100 iterations: 1
Fraction of 100 iterations: 0.02
30_100
Mean iterations: 74.18
Number of 100 iterations: 32
Fraction of 100 iterations: 0.64
30_1000
Mean iterations: 72.98
Number of 100 iterations: 31
Fraction of 100 iterations: 0.62
darwin
Mean iterations: 100.0
Number of 100 iterations: 15
Fraction of 100 iterations: 1.0
10_1000
Mean iterations: 58.46
Number of 100 iterations: 22
Fraction of 100 iterations: 0.44
4_1000
Mean iterations: 36.76
Number of 100 iterations: 2
Fraction of 100 iterations: 0.04


In [13]:
"""
    get_g_and_acc(dataset)

Read `params_<run>.json` for every run of experiment `dataset` and collect the
`"accuracy"` entry and the `"G"` entry nested under `"hps"`.

`dataset` must be a key of the global `paths` table, which provides the results
directory (read from under `exps/`) and the number of runs.

Returns the tuple `(accs, Gs)` of two `Vector{Float64}`s. Note that the order is
accuracies first, then `G` values, which is the reverse of the function name.
"""
function get_g_and_acc(dataset)
    folder, nruns = paths[dataset]
    root = "exps/" * folder
    Gs = Float64[]
    accs = Float64[]
    for run in 1:nruns
        record = JSON.parsefile("$(root)/params_$(run).json")
        push!(Gs, record["hps"]["G"])
        push!(accs, record["accuracy"])
    end
    return accs, Gs
end

get_g_and_acc (generic function with 1 method)

In [55]:
using Statistics

In [56]:
# Correlation between the per-run "accuracy" and "G" values of the "darwin" experiment.
(accs, Gs) = get_g_and_acc("darwin")
Statistics.cor(accs, Gs)

0.271336242908664

In [57]:
# Correlation between the per-run "accuracy" and "G" values of the "4_100" experiment.
(accs, Gs) = get_g_and_acc("4_100")
Statistics.cor(accs, Gs)

-0.10830283141008945

In [58]:
# Correlation between the per-run "accuracy" and "G" values of the "4_1000" experiment.
(accs, Gs) = get_g_and_acc("4_1000")
Statistics.cor(accs, Gs)

-0.4693959847755619

In [59]:
# Correlation between the per-run "accuracy" and "G" values of the "10_100" experiment.
(accs, Gs) = get_g_and_acc("10_100")
Statistics.cor(accs, Gs)

-0.16614650718684273

In [60]:
# Correlation between the per-run "accuracy" and "G" values of the "10_1000" experiment.
(accs, Gs) = get_g_and_acc("10_1000")
Statistics.cor(accs, Gs)

0.06586229330858143

In [61]:
# Correlation between the per-run "accuracy" and "G" values of the "30_100" experiment.
(accs, Gs) = get_g_and_acc("30_100")
Statistics.cor(accs, Gs)

0.029181676297636502

In [62]:
# Correlation between the per-run "accuracy" and "G" values of the "30_1000" experiment.
(accs, Gs) = get_g_and_acc("30_1000")
Statistics.cor(accs, Gs)

0.09373552501832495

In [35]:
using Random
using MLJ
using MLJBase
using StatsBase

In [38]:
# Class balance of the train/test split for synthetic data with d = 4, n = 100.
# Reseed the global RNG first so that rerunning the cell starts from the same state.
Random.seed!(50)
# Load the file expected to define `make_synth_data2` (its source is not shown here).
include("../test/synthdata2.jl")
d = 4
n = 100
# NOTE(review): presumably `n` samples with `d` features each — confirm in synthdata2.jl.
X, Y = make_synth_data2(n, d)
# Shuffled 80/20 split of the indices along X's second dimension; the same
# indices are used below to select the labels from Y.
train_indices, test_indices = partition(1:size(X, 2), 0.8, shuffle = true)
y_train = Y[train_indices]
y_test = Y[test_indices]
# Number of occurrences of each label in the train and test subsets.
countmap(y_train), countmap(y_test)

(Dict(2 => 42, 1 => 38), Dict(2 => 8, 1 => 12))

In [53]:
# Counts typed in by hand from the countmap output of the d = 4, n = 100 split:
# train class 1 / class 2, then the test-set shares of class 2 and of class 1.
# NOTE(review): most sibling cells report a test class ratio instead of shares — confirm intent.
38/42, 8/20, 12/20

(0.9047619047619048, 0.4, 0.6)

In [39]:
# Class balance of the train/test split for synthetic data with d = 4, n = 1000.
# Reseed the global RNG first so that rerunning the cell starts from the same state.
Random.seed!(50)
# Load the file expected to define `make_synth_data2` (its source is not shown here).
include("../test/synthdata2.jl")
d = 4
n = 1000
# NOTE(review): presumably `n` samples with `d` features each — confirm in synthdata2.jl.
X, Y = make_synth_data2(n, d)
# Shuffled 80/20 split of the indices along X's second dimension; the same
# indices are used below to select the labels from Y.
train_indices, test_indices = partition(1:size(X, 2), 0.8, shuffle = true)
y_train = Y[train_indices]
y_test = Y[test_indices]
# Number of occurrences of each label in the train and test subsets.
countmap(y_train), countmap(y_test)

(Dict(2 => 391, 1 => 409), Dict(2 => 109, 1 => 91))

In [54]:
# Counts typed in by hand from the countmap output of the d = 4, n = 1000 split:
# train class 2 / class 1, then the test-set shares of class 1 and of class 2.
# NOTE(review): most sibling cells report a test class ratio instead of shares — confirm intent.
391/409, 91/200, 109/200

(0.9559902200488998, 0.455, 0.545)

In [40]:
# Class balance of the train/test split for synthetic data with d = 10, n = 100.
# Reseed the global RNG first so that rerunning the cell starts from the same state.
Random.seed!(50)
# Load the file expected to define `make_synth_data2` (its source is not shown here).
include("../test/synthdata2.jl")
d = 10
n = 100
# NOTE(review): presumably `n` samples with `d` features each — confirm in synthdata2.jl.
X, Y = make_synth_data2(n, d)
# Shuffled 80/20 split of the indices along X's second dimension; the same
# indices are used below to select the labels from Y.
train_indices, test_indices = partition(1:size(X, 2), 0.8, shuffle = true)
y_train = Y[train_indices]
y_test = Y[test_indices]
# Number of occurrences of each label in the train and test subsets.
countmap(y_train), countmap(y_test)

(Dict(2 => 39, 1 => 41), Dict(2 => 11, 1 => 9))

In [50]:
# Counts typed in by hand from the countmap output of the d = 10, n = 100 split:
# train class 2 / class 1, then test class 1 / class 2 (smaller count on top in both).
39/41, 9/11

(0.9512195121951219, 0.8181818181818182)

In [41]:
# Class balance of the train/test split for synthetic data with d = 10, n = 1000.
# Reseed the global RNG first so that rerunning the cell starts from the same state.
Random.seed!(50)
# Load the file expected to define `make_synth_data2` (its source is not shown here).
include("../test/synthdata2.jl")
d = 10
n = 1000
# NOTE(review): presumably `n` samples with `d` features each — confirm in synthdata2.jl.
X, Y = make_synth_data2(n, d)
# Shuffled 80/20 split of the indices along X's second dimension; the same
# indices are used below to select the labels from Y.
train_indices, test_indices = partition(1:size(X, 2), 0.8, shuffle = true)
y_train = Y[train_indices]
y_test = Y[test_indices]
# Number of occurrences of each label in the train and test subsets.
countmap(y_train), countmap(y_test)

(Dict(2 => 398, 1 => 402), Dict(2 => 102, 1 => 98))

In [49]:
# Counts typed in by hand from the countmap output of the d = 10, n = 1000 split:
# train class 2 / class 1, then test class 2 / class 1.
# NOTE(review): the test ratio has the larger count on top (value > 1), unlike the
# other ratio cells, which put the smaller count on top — confirm which is intended.
398/402, 102/98

(0.9900497512437811, 1.0408163265306123)

In [43]:
# Class balance of the train/test split for synthetic data with d = 30, n = 100.
# Reseed the global RNG first so that rerunning the cell starts from the same state.
Random.seed!(50)
# Load the file expected to define `make_synth_data2` (its source is not shown here).
include("../test/synthdata2.jl")
d = 30
n = 100
# NOTE(review): presumably `n` samples with `d` features each — confirm in synthdata2.jl.
X, Y = make_synth_data2(n, d)
# Shuffled 80/20 split of the indices along X's second dimension; the same
# indices are used below to select the labels from Y.
train_indices, test_indices = partition(1:size(X, 2), 0.8, shuffle = true)
y_train = Y[train_indices]
y_test = Y[test_indices]
# Number of occurrences of each label in the train and test subsets.
countmap(y_train), countmap(y_test)

(Dict(2 => 39, 1 => 41), Dict(2 => 11, 1 => 9))

In [48]:
# Counts typed in by hand from the countmap output of the d = 30, n = 100 split:
# train class 2 / class 1, then test class 1 / class 2 (smaller count on top in both).
39/41, 9/11

(0.9512195121951219, 0.8181818181818182)

In [44]:
# Class balance of the train/test split for synthetic data with d = 30, n = 1000.
# Reseed the global RNG first so that rerunning the cell starts from the same state.
Random.seed!(50)
# Load the file expected to define `make_synth_data2` (its source is not shown here).
include("../test/synthdata2.jl")
d = 30
n = 1000
# NOTE(review): presumably `n` samples with `d` features each — confirm in synthdata2.jl.
X, Y = make_synth_data2(n, d)
# Shuffled 80/20 split of the indices along X's second dimension; the same
# indices are used below to select the labels from Y.
train_indices, test_indices = partition(1:size(X, 2), 0.8, shuffle = true)
y_train = Y[train_indices]
y_test = Y[test_indices]
# Number of occurrences of each label in the train and test subsets.
countmap(y_train), countmap(y_test)

(Dict(2 => 403, 1 => 397), Dict(2 => 97, 1 => 103))

In [47]:
# Counts typed in by hand from the countmap output of the d = 30, n = 1000 split:
# train class 1 / class 2, then test class 2 / class 1 (smaller count on top in both).
397/403, 97/103

(0.9851116625310173, 0.941747572815534)

In [45]:
# Class balance of the train/test split for the DARWIN CSV data.
# Reseed the global RNG first so that rerunning the cell starts from the same state.
Random.seed!(50)
df = CSV.read("../exp/darwin.csv", DataFrame)
# Every column except the first and the last, converted to a Float64 matrix.
# NOTE(review): the first column is presumably an identifier — confirm against the CSV.
X = Matrix{Float64}(df[:, 2:end-1])
# Transpose so that each table row becomes a column of X.
X = X'
# The last column holds the string labels; map "P" to 1 and "H" to 2.
label_map = Dict("P" => 1, "H" => 2)
Y = [label_map[label] for label in df[:,end]]

# Shuffled 80/20 split over the columns of X; the same indices select the labels.
train_indices, test_indices = partition(1:size(X, 2), 0.8, shuffle = true)
X_train = X[:, train_indices]
X_test = X[:, test_indices]
y_train = Y[train_indices]
y_test = Y[test_indices]
# Number of occurrences of each label in the train and test subsets.
countmap(y_train), countmap(y_test)

(Dict(2 => 68, 1 => 71), Dict(2 => 17, 1 => 18))

In [46]:
# Counts typed in by hand from the countmap output of the DARWIN split:
# train class 2 / class 1, then test class 2 / class 1 (smaller count on top in both).
68/71, 17/18

(0.9577464788732394, 0.9444444444444444)