# Accuracy-based BO example

In [7]:
include("src/BayesianCausalityOptimization.jl");

using .BayesianCausalityOptimization

valid_pairs = setdiff(1:100,[52,53,54,55,71])
tuebingen_data = BayesianCausalityOptimization.Datasets.tuebingen(valid_pairs);
#pair_ids = ["001","007","016","025","034","049","064","072","073","078","086","087","088","096","100"]
#selected_data = create_data(tuebingen_data, pair_ids, 1000);

T_max = 10
Rs = 5:5
EPOCHS = 10
M = 2
P = 10
n_iterations = 10

optimizer, optimum = bayesian_optimization(tuebingen_data, T_max;Rs=Rs, EPOCHS=EPOCHS, M=M, P=P, 
                                            n_iterations=n_iterations)

# Likelihood-based BO

In [1]:
include("src/BayesianCausalityOptimization.jl");
include("src/BayesianCausality.jl");
include("src/Datasets.jl");
include("src/Misc.jl");

using .Misc, .BayesianCausality, .BayesianCausalityOptimization
using .Datasets
using Statistics, Distributions
using SpecialFunctions, LinearAlgebra
import Base.Iterators: product
import Random: randperm, seed!
using BayesOpt
import Base.Iterators: enumerate
import Base.Filesystem: mkpath

SEED = 100
seed!(SEED)
RESULTS_PATH = "./results/tuebingen/vb-hp-opt"
PARAMS_PATH = "./params";

In [2]:
valid_pairs = setdiff(1:100,[52,53,54,55,71])
tuebingen_data = BayesianCausalityOptimization.Datasets.tuebingen(valid_pairs);

│   caller = ip:0x0
└ @ Core :-1


In [4]:
function prepare_empty_parameter_dictionary(Rs::UnitRange{Int64}, data::Array{Dict{Symbol,Any},1})
    DIRECTION = ['>','<', '^']
    resulting_parameters = Dict()
    likelihoods = Dict()
    for (id, pair) ∈ enumerate(data)
        resulting_parameters[pair[:id]] = Dict()
        likelihoods[pair[:id]] = Dict()
        for R in Rs
            resulting_parameters[pair[:id]][R] = Dict()
            likelihoods[pair[:id]][R] = Dict()
            for direction in DIRECTION
                resulting_parameters[pair[:id]][R][direction] = Dict()
                likelihoods[pair[:id]][R][direction] = Dict()
            end
        end
    end
    return resulting_parameters, likelihoods
end

function bayesian_optimization_likelihood(xⁿ, yⁿ; R, EPOCHS, M, P, n_iterations, m₁, m₂)
    config = ConfigParameters()        # calls initialize_parameters_to_default of the C API
    config.n_iterations = n_iterations;
    config.verbose_level = 0
    set_kernel!(config, "kMaternARD5")  # calls set_kernel of the C API
    config.sc_type = SC_MAP;
    f((γ, λ₁, λ₂, a₁, a₂, b₁, b₂)) = -v_bayes([xⁿ yⁿ],R;M=M,γ=exp(γ),m₁=m₁,m₂=m₂,λ₁=exp(λ₁),λ₂=exp(λ₂),
        a₁=exp(a₁),a₂=exp(a₂),b₁=exp(b₁),b₂=exp(b₂),EPOCHS=EPOCHS)[1][end]
    param_keys = ["γ", "λ₁", "λ₂", "a₁", "a₂", "b₁", "b₂"]
    lowerbound = [log(.0001), log(.0001), log(.0001), log(.0001), log(.0001), log(.0001), log(.0001)]; 
    upperbound = [log(10000.), log(10000.), log(10000.), log(10000.), log(10000.), log(10000.), log(10000.)];
    @time optimizer, optimum = bayes_optimization(f, lowerbound, upperbound, config)
    return Dict(zip(param_keys, optimizer)), optimum
    end;
                    
function vb_hyperparameter_optimization(data,T_max=Inf; name="hp_opt_likelihood", M=1, P=1, Rs=1:1, m₁=0.0, m₂=0.0,
        EPOCHS=1, warm_start_type="free", n_iterations=200, norm=true)
    mkpath("$RESULTS_PATH/$(name)_$(warm_start_type)")
    DIRECTION = ['>','<', '^']
    parameters, likelihoods = prepare_empty_parameter_dictionary(Rs, data)
    for (id,pair) ∈ enumerate(data)
    	print(pair[:id],"...\t\t\t")
        T = length(pair[:X])
        perm = randperm(T)[1:Int(min(T,T_max))]
        x, y = pair[:X][perm], pair[:Y][perm]
        for R in Rs
            println(" ", R)
            xⁿ = norm ? (x .- mean(x)) / std(x) : x
            yⁿ = norm ? (y .- mean(y)) / std(y) : y
            parameters[pair[:id]][R]['^'], likelihoods[pair[:id]][R]['^'] = bayesian_optimization_likelihood(xⁿ, yⁿ; 
                R=R, EPOCHS=EPOCHS, M=1, P=P, n_iterations=n_iterations, m₁=m₁, m₂=m₂)
            parameters[pair[:id]][R]['>'], likelihoods[pair[:id]][R]['>'] = bayesian_optimization_likelihood(xⁿ, yⁿ; 
                R=R, EPOCHS=EPOCHS, M=M, P=P, n_iterations=n_iterations, m₁=m₁, m₂=m₂)
            parameters[pair[:id]][R]['<'], likelihoods[pair[:id]][R]['<'] = bayesian_optimization_likelihood(xⁿ, yⁿ; 
                R=R, EPOCHS=EPOCHS, M=M, P=P, n_iterations=n_iterations, m₁=m₁, m₂=m₂)
        end        
    end
    metaparameters = Dict("M"=>M, "EPOCHS"=>EPOCHS, "P"=>P, "m₁"=>m₁, "m₂"=>m₂, "T_max"=>T_max, 
        "Rs"=>Rs, "warm_start_type"=>warm_start_type, "n_iterations"=>n_iterations, 
        "SEED"=>SEED, "norm"=>norm)
    save_json("$RESULTS_PATH/$(name)_$(warm_start_type)/metaparameters.json", metaparameters=metaparameters)
    save_json("$RESULTS_PATH/$(name)_$(warm_start_type)/parameters.json", parameters=parameters);
    save_json("$RESULTS_PATH/$(name)_$(warm_start_type)/likelihoods.json", likelihoods=likelihoods);
    end;

In [5]:
vb_hyperparameter_optimization(tuebingen_data[75:end],1000; name="hp_opt_likelihood_4", M=4, P=100, Rs=1:5, m₁=0.0, m₂=0.0,
        EPOCHS=200, warm_start_type="free", n_iterations=200)

pair0080...			 1


│   caller = #v_bayes#24(::Int64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Int64, ::typeof(v_bayes), ::Array{Float64,2}, ::Int64) at BayesianCausality.jl:441
└ @ Main.BayesianCausality /home/melih/Desktop/projects/aistats_causality/src/BayesianCausality.jl:441
│   caller = #v_bayes#24(::Int64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Int64, ::typeof(v_bayes), ::Array{Float64,2}, ::Int64) at BayesianCausality.jl:443
└ @ Main.BayesianCausality /home/melih/Desktop/projects/aistats_causality/src/BayesianCausality.jl:443
│   caller = #v_bayes#24(::Int64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Int64, ::typeof(v_bayes), ::Array{Float64,2}, ::Int64) at BayesianCausality.jl:446
└ @ Main.BayesianCausality /home/melih/Desktop/projects/aistats_causality/src/BayesianCausality.jl:446
│   caller = #v_bayes#24(::Int64, ::F

 23.043486 seconds (6.23 M allocations: 441.569 MiB, 0.62% gc time)
 21.973428 seconds (6.00 M allocations: 667.103 MiB, 0.82% gc time)
 23.982096 seconds (6.00 M allocations: 667.103 MiB, 0.73% gc time)
 2
 76.878815 seconds (313.70 M allocations: 22.419 GiB, 7.80% gc time)
 86.336407 seconds (272.28 M allocations: 29.524 GiB, 8.45% gc time)
102.715862 seconds (349.05 M allocations: 37.873 GiB, 8.10% gc time)
 3
114.960742 seconds (593.35 M allocations: 42.744 GiB, 9.45% gc time)
242.632879 seconds (952.72 M allocations: 104.955 GiB, 11.02% gc time)
192.526395 seconds (746.65 M allocations: 82.161 GiB, 11.38% gc time)
 4
221.962256 seconds (1.34 G allocations: 96.601 GiB, 11.23% gc time)
250.140579 seconds (1.01 G allocations: 110.971 GiB, 11.68% gc time)
314.230471 seconds (1.33 G allocations: 147.010 GiB, 11.36% gc time)
 5
286.212080 seconds (1.76 G allocations: 126.883 GiB, 11.74% gc time)
391.157672 seconds (1.71 G allocations: 189.959 GiB, 12.66% gc time)
372.506648 seconds (1.5

263.123709 seconds (1.07 G allocations: 118.163 GiB, 13.87% gc time)
 4
172.467844 seconds (1.08 G allocations: 77.007 GiB, 13.49% gc time)
309.183642 seconds (1.34 G allocations: 148.642 GiB, 12.92% gc time)
309.158279 seconds (1.36 G allocations: 150.778 GiB, 13.91% gc time)
 5
257.992568 seconds (1.72 G allocations: 123.766 GiB, 14.53% gc time)
363.853278 seconds (1.62 G allocations: 180.757 GiB, 15.15% gc time)
379.826120 seconds (1.78 G allocations: 197.702 GiB, 13.45% gc time)
pair0088...			 1
 27.501211 seconds (2.20 M allocations: 172.432 MiB, 0.15% gc time)
 23.351360 seconds (2.42 M allocations: 269.098 MiB, 0.25% gc time)
 20.005143 seconds (2.42 M allocations: 269.098 MiB, 0.32% gc time)
 2
 49.611241 seconds (94.88 M allocations: 7.092 GiB, 3.97% gc time)
 70.026374 seconds (142.05 M allocations: 15.415 GiB, 5.82% gc time)
 87.511789 seconds (174.32 M allocations: 18.923 GiB, 5.86% gc time)
 3
107.829792 seconds (295.04 M allocations: 22.097 GiB, 6.00% gc time)
128.417456 

 22.512447 seconds (7.32 M allocations: 541.813 MiB, 0.64% gc time)
 24.882423 seconds (8.16 M allocations: 908.432 MiB, 0.88% gc time)
 24.665201 seconds (8.16 M allocations: 908.432 MiB, 0.91% gc time)
 2
 96.756411 seconds (499.15 M allocations: 35.405 GiB, 10.33% gc time)
143.237807 seconds (524.17 M allocations: 56.915 GiB, 10.99% gc time)
172.430886 seconds (672.86 M allocations: 73.085 GiB, 12.43% gc time)
 3
150.854923 seconds (893.62 M allocations: 63.913 GiB, 12.96% gc time)
276.418912 seconds (1.23 G allocations: 135.260 GiB, 12.25% gc time)
265.560900 seconds (1.14 G allocations: 125.569 GiB, 12.08% gc time)
 4
235.178279 seconds (1.64 G allocations: 117.418 GiB, 13.10% gc time)
421.463973 seconds (1.96 G allocations: 216.483 GiB, 14.86% gc time)
348.818156 seconds (1.55 G allocations: 171.441 GiB, 13.97% gc time)
 5
259.500886 seconds (1.82 G allocations: 130.574 GiB, 13.70% gc time)
431.287411 seconds (2.04 G allocations: 225.437 GiB, 14.67% gc time)
395.909486 seconds (1

"{\"likelihoods\":{\"pair0087\":{\"4\":{\"<\":-923.3452665034149,\"^\":-681.7501131723368,\">\":-727.9613777799211},\"2\":{\"<\":-782.0145122615323,\"^\":-804.0204323497078,\">\":-785.6153655585455},\"3\":{\"<\":-906.7409863692563,\"^\":-966.1983174396693,\">\":-895.8859533285383},\"5\":{\"<\":-677.9431502544531,\"^\":-512.7682676519671,\">\":-702.2349412133403},\"1\":{\"<\":2810.6114180499017,\"^\":2848.396012638328,\">\":2811.033993157793}},\"pair0090\":{\"4\":{\"<\":356.1928794079062,\"^\":363.08652535802236,\">\":358.5972855673296},\"2\":{\"<\":356.1312816706932,\"^\":360.63331599250444,\">\":356.1675519032518},\"3\":{\"<\":363.03125448044136,\"^\":362.47203437487553,\">\":355.43710410824315},\"5\":{\"<\":362.0819768889029,\"^\":369.35681173508954,\">\":356.24288182755123},\"1\":{\"<\":353.3671301085472,\"^\":360.18262750607164,\">\":355.72809235661356}},\"pair0095\":{\"4\":{\"<\":2537.2028719817836,\"^\":2569.24645948879,\">\":2537.8331457526215},\"2\":{\"<\":2663.1481947818743,\"