# Accuracy-based BO example

In [7]:
include("src/BayesianCausalityOptimization.jl");

using .BayesianCausalityOptimization

valid_pairs = setdiff(1:100,[52,53,54,55,71])
tuebingen_data = BayesianCausalityOptimization.Datasets.tuebingen(valid_pairs);
#pair_ids = ["001","007","016","025","034","049","064","072","073","078","086","087","088","096","100"]
#selected_data = create_data(tuebingen_data, pair_ids, 1000);

T_max = 10
Rs = 5:5
EPOCHS = 10
M = 2
P = 10
n_iterations = 10

optimizer, optimum = bayesian_optimization(tuebingen_data, T_max;Rs=Rs, EPOCHS=EPOCHS, M=M, P=P, 
                                            n_iterations=n_iterations)

# Likelihood-based BO

In [1]:
include("src/BayesianCausalityOptimization.jl");
include("src/BayesianCausality.jl");
include("src/Datasets.jl");
include("src/Misc.jl");

using .Misc, .BayesianCausality, .BayesianCausalityOptimization
using .Datasets
using Statistics, Distributions
using SpecialFunctions, LinearAlgebra
import Base.Iterators: product
import Random: randperm, seed!
using BayesOpt
import Base.Iterators: enumerate
import Base.Filesystem: mkpath

SEED = 100
seed!(SEED)
RESULTS_PATH = "./results/tuebingen/vb-hp-opt"
PARAMS_PATH = "./params";

In [2]:
valid_pairs = setdiff(1:100,[52,53,54,55,71])
tuebingen_data = BayesianCausalityOptimization.Datasets.tuebingen(valid_pairs);

│   caller = ip:0x0
└ @ Core :-1


In [3]:
redirect_stdout(prepare_empty_parameter_dictionary(1:5, tuebingen_data[1:3]), stdout)

UndefVarError: UndefVarError: prepare_empty_parameter_dictionary not defined

In [4]:
function prepare_empty_parameter_dictionary(Rs::UnitRange{Int64}, data::Array{Dict{Symbol,Any},1})
    DIRECTION = ['>','<', '^']
    resulting_parameters = Dict()
    likelihoods = Dict()
    for (id, pair) ∈ enumerate(data)
        resulting_parameters[pair[:id]] = Dict()
        likelihoods[pair[:id]] = Dict()
        for R in Rs
            resulting_parameters[pair[:id]][R] = Dict()
            likelihoods[pair[:id]][R] = Dict()
            for direction in DIRECTION
                resulting_parameters[pair[:id]][R][direction] = Dict()
                likelihoods[pair[:id]][R][direction] = Dict()
            end
        end
    end
    return resulting_parameters, likelihoods
end

function bayesian_optimization_likelihood(xⁿ, yⁿ; R, EPOCHS, M, P, n_iterations, m₁, m₂)
    config = ConfigParameters()        # calls initialize_parameters_to_default of the C API
    config.n_iterations = n_iterations;
    config.verbose_level = 0
    set_kernel!(config, "kMaternARD5")  # calls set_kernel of the C API
    config.sc_type = SC_MAP;
    f((γ, λ₁, λ₂, a₁, a₂, b₁, b₂)) = -v_bayes([xⁿ yⁿ],R;M=M,γ=exp(γ),m₁=m₁,m₂=m₂,λ₁=exp(λ₁),λ₂=exp(λ₂),
        a₁=exp(a₁),a₂=exp(a₂),b₁=exp(b₁),b₂=exp(b₂),EPOCHS=EPOCHS)[1][end]
    param_keys = ["γ", "λ₁", "λ₂", "a₁", "a₂", "b₁", "b₂"]
    lowerbound = [log(.0001), log(.0001), log(.0001), log(.0001), log(.0001), log(.0001), log(.0001)]; 
    upperbound = [log(10000.), log(10000.), log(10000.), log(10000.), log(10000.), log(10000.), log(10000.)];
    @time optimizer, optimum = bayes_optimization(f, lowerbound, upperbound, config)
    return Dict(zip(param_keys, optimizer)), optimum
    end;
                    
function vb_hyperparameter_optimization(data,T_max=Inf; name="hp_opt_likelihood", M=1, P=1, Rs=1:1, m₁=0.0, m₂=0.0,
        EPOCHS=1, warm_start_type="free", n_iterations=200, norm=true)
    mkpath("$RESULTS_PATH/$(name)_$(warm_start_type)")
    DIRECTION = ['>','<', '^']
    parameters, likelihoods = prepare_empty_parameter_dictionary(Rs, data)
    for (id,pair) ∈ enumerate(data)
    	print(pair[:id],"...\t\t\t")
        T = length(pair[:X])
        perm = randperm(T)[1:Int(min(T,T_max))]
        x, y = pair[:X][perm], pair[:Y][perm]
        for R in Rs
            println(" ", R)
            xⁿ = norm ? (x .- mean(x)) / std(x) : x
            yⁿ = norm ? (y .- mean(y)) / std(y) : y
            parameters[pair[:id]][R]['^'], likelihoods[pair[:id]][R]['^'] = bayesian_optimization_likelihood(xⁿ, yⁿ; 
                R=R, EPOCHS=EPOCHS, M=1, P=P, n_iterations=n_iterations, m₁=m₁, m₂=m₂)
            parameters[pair[:id]][R]['>'], likelihoods[pair[:id]][R]['>'] = bayesian_optimization_likelihood(xⁿ, yⁿ; 
                R=R, EPOCHS=EPOCHS, M=M, P=P, n_iterations=n_iterations, m₁=m₁, m₂=m₂)
            parameters[pair[:id]][R]['<'], likelihoods[pair[:id]][R]['<'] = bayesian_optimization_likelihood(xⁿ, yⁿ; 
                R=R, EPOCHS=EPOCHS, M=M, P=P, n_iterations=n_iterations, m₁=m₁, m₂=m₂)
        end        
    end
    metaparameters = Dict("M"=>M, "EPOCHS"=>EPOCHS, "P"=>P, "m₁"=>m₁, "m₂"=>m₂, "T_max"=>T_max, 
        "Rs"=>Rs, "warm_start_type"=>warm_start_type, "n_iterations"=>n_iterations, 
        "SEED"=>SEED, "norm"=>norm)
    save_json("$RESULTS_PATH/$(name)_$(warm_start_type)/metaparameters.json", metaparameters=metaparameters)
    save_json("$RESULTS_PATH/$(name)_$(warm_start_type)/parameters.json", parameters=parameters);
    save_json("$RESULTS_PATH/$(name)_$(warm_start_type)/likelihoods.json", likelihoods=likelihoods);
    end;

In [5]:
vb_hyperparameter_optimization(tuebingen_data[26:50],1000; name="hp_opt_likelihood_2", M=4, P=100, Rs=1:5, m₁=0.0, m₂=0.0,
        EPOCHS=200, warm_start_type="free", n_iterations=200)

pair0026...			 1


│   caller = #v_bayes#24(::Int64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Int64, ::typeof(v_bayes), ::Array{Float64,2}, ::Int64) at BayesianCausality.jl:441
└ @ Main.BayesianCausality /home/melih/Desktop/projects/aistats_causality/src/BayesianCausality.jl:441
│   caller = #v_bayes#24(::Int64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Int64, ::typeof(v_bayes), ::Array{Float64,2}, ::Int64) at BayesianCausality.jl:443
└ @ Main.BayesianCausality /home/melih/Desktop/projects/aistats_causality/src/BayesianCausality.jl:443
│   caller = #v_bayes#24(::Int64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Int64, ::typeof(v_bayes), ::Array{Float64,2}, ::Int64) at BayesianCausality.jl:446
└ @ Main.BayesianCausality /home/melih/Desktop/projects/aistats_causality/src/BayesianCausality.jl:446
│   caller = #v_bayes#24(::Int64, ::F

 21.474815 seconds (8.17 M allocations: 580.980 MiB, 0.54% gc time)
 18.167388 seconds (8.16 M allocations: 908.432 MiB, 1.02% gc time)
 15.542065 seconds (8.16 M allocations: 908.432 MiB, 1.13% gc time)
 2
 41.125469 seconds (174.26 M allocations: 12.333 GiB, 6.27% gc time)
 54.840533 seconds (235.51 M allocations: 25.473 GiB, 9.41% gc time)
 53.979178 seconds (221.94 M allocations: 24.018 GiB, 7.84% gc time)
 3
121.999675 seconds (992.36 M allocations: 70.999 GiB, 12.13% gc time)
297.205510 seconds (1.73 G allocations: 190.307 GiB, 12.62% gc time)
294.373410 seconds (1.67 G allocations: 184.459 GiB, 13.59% gc time)
 4
214.124535 seconds (1.94 G allocations: 139.153 GiB, 12.79% gc time)
394.319416 seconds (2.44 G allocations: 270.890 GiB, 12.16% gc time)
403.983985 seconds (2.44 G allocations: 270.880 GiB, 13.60% gc time)
 5
251.241481 seconds (2.29 G allocations: 164.448 GiB, 14.22% gc time)
474.651900 seconds (2.49 G allocations: 276.580 GiB, 13.18% gc time)
573.327515 seconds (2.88

171.541268 seconds (495.03 M allocations: 54.525 GiB, 8.72% gc time)
 4
144.390127 seconds (497.08 M allocations: 36.664 GiB, 6.81% gc time)
255.077159 seconds (806.53 M allocations: 89.315 GiB, 10.45% gc time)
175.225088 seconds (515.28 M allocations: 56.890 GiB, 10.10% gc time)
 5
151.589634 seconds (572.00 M allocations: 42.268 GiB, 8.20% gc time)
230.111424 seconds (715.73 M allocations: 79.439 GiB, 11.41% gc time)
201.494088 seconds (597.37 M allocations: 66.215 GiB, 11.16% gc time)
pair0034...			 1
 22.008505 seconds (2.78 M allocations: 214.448 MiB, 0.28% gc time)
 22.548902 seconds (3.10 M allocations: 345.053 MiB, 0.49% gc time)
 26.006148 seconds (3.07 M allocations: 341.798 MiB, 0.39% gc time)
 2
 81.182230 seconds (225.67 M allocations: 16.607 GiB, 6.06% gc time)
 60.926303 seconds (123.67 M allocations: 13.399 GiB, 6.26% gc time)
 73.630853 seconds (162.95 M allocations: 17.672 GiB, 5.96% gc time)
 3
103.778643 seconds (339.18 M allocations: 25.052 GiB, 6.55% gc time)
171.

 23.178392 seconds (6.32 M allocations: 703.421 MiB, 0.80% gc time)
 2
 67.586613 seconds (257.03 M allocations: 18.339 GiB, 7.87% gc time)
 89.749915 seconds (286.60 M allocations: 31.071 GiB, 9.59% gc time)
 97.916346 seconds (317.16 M allocations: 34.394 GiB, 9.73% gc time)
 3
151.661524 seconds (857.25 M allocations: 61.677 GiB, 10.77% gc time)
216.022334 seconds (909.60 M allocations: 99.949 GiB, 11.07% gc time)
- 16:48:50.482229 ERROR: nlopt failure
129.260033 seconds (560.65 M allocations: 61.707 GiB, 12.17% gc time)
 4
180.618400 seconds (1.10 G allocations: 78.841 GiB, 11.64% gc time)
390.663715 seconds (1.69 G allocations: 187.321 GiB, 13.67% gc time)
325.100153 seconds (1.39 G allocations: 153.721 GiB, 13.96% gc time)
 5
194.171576 seconds (1.18 G allocations: 85.417 GiB, 11.72% gc time)
462.650804 seconds (2.15 G allocations: 239.243 GiB, 12.50% gc time)
388.947955 seconds (1.79 G allocations: 197.841 GiB, 12.52% gc time)
pair0042...			 1
 22.106559 seconds (7.32 M allocati

 88.539243 seconds (268.24 M allocations: 20.571 GiB, 5.40% gc time)
121.216653 seconds (337.02 M allocations: 37.162 GiB, 7.43% gc time)
125.662786 seconds (348.40 M allocations: 38.363 GiB, 7.57% gc time)
pair0049...			 1
 19.079624 seconds (2.92 M allocations: 224.420 MiB, 0.26% gc time)
 20.284788 seconds (3.23 M allocations: 359.076 MiB, 0.38% gc time)
 18.828519 seconds (3.23 M allocations: 359.076 MiB, 0.46% gc time)
 2
- 21:43:22.557092 ERROR: nlopt failure
 11.306335 seconds (46.05 M allocations: 3.380 GiB, 6.68% gc time)
- 21:44:18.377380 ERROR: nlopt failure
 55.819896 seconds (173.96 M allocations: 18.886 GiB, 8.51% gc time)
 79.180914 seconds (248.57 M allocations: 27.001 GiB, 8.80% gc time)
 3
 75.019146 seconds (285.57 M allocations: 21.041 GiB, 6.58% gc time)
117.830646 seconds (425.68 M allocations: 46.812 GiB, 10.07% gc time)
124.303051 seconds (453.59 M allocations: 49.807 GiB, 10.38% gc time)
 4
 99.388007 seconds (470.14 M allocations: 34.587 GiB, 8.62% gc time)
16

"{\"likelihoods\":{\"pair0027\":{\"4\":{\"<\":-746.8087019182344,\"^\":-535.4526225130393,\">\":-838.5622302527338},\"2\":{\"<\":-743.3646466098199,\"^\":-741.2726117239254,\">\":-742.3815747745722},\"3\":{\"<\":-645.4043536850793,\"^\":-755.8541895395623,\">\":-810.4811909335432},\"5\":{\"<\":-771.4267683941795,\"^\":-764.4725522052803,\">\":-755.7112763265707},\"1\":{\"<\":2835.5977335021853,\"^\":2847.609819481923,\">\":2838.5115490627436}},\"pair0028\":{\"4\":{\"<\":2270.3415249684826,\"^\":2577.1658325569024,\">\":2702.3140838570444},\"2\":{\"<\":2742.6034825930724,\"^\":2764.1343871453055,\">\":2744.687641593924},\"3\":{\"<\":2570.340685266846,\"^\":2683.2816073450063,\">\":2665.7506667120842},\"5\":{\"<\":0.0,\"^\":2339.795853115411,\">\":2246.0333244575927},\"1\":{\"<\":2756.933002599709,\"^\":2842.860441259292,\">\":2760.8844626445775}},\"pair0045\":{\"4\":{\"<\":1497.1778832907949,\"^\":1833.009757168033,\">\":1525.380038784609},\"2\":{\"<\":1599.215427077149,\"^\":2203.78344