# Accuracy-based BO example

In [7]:
include("src/BayesianCausalityOptimization.jl");

using .BayesianCausalityOptimization

valid_pairs = setdiff(1:100,[52,53,54,55,71])
tuebingen_data = BayesianCausalityOptimization.Datasets.tuebingen(valid_pairs);
#pair_ids = ["001","007","016","025","034","049","064","072","073","078","086","087","088","096","100"]
#selected_data = create_data(tuebingen_data, pair_ids, 1000);

T_max = 10
Rs = 5:5
EPOCHS = 10
M = 2
P = 10
n_iterations = 10

optimizer, optimum = bayesian_optimization(tuebingen_data, T_max;Rs=Rs, EPOCHS=EPOCHS, M=M, P=P, 
                                            n_iterations=n_iterations)

# Likelihood-based BO

In [14]:
include("src/BayesianCausalityOptimization.jl");
include("src/BayesianCausality.jl");
include("src/Datasets.jl");
include("src/Misc.jl");

using .Misc, .BayesianCausality, .BayesianCausalityOptimization
using .Datasets
using Statistics, Distributions
using SpecialFunctions, LinearAlgebra
import Base.Iterators: product
import Random: randperm, seed!
using BayesOpt
import Base.Iterators: enumerate
import Base.Filesystem: mkpath

SEED = 100
seed!(SEED)
RESULTS_PATH = "./results/tuebingen/vb-hp-opt"
PARAMS_PATH = "./params";



In [15]:
valid_pairs = setdiff(1:100,[52,53,54,55,71])
tuebingen_data = BayesianCausalityOptimization.Datasets.tuebingen(valid_pairs);

In [16]:
redirect_stdout(prepare_empty_parameter_dictionary(1:5, tuebingen_data[1:3]), stdout)

MethodError: MethodError: no method matching redirect_stdout(::Tuple{Dict{Any,Any},Dict{Any,Any}}, ::IJulia.IJuliaStdio{Base.PipeEndpoint})
Closest candidates are:
  redirect_stdout(!Matched::Function, ::Any) at stream.jl:979

In [17]:
function prepare_empty_parameter_dictionary(Rs::UnitRange{Int64}, data::Array{Dict{Symbol,Any},1})
    DIRECTION = ['>','<', '^']
    resulting_parameters = Dict()
    likelihoods = Dict()
    for (id, pair) ∈ enumerate(data)
        resulting_parameters[pair[:id]] = Dict()
        likelihoods[pair[:id]] = Dict()
        for R in Rs
            resulting_parameters[pair[:id]][R] = Dict()
            likelihoods[pair[:id]][R] = Dict()
            for direction in DIRECTION
                resulting_parameters[pair[:id]][R][direction] = Dict()
                likelihoods[pair[:id]][R][direction] = Dict()
            end
        end
    end
    return resulting_parameters, likelihoods
end

function bayesian_optimization_likelihood(xⁿ, yⁿ; R, EPOCHS, M, P, n_iterations, m₁, m₂)
    config = ConfigParameters()        # calls initialize_parameters_to_default of the C API
    config.n_iterations = n_iterations;
    config.verbose_level = 0
    set_kernel!(config, "kMaternARD5")  # calls set_kernel of the C API
    config.sc_type = SC_MAP;
    f((γ, λ₁, λ₂, a₁, a₂, b₁, b₂)) = -v_bayes([xⁿ yⁿ],R;M=M,γ=exp(γ),m₁=m₁,m₂=m₂,λ₁=exp(λ₁),λ₂=exp(λ₂),
        a₁=exp(a₁),a₂=exp(a₂),b₁=exp(b₁),b₂=exp(b₂),EPOCHS=EPOCHS)[1][end]
    param_keys = ["γ", "λ₁", "λ₂", "a₁", "a₂", "b₁", "b₂"]
    lowerbound = [log(.0001), log(.0001), log(.0001), log(.0001), log(.0001), log(.0001), log(.0001)]; 
    upperbound = [log(10000.), log(10000.), log(10000.), log(10000.), log(10000.), log(10000.), log(10000.)];
    @time optimizer, optimum = bayes_optimization(f, lowerbound, upperbound, config)
    return Dict(zip(param_keys, optimizer)), optimum
    end;
                    
function vb_hyperparameter_optimization(data,T_max=Inf; name="hp_opt_likelihood", M=1, P=1, Rs=1:1, m₁=0.0, m₂=0.0,
        EPOCHS=1, warm_start_type="free", n_iterations=200, norm=true)
    mkpath("$RESULTS_PATH/$(name)_$(warm_start_type)")
    DIRECTION = ['>','<', '^']
    parameters, likelihoods = prepare_empty_parameter_dictionary(Rs, data)
    for (id,pair) ∈ enumerate(data)
    	print(pair[:id],"...\t\t\t")
        T = length(pair[:X])
        perm = randperm(T)[1:Int(min(T,T_max))]
        x, y = pair[:X][perm], pair[:Y][perm]
        for R in Rs
            println(" ", R)
            xⁿ = norm ? (x .- mean(x)) / std(x) : x
            yⁿ = norm ? (y .- mean(y)) / std(y) : y
            parameters[pair[:id]][R]['^'], likelihoods[pair[:id]][R]['^'] = bayesian_optimization_likelihood(xⁿ, yⁿ; 
                R=R, EPOCHS=EPOCHS, M=1, P=P, n_iterations=n_iterations, m₁=m₁, m₂=m₂)
            parameters[pair[:id]][R]['>'], likelihoods[pair[:id]][R]['>'] = bayesian_optimization_likelihood(xⁿ, yⁿ; 
                R=R, EPOCHS=EPOCHS, M=M, P=P, n_iterations=n_iterations, m₁=m₁, m₂=m₂)
            parameters[pair[:id]][R]['<'], likelihoods[pair[:id]][R]['<'] = bayesian_optimization_likelihood(xⁿ, yⁿ; 
                R=R, EPOCHS=EPOCHS, M=M, P=P, n_iterations=n_iterations, m₁=m₁, m₂=m₂)
        end        
    end
    metaparameters = Dict("M"=>M, "EPOCHS"=>EPOCHS, "P"=>P, "m₁"=>m₁, "m₂"=>m₂, "T_max"=>T_max, 
        "Rs"=>Rs, "warm_start_type"=>warm_start_type, "n_iterations"=>n_iterations, 
        "SEED"=>SEED, "norm"=>norm)
    save_json("$RESULTS_PATH/$(name)_$(warm_start_type)/metaparameters.json", metaparameters=metaparameters)
    save_json("$RESULTS_PATH/$(name)_$(warm_start_type)/parameters.json", parameters=parameters);
    save_json("$RESULTS_PATH/$(name)_$(warm_start_type)/likelihoods.json", likelihoods=likelihoods);
    end;

In [18]:
vb_hyperparameter_optimization(tuebingen_data[1:25],1000; name="hp_opt_likelihood_1", M=4, P=100, Rs=1:5, m₁=0.0, m₂=0.0,
        EPOCHS=200, warm_start_type="free", n_iterations=200)

pair0001...			 1
 19.049168 seconds (2.82 M allocations: 216.550 MiB, 0.29% gc time)
 18.866863 seconds (3.11 M allocations: 345.233 MiB, 0.40% gc time)
 19.776758 seconds (3.11 M allocations: 345.233 MiB, 0.36% gc time)
 2
 41.293155 seconds (106.51 M allocations: 7.833 GiB, 4.46% gc time)
 68.965048 seconds (197.27 M allocations: 21.418 GiB, 7.06% gc time)
 55.865412 seconds (150.30 M allocations: 16.311 GiB, 6.74% gc time)
 3
 69.089245 seconds (266.54 M allocations: 19.669 GiB, 7.11% gc time)
 99.556117 seconds (333.19 M allocations: 36.650 GiB, 9.03% gc time)
 96.064717 seconds (314.37 M allocations: 34.565 GiB, 9.26% gc time)
 4
 64.247158 seconds (249.44 M allocations: 18.376 GiB, 7.35% gc time)
133.824074 seconds (500.14 M allocations: 55.249 GiB, 8.36% gc time)
134.583520 seconds (497.00 M allocations: 54.885 GiB, 9.04% gc time)
 5
 83.238684 seconds (379.23 M allocations: 28.008 GiB, 7.73% gc time)
200.054366 seconds (725.32 M allocations: 80.422 GiB, 10.30% gc time)
258.4230

141.224910 seconds (868.14 M allocations: 62.095 GiB, 11.92% gc time)
251.092582 seconds (1.12 G allocations: 122.684 GiB, 13.28% gc time)
255.055255 seconds (1.10 G allocations: 120.637 GiB, 13.53% gc time)
 4
- 13:09:43.557074 ERROR: nlopt failure
100.141968 seconds (734.64 M allocations: 52.578 GiB, 14.29% gc time)
357.285388 seconds (1.66 G allocations: 183.616 GiB, 12.37% gc time)
340.626465 seconds (1.64 G allocations: 180.465 GiB, 12.91% gc time)
 5
276.838656 seconds (1.97 G allocations: 141.576 GiB, 13.36% gc time)
411.741823 seconds (1.93 G allocations: 213.989 GiB, 14.54% gc time)
- 13:38:29.556732 ERROR: nlopt failure
339.505926 seconds (1.68 G allocations: 185.894 GiB, 12.86% gc time)
pair0011...			 1
 23.004609 seconds (7.32 M allocations: 541.813 MiB, 0.54% gc time)
 21.286615 seconds (8.16 M allocations: 908.432 MiB, 0.96% gc time)
 23.547091 seconds (8.16 M allocations: 908.432 MiB, 0.87% gc time)
 2
 72.758639 seconds (355.99 M allocations: 25.241 GiB, 8.47% gc time)


105.729067 seconds (209.33 M allocations: 23.054 GiB, 7.08% gc time)
 93.706042 seconds (186.56 M allocations: 20.581 GiB, 6.86% gc time)
 5
115.025520 seconds (298.20 M allocations: 22.667 GiB, 5.86% gc time)
122.827874 seconds (259.05 M allocations: 28.739 GiB, 7.78% gc time)
- 19:06:12.106750 ERROR: nlopt failure
 24.828108 seconds (65.65 M allocations: 7.235 GiB, 10.15% gc time)
pair0020...			 1
 25.921604 seconds (2.81 M allocations: 216.422 MiB, 0.24% gc time)
 20.442009 seconds (3.11 M allocations: 345.233 MiB, 0.42% gc time)
 21.763502 seconds (3.11 M allocations: 345.233 MiB, 0.38% gc time)
 2
 57.913533 seconds (137.88 M allocations: 10.139 GiB, 4.67% gc time)
 68.635643 seconds (160.68 M allocations: 17.435 GiB, 6.86% gc time)
 62.266865 seconds (123.02 M allocations: 13.342 GiB, 5.86% gc time)
 3
 88.095801 seconds (262.84 M allocations: 19.400 GiB, 6.49% gc time)
115.496854 seconds (310.46 M allocations: 34.120 GiB, 8.14% gc time)
 95.263874 seconds (278.94 M allocations: 

"{\"likelihoods\":{\"pair0007\":{\"4\":{\"<\":2385.188883001789,\"^\":2469.8921171941897,\">\":2307.5035822854015},\"2\":{\"<\":2370.430425816063,\"^\":2618.3313719689404,\">\":2374.136250049068},\"3\":{\"<\":2379.7418207046185,\"^\":2498.4970364163537,\">\":2374.0350961118356},\"5\":{\"<\":2223.8245328481216,\"^\":2436.885540701786,\">\":1458.5718675211165},\"1\":{\"<\":2466.605052721935,\"^\":2844.243363120336,\">\":2464.8888899356803}},\"pair0023\":{\"4\":{\"<\":1192.1621484436712,\"^\":1215.6630704046092,\">\":1189.3093882206192},\"2\":{\"<\":0.0,\"^\":1241.0012197531807,\">\":1186.0993725058067},\"3\":{\"<\":1190.6918252943124,\"^\":1230.3697692984183,\">\":1187.8623785654129},\"5\":{\"<\":1193.507390388223,\"^\":1204.3842685824247,\">\":1191.6682184022109},\"1\":{\"<\":1205.0035494537924,\"^\":0.0,\">\":1200.9476510385066}},\"pair0020\":{\"4\":{\"<\":848.2492838239342,\"^\":871.9005644160068,\">\":851.8279148732862},\"2\":{\"<\":874.7452122487997,\"^\":903.2765104586172,\">\":882