# Accuracy-based BO example

In [None]:
include("src/BayesianCausalityOptimization.jl");

using .BayesianCausalityOptimization

valid_pairs = setdiff(1:100,[52,53,54,55,71])
tuebingen_data = BayesianCausalityOptimization.Datasets.tuebingen(valid_pairs);
#pair_ids = ["001","007","016","025","034","049","064","072","073","078","086","087","088","096","100"]
#selected_data = create_data(tuebingen_data, pair_ids, 1000);

T_max = 10
Rs = 5:5
EPOCHS = 10
M = 2
P = 10
n_iterations = 10

optimizer, optimum = bayesian_optimization_accuracy(tuebingen_data, T_max;Rs=Rs, EPOCHS=EPOCHS, M=M, P=P, 
                                            n_iterations=n_iterations)

# Unconstrained Likelihood-based BO Test

In [1]:
include("src/BayesianCausalityOptimization.jl");
include("src/BayesianCausality.jl");
include("src/Datasets.jl");
include("src/Misc.jl");

using .Misc, .BayesianCausality, .BayesianCausalityOptimization
using .Datasets
using Statistics, Distributions
using SpecialFunctions, LinearAlgebra
import Base.Iterators: product
import Random: randperm, seed!
using BayesOpt
import Base.Iterators: enumerate
import Base.Filesystem: mkpath

SEED = 100
seed!(SEED)
RESULTS_PATH = "./results/tuebingen/vb-hp-opt"
PARAMS_PATH = "./params";

valid_pairs = setdiff(1:100,[52,53,54,55,71])
tuebingen_data = Datasets.tuebingen(valid_pairs);

│   caller = ip:0x0
└ @ Core :-1


In [None]:
vb_hyperparameter_optimization(tuebingen_data[1 : 3], 1000; 
name="trying_5/optimization_results/hp_opt_likelihood_1_7", 
M=4, P=100, Rs=1:3, EPOCHS=20, hp_optimization_type="unconstrained", n_iterations=20, norm=true, SEED=SEED, RESULTS_PATH=RESULTS_PATH, m_zero=true)

In [81]:
accuracy = causal_accuracy_preset_parameters(tuebingen_data[1:5],100; hp_folder="./results/tuebingen/vb-hp-opt/hp_opt_likelihood_with_m_unconstrained_seed_100", M=4, P=100, Rs=1:3, EPOCHS=10, seed_no=100, test_method="vb", ratio=1.)

UndefVarError: UndefVarError: tuebingen_data not defined

# Hyperparameter optimization

In [2]:
SEED = 100
seed!(SEED)

valid_pairs = setdiff(1:100,[52,53,54,55,71])
tuebingen_data = BayesianCausalityOptimization.Datasets.tuebingen(valid_pairs);

In [None]:
for (id, data) in enumerate(tuebingen_data)
    println(id, " ", length(data[:X]))
end

In [None]:
vb_hyperparameter_optimization(tuebingen_data[1:5], 100; name="hp_opt_likelihood_with_m", M=4, P=100, Rs=1:3, EPOCHS=10, hp_optimization_type="unconstrained", n_iterations=10, norm=true, RESULTS_PATH=RESULTS_PATH)

## Looking at liminal parameters

In [11]:
ps = load_json(string("./results/tuebingen/vb-hp-opt/hp_opt_likelihood_full_data_w_bounds_m_zero_seed_100","/parameters.json"))["parameters"];
other_ps = load_json(string("./results/tuebingen/vb-hp-opt/hp_opt_1000_samples_m_zero","/parameters.json"))["parameters"];

UndefVarError: UndefVarError: load_json not defined

In [41]:
param_counters = Dict(zip(["γ", "b₂", "λ₁", "λ₂", "a₁", "a₂", "b₁"], [0 for i in 1:7]))
p_param_counters = Dict(zip(["γ", "b₂", "λ₁", "λ₂", "a₁", "a₂", "b₁"], [0 for i in 1:7]))
n_param_counters = Dict(zip(["γ", "b₂", "λ₁", "λ₂", "a₁", "a₂", "b₁"], [0 for i in 1:7]))
direction_counters = Dict(zip(["<", ">", "^"], [0 for i in 1:3]))
p_direction_counters = Dict(zip(["<", ">", "^"], [0 for i in 1:3]))
n_direction_counters = Dict(zip(["<", ">", "^"], [0 for i in 1:3]))
counter = 0
limit_counter = 0
positive_counter = 0
negative_counter = 0
higher_limit = 13.5
lower_limit = 9
common_count = 0
uncommon_count = 0
total_count = 0
for key in keys(ps)
    for i in 1:5
        for direction in ["<",">","^"]
            for param in ["γ", "b₂", "λ₁", "λ₂", "a₁", "a₂", "b₁"]
                val = ps[key][string(i)][direction][param]
                val2 = other_ps[key][string(i)][direction][param]
                if ((val>limit) | (val<-limit)) & ((val2>lower_limit) | (val2<-lower_limit))
                    common_count += 1
                elseif ((val>limit) | (val<-limit)) ⊻ ((val2>lower_limit) | (val2<-lower_limit))
                    uncommon_count +=1
                end
                
                counter += 1
                if (val>limit) | (val<-limit)
                    param_counters[param] += 1
                    direction_counters[direction] += 1
                    limit_counter +=1
                end
                if (val>limit)
                    positive_counter += 1
                    p_param_counters[param] += 1
                    p_direction_counters[direction] += 1
                end
                if (val<-limit)
                    negative_counter += 1
                    n_param_counters[param] += 1
                    n_direction_counters[direction] += 1
                end
            end
        end
    end
end
print(limit_counter/counter)

0.15619047619047619

# Zero pseudodata attempts

In [1]:
include("src/BayesianCausalityOptimization.jl");
include("src/BayesianCausality.jl");
include("src/Datasets.jl");
include("src/Misc.jl");

using .Misc, .BayesianCausality, .BayesianCausalityOptimization
using .Datasets
using Statistics, Distributions
using SpecialFunctions, LinearAlgebra
import Base.Iterators: product
import Random: randperm, seed!
using BayesOpt
import Base.Iterators: enumerate
import Base.Filesystem: mkpath

SEED = 100
seed!(SEED)
RESULTS_PATH = "./results/tuebingen/vb-hp-opt"
PARAMS_PATH = "./params";

valid_pairs = setdiff(1:100,[52,53,54,55,71])
tuebingen_data = Datasets.tuebingen(valid_pairs);

│   caller = ip:0x0
└ @ Core :-1


In [7]:
hp_folder = "./results/tuebingen/vb-hp-opt/pseudodata_test";

In [79]:
accuracy = causal_accuracy_preset_parameters(tuebingen_data,1000; hp_folder=hp_folder, M=4, P=100, 
    Rs=1:5, EPOCHS=200, seed_no=100, test_method="vb", ratio=1., zero_pseudodata=true)

UndefVarError: UndefVarError: hp_folder not defined

# Trying different Ms

In [131]:
parse(UnitRange{Int64},"1:5")

MethodError: MethodError: no method matching parse(::Type{UnitRange{Int64}}, ::String)
Closest candidates are:
  parse(!Matched::Type{LibGit2.GitCredential}, ::AbstractString) at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.0/LibGit2/src/gitcredential.jl:73
  parse(!Matched::Type{LibGit2.GitCredentialHelper}, ::AbstractString) at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.0/LibGit2/src/gitcredential.jl:163
  parse(!Matched::Type{Sockets.IPv4}, ::AbstractString) at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.0/Sockets/src/IPAddr.jl:166
  ...

In [117]:
a = reshape(["$(p)_$(M)_$(R)" for M ∈ ["m1", "m2", "m3"] for R ∈ ["r1", "r2", "r3"] for p in ["p1","p2","p3"]],3,3,:);

In [118]:
a[1, :, :]

3×3 Array{String,2}:
 "p1_m1_r1"  "p1_m2_r1"  "p1_m3_r1"
 "p1_m1_r2"  "p1_m2_r2"  "p1_m3_r2"
 "p1_m1_r3"  "p1_m2_r3"  "p1_m3_r3"

In [103]:
a = randn(2,3,4)

2×3×4 Array{Float64,3}:
[:, :, 1] =
  1.42675   1.53639   -2.76433
 -0.136104  0.415135   1.54728

[:, :, 2] =
 -1.86786    0.169477  0.663542
  0.979961  -2.59211   1.46754 

[:, :, 3] =
 -0.82937    0.526713   1.78105 
 -0.211186  -0.262804  -0.718716

[:, :, 4] =
 0.393604   1.42297   0.199023
 0.276449  -0.881488  1.63151 

In [106]:
a[1,:,:]

3×4 Array{Float64,2}:
  1.42675  -1.86786   -0.82937   0.393604
  1.53639   0.169477   0.526713  1.42297 
 -2.76433   0.663542   1.78105   0.199023

In [105]:
nanmax(a, dims=(2, 3))

2×1×1 Array{Float64,3}:
[:, :, 1] =
 1.7810522798268196
 1.6315141352976112

In [127]:
causal_accuracy_preset_parameters_Ms(tuebingen_data[1:1], Inf; hp_folder=hp_folder, Ms=2:5, Rs=1:5,
    EPOCHS=20, seed_no=101, test_method="vb", norm=true, ratio=1.0, pseudodata_type="kmeans", m_zero=true)

3×5×4 Array{String,3}:
[:, :, 1] =
 ">_M_2_R_1"  ">_M_2_R_2"  ">_M_2_R_3"  ">_M_2_R_4"  ">_M_2_R_5"
 "<_M_2_R_1"  "<_M_2_R_2"  "<_M_2_R_3"  "<_M_2_R_4"  "<_M_2_R_5"
 "^_M_2_R_1"  "^_M_2_R_2"  "^_M_2_R_3"  "^_M_2_R_4"  "^_M_2_R_5"

[:, :, 2] =
 ">_M_3_R_1"  ">_M_3_R_2"  ">_M_3_R_3"  ">_M_3_R_4"  ">_M_3_R_5"
 "<_M_3_R_1"  "<_M_3_R_2"  "<_M_3_R_3"  "<_M_3_R_4"  "<_M_3_R_5"
 "^_M_3_R_1"  "^_M_3_R_2"  "^_M_3_R_3"  "^_M_3_R_4"  "^_M_3_R_5"

[:, :, 3] =
 ">_M_4_R_1"  ">_M_4_R_2"  ">_M_4_R_3"  ">_M_4_R_4"  ">_M_4_R_5"
 "<_M_4_R_1"  "<_M_4_R_2"  "<_M_4_R_3"  "<_M_4_R_4"  "<_M_4_R_5"
 "^_M_4_R_1"  "^_M_4_R_2"  "^_M_4_R_3"  "^_M_4_R_4"  "^_M_4_R_5"

[:, :, 4] =
 ">_M_5_R_1"  ">_M_5_R_2"  ">_M_5_R_3"  ">_M_5_R_4"  ">_M_5_R_5"
 "<_M_5_R_1"  "<_M_5_R_2"  "<_M_5_R_3"  "<_M_5_R_4"  "<_M_5_R_5"
 "^_M_5_R_1"  "^_M_5_R_2"  "^_M_5_R_3"  "^_M_5_R_4"  "^_M_5_R_5"

3×5×4 Array{Float64,3}:
[:, :, 1] =
 -772.492   -739.942   -783.346   -809.859   -691.305 
 -772.37    -740.194   -783.967   -754.827   -651.2   
   -1.0e10    -1.0e10    -1.0e10    -1.0e10    -1.0e10

[:, :, 2] =
 -767.006   -698.088   -710.833   -671.151   -692.717 
 -802.125   -750.932   -748.376   -751.275   -706.619 
   -1.0e10    -1.0e10    -1.0e10    -1.0e10    -1.0e10

[:, :, 3] =
 -761.44    -692.711   -741.113   -766.41    -724.59  
 -778.735   -693.31    -763.337   -761.075   -691.375 
   -1.0e10    -1.0e10    -1.0e10    -1.0e10    -1.0e10

[:, :, 4] =
 -769.431   -703.978   -814.483   -734.936   -753.088 
 -765.173   -727.805   -734.459   -740.3     -814.086 
   -1.0e10    -1.0e10    -1.0e10    -1.0e10    -1.0e10

Dict{String,Float64} with 60 entries:
  "<_M_2_R_1" => -772.37
  "<_M_4_R_2" => -693.31
  "^_M_4_R_4" => -1.0e10
  ">_M_2_R_2" => -739.942
  "^_M_3_R_2" => -1.0e10
  "<_M_5_R_5" => -814.086
  "<_M_2_R_5" => -651.2
  ">_M_2_R_4" => -809.859
  "^_M_5_R_2" => -1.0e10
  ">_M_2_R_3" => -783.346
  "<_M_4_R_4" => -761.075
  "<_M_5_R_4" => -740.3
  "<_M_4_R_1" => -778.735
  ">_M_4_R_5" => -724.59
  ">_M_5_R_1" => -769.431
  ">_M_5_R_5" => -753.088
  "^_M_2_R_2" => -1.0e10
  "<_M_3_R_1" => -802.125
  "<_M_3_R_2" => -750.932
  "^_M_4_R_3" => -1.0e10
  "^_M_2_R_4" => -1.0e10
  "^_M_2_R_3" => -1.0e10
  "<_M_4_R_5" => -691.375
  "^_M_3_R_1" => -1.0e10
  ">_M_3_R_5" => -692.717
  ⋮           => ⋮

3-element Array{Float64,1}:
 -671.1512201994304
 -651.1998113084147
   -1.0e10         

pair0001...			false


0.0

In [139]:
ceil(3.2)

4.0

In [140]:
using GaussianMixtures: GMM


┌ Info: Recompiling stale cache file /home/melih/.julia/compiled/v1.0/GaussianMixtures/1kPVN.ji for GaussianMixtures [cc18c42c-b769-54ff-9e2a-b28141a64aae]
└ @ Base loading.jl:1190
│ This may mean Distributions [31c24e10-a181-5473-b8eb-7969acd0382f] does not support precompilation but is imported by a module that does.
└ @ Base loading.jl:947


In [141]:
?GMM

search: [0m[1mG[22ma[0m[1mm[22m[0m[1mm[22ma [0m[1mg[22ma[0m[1mm[22m[0m[1mm[22ma [0m[1mg[22ma[0m[1mm[22m[0m[1mm[22ma_inc [0m[1mg[22ma[0m[1mm[22m[0m[1mm[22ma_inc_inv l[0m[1mg[22ma[0m[1mm[22m[0m[1mm[22ma l[0m[1mg[22ma[0m[1mm[22m[0m[1mm[22ma_r di[0m[1mg[22ma[0m[1mm[22m[0m[1mm[22ma tri[0m[1mg[22ma[0m[1mm[22m[0m[1mm[22ma



`GMM` is the type that stores information of a Guassian Mixture Model.  Currently two main covariance types are supported: full covarariance and diagonal covariance.

---

`GMM(n::Int, d::Int, kind::Symbol=:diag)` initializes a GMM with means 0 and Indentity covariances


In [142]:
using ScikitLearn: fit!

┌ Info: Recompiling stale cache file /home/melih/.julia/compiled/v1.0/ScikitLearn/tbUuI.ji for ScikitLearn [3646fa90-6ef7-5e7e-9f22-8aca16db6324]
└ @ Base loading.jl:1190
│ This may mean StatsBase [2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91] does not support precompilation but is imported by a module that does.
└ @ Base loading.jl:947


In [144]:
using Clustering: kmeans

In [146]:
results = kmeans(randn(2,30),3);

In [147]:
results.assignments

30-element Array{Int64,1}:
 1
 2
 1
 2
 3
 1
 3
 1
 2
 1
 1
 1
 1
 ⋮
 3
 2
 3
 3
 3
 3
 3
 2
 2
 3
 1
 1

In [137]:
size(zeros(5,3))

(5, 3)

In [82]:
include("src/BayesianCausalityOptimization.jl");
include("src/BayesianCausality.jl");
include("src/Datasets.jl");
include("src/Misc.jl");

using .Misc, .BayesianCausality, .BayesianCausalityOptimization
using .Datasets
using Statistics, Distributions
using SpecialFunctions, LinearAlgebra
import Base.Iterators: product
import Random: randperm, seed!
using BayesOpt
import Base.Iterators: enumerate
import Base.Filesystem: mkpath

SEED = 100
seed!(SEED)
RESULTS_PATH = "./results/tuebingen/vb-hp-opt"
PARAMS_PATH = "./params";

hp_folder = "$RESULTS_PATH/multiple_Ms_attempt"

valid_pairs = setdiff(1:100,[52,53,54,55,71])
tuebingen_data = Datasets.tuebingen(valid_pairs);

│   caller = ip:0x0
└ @ Core :-1


In [88]:
causal_accuracy_preset_parameters_Ms(tuebingen_data[1:1], Inf; hp_folder=hp_folder, M=4, Rs=1:5,
    EPOCHS=20, seed_no=100, test_method="vb", norm=true, ratio=1.0, pseudodata_type="kmeans", m_zero=true)

pair0001...			[-768.493, -765.66, -1.0e10, -727.897, -737.67, -1.0e10, -729.374, -721.033, -1.0e10, -752.406, -725.817, -1.0e10, -754.266, -714.758, -1.0e10]
[-759.298 -744.73 -774.471 -927.919 -751.426; -778.715 -746.392 -723.551 -748.995 -707.407; -1.0e10 -1.0e10 -1.0e10 -1.0e10 -1.0e10]
false


0.0