In [None]:
1+2

# Model size

In [None]:
import Printf

In [None]:
"""
    test_size()

Print the parameter count, in millions, of each model family.

The FC and deep FC models are measured at several input sizes `d`; the EQ and
deep EQ models are measured once with `d = 10`.
"""
function test_size()
    input_dims = (7, 10, 15, 20, 25, 30)
    # Print a model's parameter count in millions with two decimals.
    report(model) = Printf.@printf("%.2f\n", param_count(model) / 1e6)

    @info "FC model"
    foreach(d -> report(fc_model_fn(d)), input_dims)

    @info "FC deep model"
    foreach(d -> report(deep_fc_model_fn(d)), input_dims)

    # EQ model size is independent of the input size, so one value suffices.
    @info "EQ model"
    report(eq_model_fn(10))
    report(deep_eq_model_fn(10))
    return nothing
end

# Old Main

In [None]:
# Set the GPU memory limit; `g` is the limit in GiB.

# For 1070
g = 5.0
# For 2080 Ti
# g=9.0

# Convert GiB to a whole number of bytes before exporting it.
ENV["JULIA_CUDA_MEMORY_LIMIT"] = round(Int, g * 1024^3)

In [None]:
include("main.jl")

In [None]:
main_CNN_sep
main_EQ_sep

In [None]:
main_CNN_sep()

In [None]:
main_EQ_sep()

In [None]:
main_FC()

In [None]:
main_CNN_ensemble()

In [None]:
main_EQ_ensemble()

In [None]:
main_EQ_cov()

# Train ensemble with different K

- because K is probably the most important hyper-parameter for real data
- [X] the data could be loaded more effectively, e.g. inside one dataset `ds`. Then the data can be completely mixed.

experiments:
- [X] try ER/SF graphs
- [ ] try COR mat
- [-] try normalized COV (medCOV and maxCOV)
- [ ] try COR + VAR as input
- [X] try similar k values (1, 2, 3, 4): this works
- [X] try larger batch (UPDATE: but 100 should be large enough)

In [None]:
include("exp.jl")

In [None]:
# first try to use multiple dses for training
# Build one DataSpec per K. The typed comprehension yields a Vector{DataSpec}
# directly, so no untyped `[]` accumulator or follow-up conversion is needed.
specs = DataSpec[
    DataSpec(d=11, k=k, gtype=:SF, noise=:Gaussian, mat=:maxCOV) for k in [1, 5, 10, 20]
]

In [None]:
# train
expID = exp_train(specs, deep_eq_model_fn, prefix="ensK", train_steps=1e4)

In [None]:
expID = exp_train(specs, deep_eq_model_fn, prefix="ensK-$(now())", train_steps=1e4, merge=true)

In [None]:
# or I could train with K specifically designed for Sachs-2005


In [None]:
# construct many dses
ds, test_ds = spec2ds(specs)

In [None]:
# merge data
"""
    merge_dses(dses)

Merge several datasets into a single `DataSetIterator`.

The `raw_x` and `raw_y` arrays of every dataset are concatenated along the third
dimension, and the shared `batch_size` is reused for the merged dataset.

# Throws
- `ArgumentError` if `dses` is empty, or if the datasets do not all have the same
  `batch_size`.
"""
function merge_dses(dses)
    isempty(dses) && throw(ArgumentError("cannot merge an empty collection of datasets"))
    # 1. check batch_size: the merged dataset has a single batch size, so every
    #    input must agree on it instead of silently using the first one.
    batch_size = first(dses).batch_size
    if any(ds -> ds.batch_size != batch_size, dses)
        throw(ArgumentError("all datasets must share the same batch_size ($batch_size)"))
    end
    # 2. merge raw_x and raw_y along the third dimension
    raw_x = cat((ds.raw_x for ds in dses)...; dims=3)
    raw_y = cat((ds.raw_y for ds in dses)...; dims=3)
    # 3. construct new ds
    return DataSetIterator(raw_x, raw_y, batch_size)
end

In [None]:
size(ds[1].raw_x)

In [None]:
ds

In [None]:
merge_dses(ds)

# New version of main

- refactor the code of main1
- use seeding
- pre-gen graphs and splitting
- use cloud GPU for training

- [ ] I don't need correlation experiments anymore

In [None]:
include("main2.jl")

In [None]:
main2()

In [None]:
main_ensemble()

# Sachs 2005 experiment

In [None]:
import CSV

In [None]:
df = CSV.read("Sachs/1.cd3cd28.csv")

In [None]:
SachsX = convert(Matrix, df)

In [None]:
include("data_graph.jl")

In [None]:
SachsG = Sachs_ground_truth()

In [None]:
myplot(SachsG)

In [None]:
medcovX = cov(SachsX) ./ median(var(SachsX, dims=1))

In [None]:
maxcovX = cov(SachsX) ./ maximum(var(SachsX, dims=1))

In [None]:
corX = cor(SachsX)

In [None]:
include("exp.jl")

In [None]:
# load the trained model
@load "saved_models/EQ-d=20_k=1_gtype=SF_noise=Gaussian_mat=medCOV_mec=Linear/step-15000.bson" model

In [None]:
@load "saved_models/EQ-d=10_k=1_gtype=ER_noise=Gaussian_mat=medCOV_mec=Linear/step-15000.bson" model

In [None]:
@load "back/back-0907/CORCOV/EQ-d=10_k=1_gtype=SF_noise=Gaussian_mat=COR_mec=Linear/step-15000.bson" model

In [None]:
@load "saved_models/ensK-2020-09-08T10:58:41.247-ensemble/step-10000.bson" model

In [None]:
out = inf_one(model, medcovX)

In [None]:
out = inf_one(model, corX)

In [None]:
out = inf_one(model, maxcovX)

In [None]:
Wout = threshold(σ.(out), 0.5, true)

In [None]:
myplot(DiGraph(Wout), names(df))

In [None]:
myplot(SachsG, names(df))

In [None]:
# predicted edge, true edge, SHD
predicted_edge = DiGraph(Wout) |> ne
@show predicted_edge
# Among the positions where Wout is 1, count those whose entry in the
# ground-truth adjacency matrix matches.
correct_edge = let hit = Wout .== 1
    sum(Wout[hit] .== adjacency_matrix(SachsG)[hit])
end
@show correct_edge

# metrics
ytrue = gen_weights(SachsG) |> Matrix
sup_graph_metrics(Wout, ytrue)

# Some rather random testing

In [None]:
exp_train(DataSpec(d=10, k=1, gtype=:SF,
        noise=:Gaussian, mechanism=:Linear, mat=:COV),
    deep_eq_model_fn, prefix="test-EQ", train_steps=1e4)

In [None]:
exp_train(DataSpec(d=10, k=1, gtype=:ER,
        noise=:Gaussian, mechanism=:Linear, mat=:COV),
    deep_eq_model_fn, prefix="test-EQ", train_steps=1e4)

In [None]:
exp_train(DataSpec(d=20, k=1, gtype=:SF,
        noise=:Gaussian, mechanism=:Linear, mat=:COV),
    deep_eq_model_fn, prefix="test-EQ", train_steps=1e4)

In [None]:
# Test the model
spec = DataSpec(d=10, k=1, gtype=:SF,
        noise=:Gaussian, mechanism=:Linear, mat=:COV)
expID = "EQ-$(dataspec_to_id(spec))"

In [None]:
exp_test(expID, spec, use_raw=true)

In [None]:
spec

In [None]:
"""
    adhoc_test(expID, spec)

Evaluate the saved model of experiment `expID` on test data built from `spec`.

The model is obtained with `load_most_recent` from `saved_models/<expID>`, passed
through `gpu`, and scored with `sup_test_raw` using `nbatch=16`. Only the metrics
are returned.
"""
function adhoc_test(expID, spec)
    model_dir = joinpath("saved_models", expID)
    # Only the first value returned by the loader (the model) is needed.
    model, _ = load_most_recent(model_dir)
    model = gpu(model)
    # Only the test split is used here.
    _, test_ds = spec2ds(spec)
    metrics, _ = sup_test_raw(model, test_ds; nbatch=16)
    return metrics
end

In [None]:
adhoc_test(expID, spec)

In [None]:
dataspec_to_id(spec)

In [None]:
_results[expID=>"raw-$(dataspec_to_id(spec))"]