In [1]:
using GpABC, DifferentialEquations, Distributions, Distances, Plots

In [2]:
#
# Experimental data - from ABCSysBio example at
# https://github.com/jamesscottbrown/abc-sysbio/blob/master/examples/SBML/Example1/SIR.ipynb
#
# Observation times (13 time points, starting at t = 0).
times = [0.0, 0.6, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0]
# Observed trajectories: one 3-element entry per time point, ordered (S, I, R).
# The first entry equals the initial condition used for the three-species models.
values = [[ 20.     ,  10.     ,   0.     ],
       [  0.12313,  13.16813,   9.42344],
       [  0.12102,   7.17251,  11.18957],
       [  0.09898,   2.36466,  10.0365 ],
       [  0.37887,   0.92019,   6.87117],
       [  1.00661,   0.61958,   4.44955],
       [  1.20135,   0.17449,   3.01271],
       [  1.46433,   0.28039,   1.76431],
       [  1.37789,   0.0985 ,   1.28868],
       [  1.57073,   0.03343,   0.81813],
       [  1.4647 ,   0.28544,   0.52111],
       [  1.24719,   0.10138,   0.22746],
       [  1.56065,   0.21671,   0.19627]]

# Concatenate the per-time-point vectors as columns, giving a 3x13 matrix
# (species in rows, time points in columns).
values = hcat(values...)

#
# Priors and initial conditions - these are model-specific, as each model can
# have a different number of parameters/species.
# Prior order follows the `p = (...)` comment above each model function;
# initial-condition order follows the corresponding `x = (...)` comment.
#
# Model 1: four parameters, three species.
priors1 = [Uniform(0.0, 5.0) for i in 1:4]
ic1 = [20.0, 10.0, 0.0]

# Model 2: five parameters (the fifth has a wider prior), four species.
priors2 = vcat([Uniform(0.0, 5.0) for i in 1:4], Uniform(0.0, 10.0))
ic2 = [20.0, 0.0, 10.0, 0.0]

# Model 3: five parameters (the fifth has a wider prior), three species.
priors3 = vcat([Uniform(0.0, 5.0) for i in 1:4], Uniform(0.0, 10.0))
ic3 = [20.0, 10.0, 0.0]

# Prior over model indices. Currently restricted to models 1 and 2; the
# three-model variant is kept commented out.
#modelprior = DiscreteUniform(1, 3)
modelprior = DiscreteUniform(1, 2)

# p = (alpha, gamma, d, v)
# x = (S, I, R)
"""
    model1(dx, x, p, t)

In-place right-hand side of the basic SIR model.

Writes the time derivatives of the state `x = (S, I, R)` into `dx`, using the
parameters `p = (alpha, gamma, d, v)`. The time argument `t` is not used.
"""
function model1(dx, x, p, t)
    S, I, R = x
    alpha, gamma, d, v = p
    dx[1] = alpha - gamma*S*I - d*S # dS/dt = alpha - gamma*S*I - d*S
    # The infection gain must use gamma (p[2]) so that it mirrors the
    # infection loss in dS/dt; the death rate d only enters through -d*I.
    dx[2] = gamma*S*I - v*I - d*I # dI/dt = gamma*S*I - v*I - d*I
    dx[3] = v*I - d*R # dR/dt = v*I - d*R
end

# p = (alpha, gamma, d, v, delta)
# x = (S, L, I, R)
"""
    model2(dx, x, p, t)

In-place right-hand side of the SIR model with a latent compartment.

Writes the time derivatives of the state `x = (S, L, I, R)` into `dx`, using
the parameters `p = (alpha, gamma, d, v, delta)`. The time argument `t` is not
used.
"""
function model2(dx, x, p, t)
    S, L, I, R = x
    alpha, gamma, d, v, delta = p
    # Susceptibles: inflow alpha, loss to infection and death.
    dx[1] = alpha - gamma*S*I - d*S
    # Latent: gain from new infections, loss by becoming infectious or dying.
    dx[2] = gamma*S*I - delta*L - d*L
    # Infectious: gain from the latent pool, loss by recovery or death.
    dx[3] = delta*L - v*I - d*I
    # Recovered: gain from recovery, loss by death.
    dx[4] = v*I - d*R
end

# p = (alpha, gamma, d, v, e)
# x = (S, I, R)
"""
    model3(dx, x, p, t)

In-place right-hand side of the SIR model with loss of immunity.

Writes the time derivatives of the state `x = (S, I, R)` into `dx`, using the
parameters `p = (alpha, gamma, d, v, e)`. Recovered individuals return to the
susceptible pool at rate `e`. The time argument `t` is not used.
"""
function model3(dx, x, p, t)
    S, I, R = x
    alpha, gamma, d, v, e = p
    dx[1] = alpha - gamma*S*I - d*S + e*R # dS/dt = alpha - gamma*S*I - d*S + e*R
    # The infection gain must use gamma (p[2]) so that it mirrors the
    # infection loss in dS/dt; the death rate d only enters through -d*I.
    dx[2] = gamma*S*I - v*I - d*I # dI/dt = gamma*S*I - v*I - d*I
    dx[3] = v*I - (d + e)*R # dR/dt = v*I - (d+e)*R
end

# Initial conditions indexed by model number. All three entries are needed:
# simulator3 reads ics[3], which is out of bounds if only two are listed.
ics = [ic1, ic2, ic3]

# Define simulator functions

# Solve model 1 for the given parameter vector over the span of the observation
# times, saving the state at exactly those times, and return the solution as a
# dense Float64 matrix.
function simulator1(params)
    tspan = (first(times), last(times))
    problem = ODEProblem(model1, ics[1], tspan, params)
    trajectory = solve(problem, saveat=times, force_dtmin=true)
    return Array{Float64,2}(trajectory)
end

# Solve model 2 for the given parameter vector over the span of the observation
# times, saving the state at exactly those times.
# Model 2 has the extra species L, which is not measured. Its row (row 2) is
# dropped from the result so that the output lines up with the reference data
# "values", which only holds S, I and R.
function simulator2(params)
    tspan = (first(times), last(times))
    problem = ODEProblem(model2, ics[2], tspan, params)
    trajectory = solve(problem, saveat=times, force_dtmin=true)
    return Array{Float64,2}(trajectory)[[1, 3, 4], :]
end

# Solve model 3 for the given parameter vector over the span of the observation
# times, saving the state at exactly those times, and return the solution as a
# dense Float64 matrix.
function simulator3(params)
    tspan = (first(times), last(times))
    problem = ODEProblem(model3, ics[3], tspan, params)
    trajectory = solve(problem, saveat=times, force_dtmin=true)
    return Array{Float64,2}(trajectory)
end

simulator3 (generic function with 1 method)

In [3]:
# Simulation-based model selection over models 1 and 2. The whole cell is
# switched off with `if false`, so nothing inside it runs.
if false
    # Decreasing threshold schedule handed to the model-selection input; a
    # shorter alternative is kept commented out below.
    t_schedule = [20, 15, 10, 5, 3, 2.5, 2, 1.7, 1.5]
    #t_schedule = [20, 15, 10, 5]

    # NOTE(review): the positional argument meanings are defined by GpABC and
    # are not visible here - presumably (number of models, number of particles,
    # thresholds, model prior, per-model parameter priors, summary statistic,
    # distance function, simulators, maximum iterations); confirm against the
    # GpABC version in use.
    input = GpABC.SimulatedModelSelectionInput(2, 200, t_schedule, modelprior,
        [priors1, priors2], "keep_all", euclidean,
        [simulator1, simulator2], 1e4)

    # Run model selection against the reference data.
    ms_res = GpABC.model_selection(input, values);
end

In [4]:
# Threshold schedule for the emulated model-selection run.
threshold_schedule = [20, 15, 10]

# Train the emulators
n_design_points = 200
summary_statistic = "keep_all"
distance_metric = euclidean
# NOTE(review): `rt` and `gpkernel` are not referenced anywhere else in the
# visible code - confirm whether they are still needed.
rt = RepetitiveTraining()
gpkernel = SquaredExponentialArdKernel()

#
# One trainer per simulator (models 1 and 2). Each trainer takes a prior
# sampling function and returns the result of GpABC.abc_train_emulator for that
# simulator, passing the summary statistic of the reference data `values`,
# the summary-statistic function itself and `distance_metric`.
#
emulator_trainers = [f(prior_sampler) = GpABC.abc_train_emulator(prior_sampler,
        n_design_points,
        GpABC.build_summary_statistic(summary_statistic)(values),
        sim,
        GpABC.build_summary_statistic(summary_statistic),
        distance_metric)
    for sim in [simulator1, simulator2]]

# One settings object per trainer; the third argument forwards its two inputs
# to gp_regression unchanged.
emulator_settings = [AbcEmulationSettings(n_design_points,
        trainer,
        (x, em) -> gp_regression(x, em)) for trainer in emulator_trainers]

# NOTE(review): the positional argument meanings are defined by GpABC and are
# not visible here - presumably (number of models, number of particles,
# thresholds, model prior, per-model parameter priors, emulator settings,
# batch size, maximum iterations); confirm against the GpABC version in use.
input = EmulatedModelSelectionInput(2, 200, threshold_schedule, modelprior, [priors1, priors2],
    emulator_settings, 100, 1e3)

# NOTE(review): the recorded output of this cell ends with a LoadError about
# `EmulatedModelSelectionTracker` having no field `model_tracker`. This cell
# never accesses that field, so the failure presumably originates inside
# GpABC - check the installed version.
GpABC.model_selection(input, values)

Emulated model selection
Population 1
Trained emulators
ITERATION 1
0 particles accepted in total
Number of sampled models: Model 1: 60	Model 2: 40
Running rejection batch of size 60 for model 1
Accepted 24 particles for model 1
Running rejection batch of size 40 for model 2
Accepted 14 particles for model 2


ITERATION 2
38 particles accepted in total
Number of sampled models: Model 1: 46	Model 2: 54
Running rejection batch of size 46 for model 1
Accepted 14 particles for model 1
Running rejection batch of size 54 for model 2
Accepted 13 particles for model 2


ITERATION 3
65 particles accepted in total
Number of sampled models: Model 1: 42	Model 2: 58
Running rejection batch of size 42 for model 1
Accepted 17 particles for model 1
Running rejection batch of size 58 for model 2
Accepted 17 particles for model 2


ITERATION 4
99 particles accepted in total
Number of sampled models: Model 1: 60	Model 2: 40
Running rejection batch of size 60 for model 1
Accepted 24 particles for model 1


LoadError: [91mtype EmulatedModelSelectionTracker has no field model_tracker[39m

In [None]:
# Draw `n_dp` samples from every entry of `priors` and place them side by side,
# one column per prior. Prints the requested sample count and the size of the
# resulting array as a debugging aid, then returns the array.
function prior_sampler(n_dp, priors)
    println("n_dp = $n_dp")
    columns = map(p -> rand(p, n_dp), priors)
    out = hcat(columns...)
    println(size(out))
    return out
end

# Scratch wrappers that call an emulator trainer with a prior sampler bound to
# the matching priors. The wrapper's own argument `x` is not used: the `x`
# inside the anonymous function is that function's parameter and shadows it.
f1(x) = emulator_trainers[1](x -> prior_sampler(x, priors1))
f2(x) = emulator_trainers[2](x -> prior_sampler(x, priors2))
# NOTE(review): `emulator_trainers` is built from two simulators in this file,
# so index 3 looks out of range - calling f3 would presumably fail; confirm.
f3(x) = emulator_trainers[3](x -> prior_sampler(x, priors3))

# f1(2)
# f2(10)
# f3(10)

# Direct sampler for the model 1 priors, used to compare against the trainer
# wrapper above.
f(x) = prior_sampler(x, priors1)
f1(10)
f(10)

In [None]:
using StatsBase

# Scratch check: concatenate a plain vector with a StatsBase `Weights` vector.
w1 = Weights([0.1,0.3])
vcat([0.1], w1)

In [None]:
# Manually build and train a GP emulator of the distance function for model 3.
n_design_points = 200
# Design matrix: one row per design point, one column per model 3 prior.
X = hcat([rand(p, n_design_points) for p in priors3]...)
# Summary-statistic function and its value on the reference data.
sum_stat = GpABC.build_summary_statistic(summary_statistic)
sum_ref = sum_stat(values)
# Training targets: distance between the summary statistic of the simulated
# trajectory for each design point (row of X) and that of the reference data.
y = [distance_metric(sum_stat(simulator3(X[i,:])), sum_ref) for i = 1:n_design_points]
gpem = GPModel(training_x = X, training_y = y)
gp_train(gpem)
# Evaluate the trained GP at the training inputs, requesting the full
# covariance matrix rather than only per-point variances.
fhat, V = gp_regression(X, gpem, full_covariance_matrix=true)

In [None]:
# Print the training targets, then one joint draw from the multivariate normal
# defined by the GP regression output (mean `fhat`, covariance `V`).
println(y)
println()
# `V` is wrapped in Hermitian and converted back to a dense array before use -
# presumably to remove numerical asymmetry so that MvNormal accepts it; confirm.
f_draw = rand(MvNormal(fhat, Array(Hermitian(V))))
println(f_draw)

In [None]:
# Scatter plot of the training targets against the GP draw, keeping only the
# points whose drawn value has absolute value below the cut-off.
using Plots
gr();
# Cut-off on |f_draw|. NOTE(review): this global is named `eps`, the same name
# as Base's `eps` function - confirm the shadowing is intended.
eps = 1e3
scatter(y[abs.(f_draw).<eps], f_draw[abs.(f_draw).<eps])