/
simulation.jl
179 lines (154 loc) · 9.92 KB
/
simulation.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
"""
SimulatedABCRejection(
reference_data,
simulator_function,
priors,
threshold,
n_particles;
summary_statistic = "keep_all",
distance_function = Distances.euclidean,
max_iter = 10 * n_particles,
write_progress = true,
progress_every = 1000,
)
Run simulation-based rejection ABC algorithm. Particles are sampled from the prior, and the model is simulated for
each particle. Only those particles are included in the posterior that have distance between the simulation results
and the reference data below the threshold (after taking summary statistics into account).
See [ABC Overview](@ref abc-overview) for more details.
# Mandatory arguments
- `reference_data::AbstractArray{Float,2}`: Observed data to which the simulated model output will be compared. Array dimensions should match that of the simulator function result.
- `simulator_function::Function`: A function that takes a parameter vector as an argument and outputs model results.
- `priors::AbstractArray{ContinuousUnivariateDistribution,1}`: Continuous univariate distributions, from which candidate parameters will be sampled. Array size should match the number of parameters.
- `threshold::Float`: The ``\\varepsilon`` threshold to be used in ABC algorithm. Only those particles that produce simulated results that are within this threshold from the reference data are included into the posterior.
- `n_particles::Int`: The number of parameter vectors (particles) that will be included in the posterior.
# Optional keyword arguments
- `summary_statistic::Union{String,AbstractArray{String,1},Function}`: Summary statistics that will be applied to the data before computing the distances. Defaults to `keep_all`. See [detailed documentation of summary statistics](@ref summary_stats).
- `distance_function::Union{Function,Metric}`: A function or metric that will be used to compute the distance between the summary statistic of the simulated data and that of reference data. Defaults to `Distances.euclidean`.
- `max_iter::Int`: The maximum number of simulations that will be run. The default is `1000 * n_particles`.
- `write_progress::Bool`: Whether algorithm progress should be printed on standard output. Defaults to `true`.
- `progress_every::Int`: Number of iterations at which to print progress. Defaults to 1000.
# Returns
An [`ABCRejectionOutput`](@ref) object.
"""
function SimulatedABCRejection(reference_data::AbstractArray{AF,2},
simulator_function::Function,
priors::AbstractArray{D,1},
threshold::AF,
n_particles::Int;
summary_statistic::Union{String,AbstractArray{String,1},Function}="keep_all",
distance_function::Union{Function,Metric}=Distances.euclidean,
max_iter::Int=10 * n_particles,
kwargs...
) where {
AF<:AbstractFloat,
D<:ContinuousUnivariateDistribution
}
n_params = length(priors)
summary_statistic = build_summary_statistic(summary_statistic)
reference_summary_statistic = summary_statistic(reference_data)
distance_simulation_input = DistanceSimulationInput(reference_summary_statistic, simulator_function, summary_statistic, distance_function)
input = SimulatedABCRejectionInput(n_params, n_particles, threshold,
priors,
distance_simulation_input,
max_iter)
return ABCrejection(input; kwargs...)
end
"""
SimulatedABCSMC(
reference_data,
simulator_function,
priors,
threshold_schedule,
n_particles;
summary_statistic = "keep_all",
distance_function = Distances.euclidean,
max_iter = 10 * n_particles,
write_progress = true,
progress_every = 1000,
)
Run a simulation-based ABC-SMC algorithm. This is similar to [`SimulatedABCRejection`](@ref),
the main difference being that an array of thresholds is provided instead of a single threshold.
It is assumed that thresholds are sorted in decreasing order.
A simulation based ABC iteration is executed for each threshold. For the first threshold, the provided prior is used.
For each subsequent threshold, the posterior from the previous iteration is used as a prior.
See [ABC Overview](@ref abc-overview) for more details.
# Mandatory arguments
- `reference_data::AbstractArray{Float,2}`: Observed data to which the simulated model output will be compared. Array dimensions should match that of the simulator function result.
- `simulator_function::Function`: A function that takes a parameter vector as an argument and outputs model results.
- `priors::AbstractArray{ContinuousUnivariateDistribution,1}`: Continuous univariate distributions, from which candidate parameters will be sampled during the first iteration. Array size should match the number of parameters.
- `threshold_schedule::AbstractArray{Float,1}`: The threshold schedule to be used in ABC algorithm. An ABC iteration is executed for each threshold. It is assumed that thresholds are sorted in decreasing order.
- `n_particles::Int`: The number of parameter vectors (particles) that will be included in the posterior.
# Optional keyword arguments
- `summary_statistic::Union{String,AbstractArray{String,1},Function}`: Summary statistics that will be applied to the data before computing the distances. Defaults to `keep_all`. See [detailed documentation of summary statistics](@ref summary_stats).
- `distance_function::Union{Function,Metric}`: A function or metric that will be used to compute the distance between the summary statistic of the simulated data and that of reference data. Defaults to `Distances.euclidean`.
- `max_iter::Int`: The maximum number of simulations that will be run. The default is `1000 * n_particles`.
- `write_progress::Bool`: Whether algorithm progress should be printed on standard output. Defaults to `true`.
- `progress_every::Int`: Number of iterations at which to print progress. Defaults to 1000.
# Returns
An [`ABCSMCOutput`](@ref) object.
"""
function SimulatedABCSMC(reference_data::AbstractArray{AF,2},
simulator_function::Function,
priors::AbstractArray{D,1},
threshold_schedule::AbstractArray{AF,1},
n_particles::Int;
summary_statistic::Union{String,AbstractArray{String,1},Function} = "keep_all",
distance_function::Union{Function,Metric}=Distances.euclidean,
max_iter::Int=10 * n_particles,
kwargs...
) where {
AF<:AbstractFloat,
D<:ContinuousUnivariateDistribution
}
n_params = length(priors)
summary_statistic = build_summary_statistic(summary_statistic)
reference_summary_statistic = summary_statistic(reference_data)
distance_simulation_input = DistanceSimulationInput(reference_summary_statistic, simulator_function, summary_statistic, distance_function)
input = SimulatedABCSMCInput(n_params, n_particles, threshold_schedule,
priors, distance_simulation_input,
max_iter)
return ABCSMC(input; kwargs...)
end
"""
SimulatedModelSelection
Perform model selection using simulation-based ABC.
# Arguments
- `reference_data::AbstractArray{Float64,2}`: The observed data to which the simulated model output will be compared. Size: (n_model_trajectories, n_time_points)
- `threshold_schedule::AbstractArray{Float64}`: A set of maximum distances from the summarised model output to summarised observed data for a parameter vector to be included in the posterior. Each distance will be used in a single run of the ABC-SMC algorithm.
- `parameter_priors::AbstractArray{AbstractArray{ContinuousUnivariateDistribution},1}`: Priors for the parameters of each model. The length of the outer array is the number of models.
- `summary_statistic::Union{String,AbstractArray{String,1},Function}`: Either: 1. A `String` or 1D Array of strings that Or 2. A function that outputs a 1D Array of Floats that summarises model output. Defaults to `keep_all`. See [detailed documentation of summary statistics](@ref summary_stats).
- `simulator_functions::AbstractArray{Function,1}`: An array of functions that take a parameter vector as an argument and outputs model results (one per model).
- 'model_prior::DiscreteUnivariateDistribution': The prior from which models are sampled. Default is a discrete, uniform distribution.
- `distance_function::Union{Function,Metric}`: A function or metric that computes the distance between 2 1D Arrays. Optional argument (default is to use the Euclidean distance).
- `max_iter::Int`: The maximum number of simulations that will be run. The default is 1000*`n_particles`. Each iteration samples a single model and performs ABC using a single particle.
# Returns
A [`ModelSelectionOutput`](@ref) object that contains which models are supported by the observed data.
"""
function SimulatedModelSelection(
reference_data::AbstractArray{AF,2},
simulator_functions::AbstractArray{Function,1},
parameter_priors::AbstractArray{AD,1},
threshold_schedule::AbstractArray{Float64,1},
n_particles::Int,
model_prior::DiscreteUnivariateDistribution=Distributions.DiscreteUniform(1,length(parameter_priors));
summary_statistic::Union{String,AbstractArray{String,1},Function}="keep_all",
distance_function::Union{Function,Metric}=Distances.euclidean,
max_iter::Int=10000
) where {
AF<:AbstractFloat,
D<:ContinuousUnivariateDistribution,
AD<:AbstractArray{D,1}
}
summary_statistic = build_summary_statistic(summary_statistic)
reference_summary_statistic = summary_statistic(reference_data)
distance_simulation_input = [DistanceSimulationInput(reference_summary_statistic,
simulator_functions[m], summary_statistic, distance_function) for m in 1:length(parameter_priors)]
input = SimulatedModelSelectionInput(length(parameter_priors),
n_particles,
threshold_schedule,
model_prior,
parameter_priors,
distance_simulation_input,
max_iter)
return model_selection(input)
end