/
io.jl
347 lines (291 loc) · 14.1 KB
/
io.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
#
# Input types
#
abstract type ABCInput end
abstract type ABCRejectionInput <: ABCInput end
RepetitiveTraining(; rt_iterations::Int64=0, rt_extra_training_points::Int64=1, rt_sample_size::Int64=1000) =
RepetitiveTraining(rt_iterations, rt_extra_training_points, rt_sample_size)
struct DistanceSimulationInput
reference_summary_statistic::AbstractArray{Float64,1}
simulator_function::Function
summary_statistic::Function
distance_metric::Union{Function,Metric}
end
"""
AbstractEmulatorRetraining
Subtypes of this abstract type control the additional retraining procedure that may or may not be carried out before each iteration of emulation-based ABC. Custom strategies may be implemented by creating new subtypes of this type and new [`abc_retrain_emulator`](@ref) methods for them.
The following implementations are shipped:
- [`IncrementalRetraining`](@ref)
- [`PreviousPopulationRetraining`](@ref)
- [`PreviousPopulationThresholdRetraining`](@ref)
- [`NoopRetraining`](@ref)
"""
abstract type AbstractEmulatorRetraining end
"""
IncrementalRetraining <: AbstractEmulatorRetraining
This emulator retraining strategy samples extra particles from the previous population, and adds them to the set of design points that were used on the previous iteration. The new design points are filtered according to the threshold. The combined set of design points is used to train the new emulator.
# Fields
- `design_points`: number of design points to add on each iteration
- `max_simulations`: maximum number of simulations to perform during re-training on each iteration
"""
struct IncrementalRetraining <: AbstractEmulatorRetraining
design_points::Int
max_simulations::Int
end
"""
PreviousPopulationRetraining <: AbstractEmulatorRetraining
This emulator retraining strategy samples extra particles from the previous population, and uses them to re-train the emulator from scratch. No filtering of the new design points is performed. Design points from the previous iteration are discarded.
"""
struct PreviousPopulationRetraining <: AbstractEmulatorRetraining end
"""
PreviousPopulationThresholdRetraining <: AbstractEmulatorRetraining
This emulator retraining strategy samples extra particles from the previous population, and uses them to re-train the emulator from scratch. Design points from the previous iteration are discarded. This strategy allows to control how many design points are sampled with distance to the reference data below the threshold.
# Fields:
- `n_design_points`: number of design points
- `n_below_threshold`: number of design points below the threshold
- `max_iter`: maximum number of simulations to perform on each re-training iteration
"""
struct PreviousPopulationThresholdRetraining <: AbstractEmulatorRetraining
n_design_points::Int
n_below_threshold::Int
max_iter::Int
end
"""
NoopRetraining <: AbstractEmulatorRetraining
A sentinel retraining strategy that does not do anything. When used, the emulator is trained only once at the start of the process.
"""
struct NoopRetraining <: AbstractEmulatorRetraining end
"""
AbstractEmulatorTraining
Subtypes of this abstract type control how the emulator is trained for emulation-based ABC algorithms (rejection and SMC).
At the moment, only [`DefaultEmulatorTraining`](@ref) is shipped. Custom emulator training procedure
can be implemented by creating new subtypes of this type and overriding [`train_emulator`](@ref) for them.
A typical use case would be trying to control the behaviour of [`gp_train`](@ref) more tightly,
or not using it altogether (e.g. using another optimisation package).
"""
abstract type AbstractEmulatorTraining end
"""
DefaultEmulatorTraining <: AbstractEmulatorTraining
# Fields
- `kernel::AbstractGPKernel`: the kernel ([`AbstractGPKernel`](@ref)) that will be used with the Gaussian Process ([`GPModel`](@ref)). Defaults to [`SquaredExponentialArdKernel`](@ref).
[`train_emulator`](@ref) method with this argument type calls [`gp_train`](@ref) with default arguments.
"""
struct DefaultEmulatorTraining{K<:AbstractGPKernel} <: AbstractEmulatorTraining
kernel::K
end
DefaultEmulatorTraining() = DefaultEmulatorTraining(SquaredExponentialArdKernel())
struct EmulatorTrainingInput{ET<:AbstractEmulatorTraining}
distance_simulation_input::DistanceSimulationInput
design_points::Int64
emulator_training::ET
end
EmulatorTrainingInput(dsi::DistanceSimulationInput) = EmulatorTrainingInput(dsi, DefaultEmulatorTraining())
EmulatorTrainingInput(n_design_points, reference_summary_statistic, simulator_function, summary_statistic, distance_metric, et=DefaultEmulatorTraining()) =
EmulatorTrainingInput(DistanceSimulationInput(
reference_summary_statistic, simulator_function,
build_summary_statistic(summary_statistic), distance_metric),
n_design_points, et)
"""
AbstractEmulatedParticleSelection
Subtypes of this type control the criteria that determine what particles are included in the posterior for emulation-based ABC. Custom strategies
can be implemented by creating new subtypes of this type and overriding [`abc_select_emulated_particles`](@ref) for them.
Three implementations are shipped:
- [`MeanEmulatedParticleSelection`](@ref)
- [`MeanVarEmulatedParticleSelection`](@ref)
- [`PosteriorSampledEmulatedParticleSelection`](@ref)
"""
abstract type AbstractEmulatedParticleSelection end
"""
MeanEmulatedParticleSelection <: AbstractEmulatedParticleSelection
When this strategy is used, the particles for which only the *mean* value returned by
[`gp_regression`](@ref) is below the ABC threshold are included in the posterior.
Variance is not checked.
"""
struct MeanEmulatedParticleSelection <: AbstractEmulatedParticleSelection end
"""
MeanVarEmulatedParticleSelection <: AbstractEmulatedParticleSelection
When this strategy is used, the particles for which both *mean and standard deviation* returned by
[`gp_regression`](@ref) is below the ABC threshold are included in the posterior. In pseudocode:
```julia
means, vars = gp_regression(parameters, gpm)
accepted_indices = findall((means .<= threshold) .& (sqrt.(vars) .<= threshold))
```
The rationale behind using this selection strategy is to take into account the "level of uncertainty"
about the regression prediction that is provided by the Gaussian Process in form of standard deviation.
So, even if the mean of the GP is below the threshold, but the GP is "uncertain" about it
(i.e. the variance is high), this particle will not be included in the posterior distribution of ABC.
It is a more stringent acceptance criteria than `MeanEmulatedParticleSelection`.
# Fields
- `variance_threshold_factor`: scaling factor, by which the ABC threshold is multiplied
before checking the standard deviation. Defaults to 1.0.
"""
struct MeanVarEmulatedParticleSelection <: AbstractEmulatedParticleSelection
variance_threshold_factor::Float64
end
MeanVarEmulatedParticleSelection() = MeanVarEmulatedParticleSelection(1.0)
"""
PosteriorSampledEmulatedParticleSelection <: AbstractEmulatedParticleSelection
When this strategy is used, the distance is sampled from the GP posterior of the [`gp_regression`](@ref)
object. If the sampled distance is below the threshold the particle is accepted.
# Fields
- `use_diagonal_covariance`: if `true`, the GP posterior covariance will be approximated by its
diagonal elements only. Defaults to `false`.
"""
struct PosteriorSampledEmulatedParticleSelection <: AbstractEmulatedParticleSelection
use_diagonal_covariance::Bool
end
PosteriorSampledEmulatedParticleSelection() = PosteriorSampledEmulatedParticleSelection(false)
struct SimulatedABCRejectionInput <: ABCRejectionInput
n_params::Int64
n_particles::Int64
threshold::Float64
priors::AbstractArray{ContinuousUnivariateDistribution,1}
distance_simulation_input::DistanceSimulationInput
max_iter::Int
end
struct EmulatedABCRejectionInput{CUD<:ContinuousUnivariateDistribution, EPS<:AbstractEmulatedParticleSelection} <: ABCRejectionInput
n_params::Int64
n_particles::Int64
threshold::Float64
priors::AbstractArray{CUD,1}
batch_size::Int64
max_iter::Int64
emulator_training_input::EmulatorTrainingInput
selection::EPS
end
abstract type ABCSMCInput <: ABCInput end
struct SimulatedABCSMCInput <: ABCSMCInput
n_params::Int64
n_particles::Int64
threshold_schedule::AbstractArray{Float64,1}
priors::AbstractArray{ContinuousUnivariateDistribution,1}
distance_simulation_input::DistanceSimulationInput
max_iter::Int
end
SimulatedABCRejectionInput(smc_input::SimulatedABCSMCInput) =
SimulatedABCRejectionInput(smc_input.n_params,
smc_input.n_particles,
smc_input.threshold_schedule[1],
smc_input.priors,
smc_input.distance_simulation_input,
smc_input.max_iter)
struct EmulatedABCSMCInput{CUD<:ContinuousUnivariateDistribution,
ER<:AbstractEmulatorRetraining, EPS<:AbstractEmulatedParticleSelection} <: ABCSMCInput
n_params::Int64
n_particles::Int64
threshold_schedule::AbstractArray{Float64,1}
priors::AbstractArray{CUD,1}
batch_size::Int64
max_iter::Int64
emulator_training_input::EmulatorTrainingInput
emulator_retraining::ER
selection::EPS
end
#
# Tracker types
#
abstract type ABCSMCTracker end
mutable struct SimulatedABCSMCTracker <: ABCSMCTracker
n_params::Int64
n_accepted::AbstractArray{Int64,1}
n_tries::AbstractArray{Int64,1}
threshold_schedule::AbstractArray{Float64,1}
can_continue::Bool # we used to return this flag from iterateABCSMC!, but it got broken in julia 1.0 - see https://github.com/JuliaLang/julia/issues/29805
population::AbstractArray{AbstractArray{Float64,2},1}
distances::AbstractArray{AbstractArray{Float64,1},1}
weights::AbstractArray{StatsBase.Weights,1}
priors::AbstractArray{ContinuousUnivariateDistribution,1}
distance_simulation_input::DistanceSimulationInput
max_iter::Int64
end
mutable struct EmulatedABCSMCTracker{CUD<:ContinuousUnivariateDistribution, ET,
ER<:AbstractEmulatorRetraining, EPS<:AbstractEmulatedParticleSelection} <: ABCSMCTracker
n_params::Int64
n_accepted::AbstractArray{Int64,1}
n_tries::AbstractArray{Int64,1}
threshold_schedule::AbstractArray{Float64,1}
can_continue::Bool # we used to return this flag from iterateABCSMC!, but it got broken in julia 1.0 - see https://github.com/JuliaLang/julia/issues/29805
population::AbstractArray{AbstractArray{Float64,2},1}
distances::AbstractArray{AbstractArray{Float64,1},1}
weights::AbstractArray{StatsBase.Weights,1}
priors::AbstractArray{CUD,1}
emulator_training_input::EmulatorTrainingInput
emulator_retraining::ER
selection::EPS
batch_size::Int64
max_iter::Int64
emulators::AbstractArray{ET,1}
end
#
# Output types
#
abstract type ABCOutput end
"""
ABCRejectionOutput
A container for the output of a rejection-ABC computation.
# Fields
- `n_params::Int64`: The number of parameters to be estimated.
- `n_accepted::Int64`: The number of accepted parameter vectors (particles) in the posterior.
- `n_tries::Int64`: The total number of parameter vectors (particles) that were tried.
- `threshold::Float64`: The maximum distance from the summarised model output to summarised observed data for a parameter vector to be included in the posterior.
- `population::AbstractArray{Float64,2}`: The parameter vectors (particles) in the posterior. Size: (`n_accepted`, `n_params`).
- `distances::AbstractArray{Float64,1}`: The distances for each parameter vector (particle) in the posterior to the observed data in summary statistic space. Size: (`n_accepted`).
- `weights::StatsBase.Weights`: The weight of each parameter vector (particle) in the posterior.
"""
abstract type ABCRejectionOutput <: ABCOutput end
struct EmulatedABCRejectionOutput{ET} <: ABCRejectionOutput
n_params::Int64
n_accepted::Int64
n_tries::Int64
threshold::Float64
population::AbstractArray{Float64,2}
distances::AbstractArray{Float64,1}
weights::StatsBase.Weights
emulator::ET
end
struct SimulatedABCRejectionOutput <: ABCRejectionOutput
n_params::Int64
n_accepted::Int64
n_tries::Int64
threshold::Float64
population::AbstractArray{Float64,2}
distances::AbstractArray{Float64,1}
weights::StatsBase.Weights
end
"""
ABCSMCOutput
A container for the output of a rejection-ABC computation.
# Fields
- `n_params::Int64`: The number of parameters to be estimated.
- `n_accepted::Int64`: The number of accepted parameter vectors (particles) in the posterior.
- `n_tries::Int64`: The total number of parameter vectors (particles) that were tried.
- `threshold_schedule::AbstractArray{Float64}`: A set of maximum distances from the summarised model output to summarised observed data for a parameter vector to be included in the posterior. Each distance will be used in a single run of the ABC-SMC algorithm.
- `population::AbstractArray{Float64,2}`: The parameter vectors (particles) in the posterior. Size: (`n_accepted`, `n_params`).
- `distances::AbstractArray{Float64,1}`: The distances for each parameter vector (particle) in the posterior to the observed data in summary statistic space. Size: (`n_accepted`).
- `weights::StatsBase.Weights`: The weight of each parameter vector (particle) in the posterior.
"""
abstract type ABCSMCOutput <: ABCOutput end
struct SimulatedABCSMCOutput <: ABCSMCOutput
n_params::Int64
n_accepted::AbstractArray{Int64,1}
n_tries::AbstractArray{Int64,1}
threshold_schedule::AbstractArray{Float64,1}
population::AbstractArray{AbstractArray{Float64,2},1}
distances::AbstractArray{AbstractArray{Float64,1},1}
weights::AbstractArray{StatsBase.Weights,1}
end
struct EmulatedABCSMCOutput <: ABCSMCOutput
n_params::Int64
n_accepted::AbstractArray{Int64,1}
n_tries::AbstractArray{Int64,1}
threshold_schedule::AbstractArray{Float64,1}
population::AbstractArray{AbstractArray{Float64,2},1}
distances::AbstractArray{AbstractArray{Float64,1},1}
weights::AbstractArray{StatsBase.Weights,1}
emulators::AbstractArray{GPModel,1}
end
function checkABCInput(input::ABCInput)
if input.n_params != length(input.priors)
throw(ArgumentError("There are $(input.n_params) unknown parameters but $(length(input.priors)) priors were provided"))
end
end