# affine_hit_and_run.jl
"""
$(TYPEDSIGNATURES)

Run a hit-and-run style sampling that starts from `warmup_points` and uses
their affine combinations for generating the run directions to sample the space
delimited by `lbs` and `ubs`. The reaction rate vectors in `warmup_points`
should be organized in columns, i.e. `warmup_points[:,1]` is the first set of
reaction rates.

There are total `chains` of hit-and-run runs, each on a batch of
`size(warmup_points, 2)` points. The runs are scheduled on `workers`, for good
load balancing `chains` should be ideally much greater than `length(workers)`.

Each run continues for `maximum(sample_iters)` iterations; the numbers in
`sample_iters` represent the iterations at which the whole "current" batch of
points is collected for output. For example, `sample_iters=[1,4,5]` causes the
process run for 5 iterations, returning the sample batch that was produced by
1st, 4th and last (5th) iteration.

Returns a matrix of sampled reaction rates (in columns), with all collected
samples horizontally concatenated. The total number of samples (columns) will
be `size(warmup_points,2) * chains * length(sample_iters)`.

# Example
```
warmup_points = warmup_from_variability(model, GLPK.Optimizer)
samples = affine_hit_and_run(model, warmup_points, sample_iters = 101:105)

# convert the result to flux (for models where the distinction matters):
fluxes = reaction_flux(model)' * samples
```
"""
function affine_hit_and_run(
    m::MetabolicModel,
    warmup_points::Matrix{Float64};
    sample_iters = 100 .* (1:5),
    workers = [myid()],
    chains = length(workers),
    seed = rand(Int),
)
    # Validate the input explicitly rather than with `@assert`, which may be
    # disabled at higher optimization levels and must not guard user input.
    size(warmup_points, 1) == n_reactions(m) || throw(
        DimensionMismatch(
            "number of rows in `warmup_points` must equal `n_reactions(m)`",
        ),
    )

    lbs, ubs = bounds(m)
    C = coupling(m)
    cl, cu = coupling_bounds(m)
    if isnothing(C)
        # no coupling constraints present: substitute empty placeholders so
        # that the chain workers can handle both cases uniformly
        C = zeros(0, n_reactions(m))
        cl = zeros(0)
        cu = zeros(0)
    end

    # distribute the shared problem data to all workers once, so that each
    # chain does not have to re-serialize it
    save_at.(workers, :cobrexa_hit_and_run_data, Ref((warmup_points, lbs, ubs, C, cl, cu)))

    # sample all chains; each chain derives a distinct RNG seed from `seed`
    samples = hcat(
        pmap(
            chain -> _affine_hit_and_run_chain(
                (@remote cobrexa_hit_and_run_data)...,
                sample_iters,
                seed + chain,
            ),
            CachingPool(workers),
            1:chains,
        )...,
    )

    # remove the shared data from workers
    map(fetch, remove_from.(workers, :cobrexa_hit_and_run_data))

    return samples
end
"""
$(TYPEDSIGNATURES)

Internal helper function for computing a single affine hit-and-run chain.
"""
function _affine_hit_and_run_chain(warmup, lbs, ubs, C, cl, cu, iters, seed)
    rng = StableRNG(seed % UInt)
    pts = copy(warmup)
    nrxns, npts = size(pts)
    ncpls = size(C, 1)
    out = Matrix{Float64}(undef, nrxns, npts * length(iters))

    # Shrink `interval` so that moving from `coord` along `step` by any
    # multiple inside the interval keeps the coordinate between `lo` and `hi`.
    function restrict(interval, coord, step, lo, hi)
        below = lo - coord
        above = hi - coord
        if step < -_constants.tolerance
            lo_mult, hi_mult = (above, below) ./ step
        elseif step > _constants.tolerance
            lo_mult, hi_mult = (below, above) ./ step
        else
            # direction is (numerically) zero in this coordinate; no limit
            lo_mult, hi_mult = (-Inf, Inf)
        end
        return (max(interval[1], lo_mult), min(interval[2], hi_mult))
    end

    step_counter = 0
    for (batch, target) in enumerate(iters)
        while step_counter < target
            step_counter += 1
            next_pts = copy(pts)
            for p = 1:npts
                # pick a direction towards a random affine combination of the
                # current batch (RNG call order matches the reference exactly)
                weights = rand(rng, npts) .+ _constants.tolerance
                direction = pts * (weights ./ sum(weights)) - pts[:, p]

                # accumulate the feasible range of multiples of `direction`
                # that can be added to the current point
                feasible = (-Inf, Inf)
                for r = 1:nrxns
                    feasible =
                        restrict(feasible, pts[r, p], direction[r], lbs[r], ubs[r])
                end

                # repeat the restriction for the coupling constraints
                cdir = C * direction
                cpos = C * pts[:, p]
                for c = 1:ncpls
                    feasible = restrict(feasible, cpos[c], cdir[c], cl[c], cu[c])
                end

                # pick a random point in the feasible range and step there
                lambda = feasible[1] + rand(rng) * (feasible[2] - feasible[1])
                isfinite(lambda) || continue # avoid divergence
                next_pts[:, p] = pts[:, p] .+ lambda .* direction

                # TODO normally, here we would check if sum(S*new_point) is still
                # lower than the tolerance, but we shall trust the computer
                # instead.
            end
            pts = next_pts
        end
        out[:, npts*(batch-1)+1:batch*npts] .= pts
    end
    out
end