/
gapfill_minimum_reactions.jl
210 lines (170 loc) · 7.69 KB
/
gapfill_minimum_reactions.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
"""
$(TYPEDSIGNATURES)
Find a minimal set of reactions from `universal_reactions` that should be added
to `model` so that the model has a feasible solution with bounds on its
objective function given in `objective_bounds`. Weights of the added reactions
may be specified in `weights` to prefer adding reactions with lower weights.
Internally, this builds and solves a mixed integer program, following
the method of Reed et al. (Reed, Jennifer L., et al. "Systems approach to
refining genome annotation." *Proceedings of the National Academy of Sciences*
(2006)).
The function returns a solved JuMP optimization model, with the boolean
reaction inclusion indicators in variable vector `y`. Use
[`gapfilled_mask`](@ref) or [`gapfilled_rids`](@ref) to collect the reaction
information in Julia datatypes.
To reduce the uncertainty in the MILP solver (and likely reduce the
complexity), you may put a limit on the size of the added reaction set in
`maximum_new_reactions`.
# Common pitfalls
If [`gapfill_minimum_reactions`](@ref) is supposed to generate any reasonable
output, the input model *MUST NOT* be feasible, otherwise there is "no work to
do" and no reactions are added. Notably, an inactive model (the flux is zero)
is considered to be feasible. If this is the case, [`gapfilled_rids`](@ref)
will return an empty vector (as opposed to `nothing`).
To prevent this, you may need to modify the model to disallow the trivial
solutions (for example by putting a lower bound on reactions that you expect to
be working in the solved model, in a similar manner like how the ATP
maintenance reaction is bounded in E. Coli "core" model). The
`objective_bounds` parameter makes this easier by directly placing a bound on
the objective value of the model, which typically forces the model to be
active.
The `maximum_new_reactions` parameter may have critical impact on performance
in some solvers, because (in a general worst case) there is
`2^maximum_new_reactions` model variants to be examined. Putting a hard limit
on the reaction count may serve as a heuristic that helps the solver not to
waste too much time solving impractically complex subproblems.
"""
function gapfill_minimum_reactions(
model::MetabolicModel,
universal_reactions::Vector{Reaction},
optimizer;
objective_bounds = (_constants.tolerance, _constants.default_reaction_bound),
maximum_new_reactions = length(universal_reactions),
weights = fill(1.0, length(universal_reactions)),
modifications = [],
)
precache!(model)
# constraints from universal reactions that can fill gaps
univs = _universal_stoichiometry(universal_reactions, metabolites(model))
# add space for additional metabolites and glue with the universal reaction
# stoichiometry
extended_stoichiometry = [[
stoichiometry(model)
spzeros(length(univs.new_mids), n_reactions(model))
] univs.stoichiometry]
# make the model anew (we can't really use make_optimization_model because
# we need the balances and several other things completely removed. Could
# be solved either by parametrizing make_optimization_model or by making a
# tiny temporary wrapper for this.
# keep this in sync with src/base/solver.jl, except for adding balances.
opt_model = Model(optimizer)
@variable(opt_model, x[1:n_reactions(model)])
xl, xu = bounds(model)
@constraint(opt_model, lbs, xl .<= x)
@constraint(opt_model, ubs, x .<= xu)
C = coupling(model)
isempty(C) || begin
cl, cu = coupling_bounds(model)
@constraint(opt_model, c_lbs, cl .<= C * x)
@constraint(opt_model, c_ubs, C * x .<= cu)
end
# add the variables for new stuff
@variable(opt_model, ux[1:length(universal_reactions)]) # fluxes from universal reactions
@variable(opt_model, y[1:length(universal_reactions)], Bin) # indicators
# combined metabolite balances
@constraint(
opt_model,
extended_stoichiometry * [x; ux] .==
[balance(model); zeros(length(univs.new_mids))]
)
# objective bounds
@constraint(opt_model, objective_bounds[1] <= objective(model)' * x)
@constraint(opt_model, objective_bounds[2] >= objective(model)' * x)
# flux bounds of universal reactions with indicators
@constraint(opt_model, ulb, univs.lbs .* y .<= ux)
@constraint(opt_model, uub, univs.ubs .* y .>= ux)
# minimize the total number of indicated reactions
@objective(opt_model, Min, weights' * y)
# limit the number of indicated reactions
# (prevents the solver from exploring too far)
@constraint(opt_model, sum(y) <= maximum_new_reactions)
# apply all modifications
for mod in modifications
mod(model, opt_model)
end
optimize!(opt_model)
return opt_model
end
"""
$(TYPEDSIGNATURES)
Get a `BitVector` of added reactions from the model solved by
[`gapfill_minimum_reactions`](@ref). The bit indexes correspond to the indexes
of `universal_reactions` given to the gapfilling function. In case the model is
not solved, this returns `nothing`.
If this function returns a zero vector (instead of `nothing`), it is very
likely that the original model was already feasible and you may need to
constraint it more. Refer to "pitfalls" section in the documentation of
[`gapfill_minimum_reactions`](@ref) for more details.
# Example
gapfill_minimum_reactions(myModel, myReactions, Tulip.Optimizer) |> gapfilled_mask
"""
gapfilled_mask(opt_model)::BitVector =
is_solved(opt_model) ? value.(opt_model[:y]) .> 0 : nothing
"""
$(TYPEDSIGNATURES)
Utility to extract a short vector of IDs of the reactions added by the
gapfilling algorithm. Use with `opt_model` returned from
[`gapfill_minimum_reactions`](@ref).
If this function returns an empty vector (instead of `nothing`), it is very
likely that the original model was already feasible and you may need to
constraint it more. Refer to "pitfalls" section in the documentation of
[`gapfill_minimum_reactions`](@ref) for more details.
"""
gapfilled_rids(opt_model, universal_reactions::Vector{Reaction}) =
let v = gapfilled_mask(opt_model)
isnothing(v) ? nothing : [rxn.id for rxn in universal_reactions[v]]
end
"""
$(TYPEDSIGNATURES)
Overload of [`gapfilled_rids`](@ref) that can be piped easily.
# Example
gapfill_minimum_reactions(myModel, myReactions, Tulip.Optimizer) |> gapfilled_rids(myReactions)
"""
gapfilled_rids(universal_reactions::Vector{Reaction}) =
opt_model -> gapfilled_rids(opt_model, universal_reactions)
"""
$(TYPEDSIGNATURES)
A helper function that constructs the stoichiometric matrix of a set of
`universal_reactions`. The order of the metabolites is determined with
`mids`, so that this stoichiometric matrix can be combined with
another one.
"""
function _universal_stoichiometry(urxns::Vector{Reaction}, mids::Vector{String})
# traversal over all elements in stoichiometry of universal_reactions
stoiMap(f) = [
f(ridx, mid, stoi) for (ridx, rxn) in enumerate(urxns) for
(mid, stoi) in rxn.metabolites
]
# make an index and find new metabolites
met_id_lookup = Dict(mids .=> eachindex(mids))
new_mids =
collect(Set(filter(x -> !haskey(met_id_lookup, x), stoiMap((_, mid, _) -> mid))))
all_mids = vcat(mids, new_mids)
# remake the index with all metabolites
met_id_lookup = Dict(all_mids .=> eachindex(all_mids))
# build the result
return (
stoichiometry = float.(
sparse(
stoiMap((_, mid, _) -> met_id_lookup[mid]),
stoiMap((ridx, _, _) -> ridx),
stoiMap((_, _, stoi) -> stoi),
length(all_mids),
length(urxns),
),
),
lbs = [rxn.lb for rxn in urxns],
ubs = [rxn.ub for rxn in urxns],
new_mids = new_mids,
)
end