In [18]:
import numpy as np
import pandas as pd
import cobra
from cobra.io import read_sbml_model
from cobra import Model, Reaction, Metabolite
from cobra.flux_analysis import pfba
from scipy.optimize import dual_annealing
from IPython.display import display

In [19]:
model = read_sbml_model('iCHO2441_221-107_producing.xml')
model

0,1
Name,iCHO2441_221107_producing
Memory address,1c13bd2dd90
Number of metabolites,4174
Number of reactions,6337
Number of genes,2441
Number of groups,15
Objective expression,1.0*biomass_cho_prod - 1.0*biomass_cho_prod_reverse_1b5b7
Compartments,"cytosol, lysosome, mitochondria, endoplasmicReticulum, nucleus, extracellularSpace, peroxisome, golgiApparatus, secretoryVesicle"


In [20]:
#update bounds to match experimental early exponential data
bounds_df = pd.read_csv('stationary_death_bounds.csv')

for index, row in bounds_df.iterrows():
    reaction = model.reactions.get_by_id(row['reaction'])
    reaction.lower_bound = row['lower bound']
    reaction.upper_bound = row['upper bound']

In [21]:
#test all bounds updated correctly
mismatches = []
for index, row in bounds_df.iterrows():
    reaction = model.reactions.get_by_id(row['reaction'])
    if reaction.lower_bound != row['lower bound'] or reaction.upper_bound != row['upper bound']:
        mismatches.append((row['reaction'], reaction.lower_bound, reaction.upper_bound, row['lower bound'], row['upper bound']))

# Print mismatches if any
if mismatches:
    print(f"{len(mismatches)} reactions have incorrect bounds:")
    for rxn, lb_model, ub_model, lb_csv, ub_csv in mismatches[:10]:  # Show first 10 mismatches
        print(f"{rxn}: Model({lb_model}, {ub_model}) != CSV({lb_csv}, {ub_csv})")
else:
    print("All reaction bounds were correctly updated!")

All reaction bounds were correctly updated!


In [22]:
#remove non-negative bound on lactate and ammonia exchange reactions to match experimental findings

model.reactions.get_by_id('EX_lac_L(e)').lower_bound = -1000
model.reactions.get_by_id('EX_nh4(e)').lower_bound = -1000
model.reactions.get_by_id('biomass_cho_prod').lower_bound = -1000

In [6]:
%%time

# Slower version of this script which looks at standard FBA solutions to find an optimal objective function for the qualitative experimental data

model.objective = {}

# Define qualitative constraints (reaction ID -> expected flux direction)
qualitative_constraints = {
    "igg_formation": 1,   # IgG secretion (positive flux)
    "biomass_cho_prod": -1,   # Biomass secretion (negative flux)
    "EX_nh4(e)": 1,  # Ammonia uptake (negative flux)
    "EX_lac_L(e)": -1   # Lactate uptake (positive flux)
}

# Define qualitative criteria reactions and the reactions to include in the objective
selected_qualitative_reactions = list(qualitative_constraints.keys())  # These are the reactions for qualitative criteria
objective_reactions = ['igg_formation', 'biomass_cho_prod', 'EX_glc(e)', 'EX_gln_L(e)', 'EX_nh4(e)', 'EX_lac_L(e)']  # Reactions that can be included in the objective function

# Run FBA for a given vector of objective coefficients for the reactions above, and compute difference from qualitative success criteria
def qualitative_objective_difference(c):
    """Compute mismatch score between predicted and qualitative fluxes for a given objective function."""
    # Set the objective coefficients for each reaction
    for rxn_id, coef in zip(objective_reactions, c):
        model.reactions.get_by_id(rxn_id).objective_coefficient = coef
    
    # Solve the FBA problem for the given objective function
    solution = model.optimize()
    
    # Compute qualitative fluxes (from qualitative constraints)
    fluxes = solution.fluxes[selected_qualitative_reactions]
    qualitative_fluxes = np.array([qualitative_constraints[rxn] for rxn in selected_qualitative_reactions])
    
    # Compute agreement (penalise mismatches)
    difference = np.sum(np.sign(fluxes) != qualitative_fluxes)
    
    # Return the sum of the qualitative mismatch
    return difference

# Define the bounds for each reaction coefficient in the objective functions to test
bounds = [(-1, 1)] * len(objective_reactions)

# Perform Simulated Annealing to find global minimum value for the difference of FBA solutions from the qualitative criteria for all combinations of objective function coefficients, hence an optimal objective function
# See for summary of method -> https://en.wikipedia.org/wiki/Simulated_annealing -> other potential algorithms that could be used here are Bayesian Optimization, Random Search, and Particle Swarm Optimization
result = dual_annealing(qualitative_objective_difference, bounds)

#Scaling optimal objective function so coefficients sum to 1
scaled_result = (result.x / np.sum(np.abs(result.x)))

# Print the result
print("Optimal solution:", list(zip(objective_reactions, scaled_result)))
print("\nPercentage accuracy for solution: ", (1 - (qualitative_objective_difference(result.x)/len(selected_qualitative_reactions))))

Optimal solution: [('igg_formation', 0.05879546337200458), ('biomass_cho_prod', 0.18369749548234376), ('EX_glc(e)', -0.3746589627514603), ('EX_gln_L(e)', 0.20071761166140473), ('EX_nh4(e)', 0.17669010288023276), ('EX_lac_L(e)', -0.005440363852553834)]

Percentage accuracy for solution:  0.5
CPU times: total: 32min 34s
Wall time: 32min 44s


In [7]:
# summary of the solution from the faster standard FBA script

with model:
    model.objective = {}
    for rxn_id, coef in zip(objective_reactions, scaled_result):
        model.reactions.get_by_id(rxn_id).objective_coefficient = coef
        
    print('the current model objective function is:',model.objective)
    solution = model.optimize()

    print(np.array([model.solver.variables[rxn_id].primal for rxn_id in selected_qualitative_reactions]))
    print(np.fromiter((qualitative_constraints[rxn_id] for rxn_id in selected_qualitative_reactions), dtype=int))
    
    print('\nigg flux: ', solution.fluxes.get('igg_formation'))
    print('\nbiomass flux: ', solution.fluxes.get('biomass_cho_prod'))
    display(model.summary())

the current model objective function is: Maximize
-0.37465896275146*EX_glc(e) + 0.37465896275146*EX_glc(e)_reverse_bcf3e + 0.200717611661405*EX_gln_L(e) - 0.200717611661405*EX_gln_L(e)_reverse_75782 - 0.00544036385255383*EX_lac_L(e) + 0.00544036385255383*EX_lac_L(e)_reverse_32b05 + 0.176690102880233*EX_nh4(e) - 0.176690102880233*EX_nh4(e)_reverse_db85a + 0.183697495482344*biomass_cho_prod - 0.183697495482344*biomass_cho_prod_reverse_1b5b7 + 0.0587954633720046*igg_formation - 0.0587954633720046*igg_formation_reverse_7519c
[0.         0.         0.42139194 0.        ]
[ 1 -1  1 -1]

igg flux:  0.0

biomass flux:  0.0


Metabolite,Reaction,Flux,C-Number,C-Flux
arg_L[e],EX_arg_L(e),0.01978,6,4.54%
asn_L[e],EX_asn_L(e),0.04038,4,6.18%
asp_L[e],EX_asp_L(e),0.009341,4,1.43%
chol[e],EX_chol(e),0.008753,5,1.67%
cys_L[e],EX_cys_L(e),0.00522,3,0.60%
fol[e],EX_fol(e),0.0004567,19,0.33%
glc_D[e],EX_glc(e),0.1984,6,45.50%
gln_L[e],EX_gln_L(e),0.06703,5,12.81%
his_L[e],EX_his_L(e),0.003297,6,0.76%
ile_L[e],EX_ile_L(e),0.01016,6,2.33%

Metabolite,Reaction,Flux,C-Number,C-Flux
2hb[e],EX_2hb(e),-0.007328,4,1.11%
2hyoxplac[e],EX_2hyoxplac(e),-0.006711,8,2.03%
34dhoxpeg[e],EX_34dhoxpeg(e),-0.007009,8,2.12%
3mob[e],EX_3mob(e),-0.01112,5,2.10%
3mop[e],EX_3mop(e),-0.01113,6,2.52%
3mox4hoxm[e],EX_3mox4hoxm(e),-0.003405,9,1.16%
4mop[e],EX_4mop(e),-0.01283,6,2.90%
5mthf[e],EX_5mthf(e),-0.0004567,20,0.34%
ac[e],EX_ac(e),-0.003214,2,0.24%
acald[e],EX_acald(e),-0.00223,2,0.17%


In [8]:
%%time

# Slower version of this script which looks at standard FBA solutions to find an optimal objective function for the qualitative experimental data

model.objective = {}

# Define qualitative constraints (reaction ID -> expected flux direction)
qualitative_constraints = {
    "igg_formation": 1,   # IgG secretion (positive flux)
    "biomass_cho_prod": -1,   # Biomass secretion (positive flux)
    "EX_nh4(e)": 1,  # Ammonia uptake (negative flux)
    "EX_lac_L(e)": -1   # Glucose uptake (negative flux)
}

# Define qualitative criteria reactions and the reactions to include in the objective
selected_qualitative_reactions = list(qualitative_constraints.keys())  # These are the reactions for qualitative criteria
objective_reactions = ['igg_formation']  # Reactions that can be included in the objective function

# Run FBA for a given vector of objective coefficients for the reactions above, and compute difference from qualitative success criteria
def qualitative_objective_difference(c):
    """Compute mismatch score between predicted and qualitative fluxes for a given objective function."""
    # Set the objective coefficients for each reaction
    for rxn_id, coef in zip(objective_reactions, c):
        model.reactions.get_by_id(rxn_id).objective_coefficient = coef
    
    # Solve the FBA problem for the given objective function
    solution = model.optimize()
    
    # Compute qualitative fluxes (from qualitative constraints)
    fluxes = solution.fluxes[selected_qualitative_reactions]
    qualitative_fluxes = np.array([qualitative_constraints[rxn] for rxn in selected_qualitative_reactions])
    
    # Compute agreement (penalise mismatches)
    difference = np.sum(np.sign(fluxes) != qualitative_fluxes)
    
    # Return the sum of the qualitative mismatch
    return difference

# Define the bounds for each reaction coefficient in the objective functions to test
bounds = [(-1, 1)] * len(objective_reactions)

# Perform Simulated Annealing to find global minimum value for the difference of FBA solutions from the qualitative criteria for all combinations of objective function coefficients, hence an optimal objective function
# See for summary of method -> https://en.wikipedia.org/wiki/Simulated_annealing -> other potential algorithms that could be used here are Bayesian Optimization, Random Search, and Particle Swarm Optimization
result = dual_annealing(qualitative_objective_difference, bounds)

#Scaling optimal objective function so coefficients sum to 1
scaled_result = (result.x / np.sum(np.abs(result.x)))

# Print the result
print("Optimal solution:", list(zip(objective_reactions, scaled_result)))
print("\nPercentage accuracy for solution: ", (1 - (qualitative_objective_difference(result.x)/len(selected_qualitative_reactions))))

Optimal solution: [('igg_formation', 1.0)]

Percentage accuracy for solution:  0.5
CPU times: total: 3min 13s
Wall time: 3min 14s


In [9]:
# summary of the solution from the faster standard FBA script

with model:
    model.objective = {}
    for rxn_id, coef in zip(objective_reactions, scaled_result):
        model.reactions.get_by_id(rxn_id).objective_coefficient = coef
        
    print('the current model objective function is:',model.objective)
    solution = model.optimize()

    print(np.array([model.solver.variables[rxn_id].primal for rxn_id in selected_qualitative_reactions]))
    print(np.fromiter((qualitative_constraints[rxn_id] for rxn_id in selected_qualitative_reactions), dtype=int))
    
    print('\nigg flux: ', solution.fluxes.get('igg_formation'))
    print('\nbiomass flux: ', solution.fluxes.get('biomass_cho_prod'))
    display(model.summary())

the current model objective function is: Maximize
1.0*igg_formation - 1.0*igg_formation_reverse_7519c
[8.39306926e-05 0.00000000e+00 2.03693254e-01 1.43115167e-02]
[ 1 -1  1 -1]

igg flux:  8.393069257313138e-05

biomass flux:  0.0


Metabolite,Reaction,Flux,C-Number,C-Flux
arg_L[e],EX_arg_L(e),0.00244,6,0.76%
asn_L[e],EX_asn_L(e),0.004364,4,0.90%
asp_L[e],EX_asp_L(e),0.003861,4,0.80%
cys_L[e],EX_cys_L(e),0.002686,3,0.42%
fol[e],EX_fol(e),0.0004567,19,0.45%
glc_D[e],EX_glc(e),0.1984,6,61.69%
gln_L[e],EX_gln_L(e),0.04994,5,12.94%
his_L[e],EX_his_L(e),0.002182,6,0.68%
ile_L[e],EX_ile_L(e),0.00235,6,0.73%
leu_L[e],EX_leu_L(e),0.007554,6,2.35%

Metabolite,Reaction,Flux,C-Number,C-Flux
igg[g],DM_igg[g],-8.393e-05,95,0.57%
4abut[e],EX_4abut(e),-0.03187,4,9.19%
5mthf[e],EX_5mthf(e),-0.0004567,20,0.66%
acald[e],EX_acald(e),-0.0008244,2,0.12%
ala_L[e],EX_ala_L(e),-0.01757,3,3.80%
bhb[e],EX_bhb(e),-0.00104,4,0.30%
co2[e],EX_co2(e),-0.003083,1,0.22%
for[e],EX_for(e),-0.0003062,1,0.02%
h[e],EX_h(e),-0.2958,0,0.00%
h2o[e],EX_h2o(e),-0.4043,0,0.00%


In [10]:
%%time

# Slower version of this script which looks at standard FBA solutions to find an optimal objective function for the qualitative experimental data

model.objective = {}

# Define qualitative constraints (reaction ID -> expected flux direction)
qualitative_constraints = {
    "igg_formation": 1,   # IgG secretion (positive flux)
    "biomass_cho_prod": -1,   # Biomass secretion (positive flux)
    "EX_nh4(e)": 1,  # Ammonia uptake (negative flux)
    "EX_lac_L(e)": -1   # Glucose uptake (negative flux)
}

# Define qualitative criteria reactions and the reactions to include in the objective
selected_qualitative_reactions = list(qualitative_constraints.keys())  # These are the reactions for qualitative criteria
objective_reactions = ['biomass_cho_prod']  # Reactions that can be included in the objective function

# Run FBA for a given vector of objective coefficients for the reactions above, and compute difference from qualitative success criteria
def qualitative_objective_difference(c):
    """Compute mismatch score between predicted and qualitative fluxes for a given objective function."""
    # Set the objective coefficients for each reaction
    for rxn_id, coef in zip(objective_reactions, c):
        model.reactions.get_by_id(rxn_id).objective_coefficient = coef
    
    # Solve the FBA problem for the given objective function
    solution = model.optimize()
    
    # Compute qualitative fluxes (from qualitative constraints)
    fluxes = solution.fluxes[selected_qualitative_reactions]
    qualitative_fluxes = np.array([qualitative_constraints[rxn] for rxn in selected_qualitative_reactions])
    
    # Compute agreement (penalise mismatches)
    difference = np.sum(np.sign(fluxes) != qualitative_fluxes)
    
    # Return the sum of the qualitative mismatch
    return difference

# Define the bounds for each reaction coefficient in the objective functions to test
bounds = [(-1, 1)] * len(objective_reactions)

# Perform Simulated Annealing to find global minimum value for the difference of FBA solutions from the qualitative criteria for all combinations of objective function coefficients, hence an optimal objective function
# See for summary of method -> https://en.wikipedia.org/wiki/Simulated_annealing -> other potential algorithms that could be used here are Bayesian Optimization, Random Search, and Particle Swarm Optimization
result = dual_annealing(qualitative_objective_difference, bounds)

#Scaling optimal objective function so coefficients sum to 1
scaled_result = (result.x / np.sum(np.abs(result.x)))

# Print the result
print("Optimal solution:", list(zip(objective_reactions, scaled_result)))
print("\nPercentage accuracy for solution: ", (1 - (qualitative_objective_difference(result.x)/len(selected_qualitative_reactions))))

Optimal solution: [('biomass_cho_prod', -1.0)]

Percentage accuracy for solution:  0.25
CPU times: total: 3min 49s
Wall time: 2h 4min 6s


In [11]:
# summary of the solution from the faster standard FBA script

with model:
    model.objective = {}
    for rxn_id, coef in zip(objective_reactions, scaled_result):
        model.reactions.get_by_id(rxn_id).objective_coefficient = coef
        
    print('the current model objective function is:',model.objective)
    solution = model.optimize()

    print(np.array([model.solver.variables[rxn_id].primal for rxn_id in selected_qualitative_reactions]))
    print(np.fromiter((qualitative_constraints[rxn_id] for rxn_id in selected_qualitative_reactions), dtype=int))
    
    print('\nigg flux: ', solution.fluxes.get('igg_formation'))
    print('\nbiomass flux: ', solution.fluxes.get('biomass_cho_prod'))
    display(model.summary())

the current model objective function is: Maximize
-1.0*biomass_cho_prod + 1.0*biomass_cho_prod_reverse_1b5b7
[0.         0.         0.04975632 0.02123401]
[ 1 -1  1 -1]

igg flux:  0.0

biomass flux:  0.0


Metabolite,Reaction,Flux,C-Number,C-Flux

Metabolite,Reaction,Flux,C-Number,C-Flux


In [27]:
%%time

# Slower version of this script which looks at standard FBA solutions to find an optimal objective function for the qualitative experimental data

model.objective = {}

# Define qualitative constraints (reaction ID -> expected flux direction)
qualitative_constraints = {
    "igg_formation": 1,   # IgG secretion (positive flux)
    "biomass_cho_prod": 0,   # Biomass secretion (positive flux)
    "EX_nh4(e)": 1,  # Ammonia uptake (negative flux)
    "EX_lac_L(e)": -1   # Glucose uptake (negative flux)
}

# Define qualitative criteria reactions and the reactions to include in the objective
selected_qualitative_reactions = list(qualitative_constraints.keys())  # These are the reactions for qualitative criteria
objective_reactions = ['igg_formation', "EX_lac_L(e)"]  # Reactions that can be included in the objective function

# Run FBA for a given vector of objective coefficients for the reactions above, and compute difference from qualitative success criteria
def qualitative_objective_difference(c):
    """Compute mismatch score between predicted and qualitative fluxes for a given objective function."""
    # Set the objective coefficients for each reaction
    for rxn_id, coef in zip(objective_reactions, c):
        model.reactions.get_by_id(rxn_id).objective_coefficient = coef
    
    # Solve the FBA problem for the given objective function
    solution = model.optimize()
    
    # Compute qualitative fluxes (from qualitative constraints)
    fluxes = solution.fluxes[selected_qualitative_reactions]
    qualitative_fluxes = np.array([qualitative_constraints[rxn] for rxn in selected_qualitative_reactions])
    
    # Compute agreement (penalise mismatches)
    difference = np.sum(np.sign(fluxes) != qualitative_fluxes)
    
    # Return the sum of the qualitative mismatch
    return difference

# Define the bounds for each reaction coefficient in the objective functions to test
bounds = [(-1, 1)] * len(objective_reactions)

# Perform Simulated Annealing to find global minimum value for the difference of FBA solutions from the qualitative criteria for all combinations of objective function coefficients, hence an optimal objective function
# See for summary of method -> https://en.wikipedia.org/wiki/Simulated_annealing -> other potential algorithms that could be used here are Bayesian Optimization, Random Search, and Particle Swarm Optimization
result = dual_annealing(qualitative_objective_difference, bounds)

#Scaling optimal objective function so coefficients sum to 1
scaled_result = (result.x / np.sum(np.abs(result.x)))

# Print the result
print("Optimal solution:", list(zip(objective_reactions, scaled_result)))
print("\nPercentage accuracy for solution: ", (1 - (qualitative_objective_difference(result.x)/len(selected_qualitative_reactions))))

Optimal solution: [('igg_formation', 0.2885212669314776), ('EX_lac_L(e)', -0.7114787330685224)]

Percentage accuracy for solution:  1.0
CPU times: total: 7min 59s
Wall time: 8min 2s


In [28]:
# summary of the solution from the faster standard FBA script

with model:
    model.objective = {}
    for rxn_id, coef in zip(objective_reactions, scaled_result):
        model.reactions.get_by_id(rxn_id).objective_coefficient = coef
        
    print('the current model objective function is:',model.objective)
    solution = model.optimize()

    print(np.array([model.solver.variables[rxn_id].primal for rxn_id in selected_qualitative_reactions]))
    print(np.fromiter((qualitative_constraints[rxn_id] for rxn_id in selected_qualitative_reactions), dtype=int))
    
    print('\nigg flux: ', solution.fluxes.get('igg_formation'))
    print('\nbiomass flux: ', solution.fluxes.get('biomass_cho_prod'))
    display(model.summary())

the current model objective function is: Maximize
-0.711478733068522*EX_lac_L(e) + 0.711478733068522*EX_lac_L(e)_reverse_32b05 + 0.288521266931478*igg_formation - 0.288521266931478*igg_formation_reverse_7519c
[8.39306926e-05 0.00000000e+00 9.58971680e-02 0.00000000e+00]
[ 1  0  1 -1]

igg flux:  8.393069257313135e-05

biomass flux:  0.0


Metabolite,Reaction,Flux,C-Number,C-Flux
arg_L[e],EX_arg_L(e),0.00244,6,0.73%
asn_L[e],EX_asn_L(e),0.004364,4,0.87%
asp_L[e],EX_asp_L(e),0.003861,4,0.77%
cys_L[e],EX_cys_L(e),0.002686,3,0.40%
fol[e],EX_fol(e),0.0004567,19,0.43%
glc_D[e],EX_glc(e),0.1984,6,59.57%
gln_L[e],EX_gln_L(e),0.04623,5,11.57%
his_L[e],EX_his_L(e),0.002182,6,0.66%
ile_L[e],EX_ile_L(e),0.00235,6,0.71%
lac_L[e],EX_lac_L(e),0.02483,3,3.73%

Metabolite,Reaction,Flux,C-Number,C-Flux
igg[g],DM_igg[g],-8.393e-05,95,0.55%
4abut[e],EX_4abut(e),-0.03512,4,9.64%
5mthf[e],EX_5mthf(e),-0.0004567,20,0.63%
acald[e],EX_acald(e),-0.0008244,2,0.11%
ala_L[e],EX_ala_L(e),-0.0134,3,2.76%
bhb[e],EX_bhb(e),-0.004749,4,1.30%
co2[e],EX_co2(e),-0.003083,1,0.21%
for[e],EX_for(e),-0.0003062,1,0.02%
h[e],EX_h(e),-0.2989,0,0.00%
h2o[e],EX_h2o(e),-0.449,0,0.00%
