In [48]:
import numpy as np
import pandas as pd
import cobra
from cobra.io import read_sbml_model
from cobra import Model, Reaction, Metabolite
from cobra.flux_analysis import pfba
from scipy.optimize import dual_annealing
from IPython.display import display

In [49]:
model = read_sbml_model('iCHO2441_221-107_producing.xml')
model

0,1
Name,iCHO2441_221107_producing
Memory address,1ce6def75c0
Number of metabolites,4174
Number of reactions,6337
Number of genes,2441
Number of groups,15
Objective expression,1.0*biomass_cho_prod - 1.0*biomass_cho_prod_reverse_1b5b7
Compartments,"cytosol, lysosome, mitochondria, endoplasmicReticulum, nucleus, extracellularSpace, peroxisome, golgiApparatus, secretoryVesicle"


In [50]:
#update bounds to match experimental late exponential data
bounds_df = pd.read_csv('bounds_df.csv')

for index, row in bounds_df.iterrows():
    reaction = model.reactions.get_by_id(row['reaction'])
    reaction.lower_bound = row['lower bound']
    reaction.upper_bound = row['upper bound']

In [51]:
#test all bounds updated correctly
mismatches = []
for index, row in bounds_df.iterrows():
    reaction = model.reactions.get_by_id(row['reaction'])
    if reaction.lower_bound != row['lower bound'] or reaction.upper_bound != row['upper bound']:
        mismatches.append((row['reaction'], reaction.lower_bound, reaction.upper_bound, row['lower bound'], row['upper bound']))

# Print mismatches if any
if mismatches:
    print(f"{len(mismatches)} reactions have incorrect bounds:")
    for rxn, lb_model, ub_model, lb_csv, ub_csv in mismatches[:10]:  # Show first 10 mismatches
        print(f"{rxn}: Model({lb_model}, {ub_model}) != CSV({lb_csv}, {ub_csv})")
else:
    print("All reaction bounds were correctly updated!")

All reaction bounds were correctly updated!


In [52]:
#remove non-negative bound on lactate and ammonia exchange reactions to match experimental findings

model.reactions.get_by_id('EX_lac_L(e)').lower_bound = -1000
model.reactions.get_by_id('EX_nh4(e)').lower_bound = -1000

""" For reference:

igg = model.reactions.get_by_id('igg_formation')
lactate = model.reactions.get_by_id('EX_lac_L(e)')
glutamine = model.reactions.get_by_id('EX_gln_L(e)')
glucose = model.reactions.get_by_id('EX_glc(e)')
ammonia = model.reactions.get_by_id('EX_nh4(e)')
biomass = model.reactions.get_by_id('biomass_cho_prod')

"""

In [54]:
# Slower version of this script which looks at standard FBA solutions to find an optimal objective function for the qualitative experimental data

%%time

model.objective = {}

# Define qualitative constraints (reaction ID -> expected flux direction)
qualitative_constraints = {
    "igg_formation": 1,   # IgG secretion (positive flux)
    "biomass_cho_prod": 1,   # Biomass secretion (positive flux)
    "EX_nh4(e)": -1,  # Ammonia uptake (negative flux)
    "EX_lac_L(e)": -1   # Glucose uptake (negative flux)
}

# Define qualitative criteria reactions and the reactions to include in the objective
selected_qualitative_reactions = list(qualitative_constraints.keys())  # These are the reactions for qualitative criteria
objective_reactions = ['igg_formation', 'biomass_cho_prod', 'EX_glc(e)', 'EX_gln_L(e)', 'EX_nh4(e)', 'EX_lac_L(e)']  # Reactions that can be included in the objective function

# Run FBA for a given vector of objective coefficients for the reactions above, and compute difference from qualitative success criteria
def qualitative_objective_difference(c):
    """Compute mismatch score between predicted and qualitative fluxes for a given objective function."""
    # Set the objective coefficients for each reaction
    for rxn_id, coef in zip(objective_reactions, c):
        model.reactions.get_by_id(rxn_id).objective_coefficient = coef
    
    # Solve the FBA problem for the given objective function
    solution = model.optimize()
    
    # Compute qualitative fluxes (from qualitative constraints)
    fluxes = solution.fluxes[selected_qualitative_reactions]
    qualitative_fluxes = np.array([qualitative_constraints[rxn] for rxn in selected_qualitative_reactions])
    
    # Compute agreement (penalise mismatches)
    difference = np.sum(np.sign(fluxes) != qualitative_fluxes)
    
    # Return the sum of the qualitative mismatch
    return difference

# Define the bounds for each reaction coefficient in the objective functions to test
bounds = [(-1, 1)] * len(objective_reactions)

# Perform Simulated Annealing to find global minimum value for the difference of FBA solutions from the qualitative criteria for all combinations of objective function coefficients, hence an optimal objective function
# See for summary of method -> https://en.wikipedia.org/wiki/Simulated_annealing -> other potential algorithms that could be used here are Bayesian Optimization, Random Search, and Particle Swarm Optimization
result = dual_annealing(qualitative_objective_difference, bounds)

#Scaling optimal objective function so coefficients sum to 1
scaled_result = (result.x / np.sum(np.abs(result.x)))

# Print the result
print("Optimal solution:", list(zip(objective_reactions, scaled_result)))

Optimal solution: [('igg_formation', 0.265376114797455), ('biomass_cho_prod', 0.2705508099372251), ('EX_glc(e)', -0.04705858234519941), ('EX_gln_L(e)', -0.19314941462833968), ('EX_nh4(e)', -0.0002114846054501431), ('EX_lac_L(e)', -0.22365359368633075)]
CPU times: total: 32min 17s
Wall time: 32min 23s


In [55]:
# summary of the solution from the slower standard FBA script

with model:
    model.objective = {}
    for rxn_id, coef in zip(objective_reactions, scaled_result):
        model.reactions.get_by_id(rxn_id).objective_coefficient = coef
        
    print('the current model objective function is:',model.objective)
    solution = model.optimize()
    
    print('\nigg flux: ', solution.fluxes.get('igg_formation'))
    print('\nbiomass flux: ', solution.fluxes.get('biomass_cho_prod'))
    display(model.summary())

the current model objective function is: Maximize
-0.0470585823451994*EX_glc(e) + 0.0470585823451994*EX_glc(e)_reverse_bcf3e - 0.19314941462834*EX_gln_L(e) + 0.19314941462834*EX_gln_L(e)_reverse_75782 - 0.223653593686331*EX_lac_L(e) + 0.223653593686331*EX_lac_L(e)_reverse_32b05 - 0.000211484605450143*EX_nh4(e) + 0.000211484605450143*EX_nh4(e)_reverse_db85a + 0.270550809937225*biomass_cho_prod - 0.270550809937225*biomass_cho_prod_reverse_1b5b7 + 0.265376114797455*igg_formation - 0.265376114797455*igg_formation_reverse_7519c

igg flux:  2.03310159434104e-05

biomass flux:  0.0020123131513649


Metabolite,Reaction,Flux,C-Number,C-Flux
arg_L[e],EX_arg_L(e),0.001197,6,0.38%
asp_L[e],EX_asp_L(e),0.009341,4,1.98%
chol[e],EX_chol(e),0.000162,5,0.04%
cys_L[e],EX_cys_L(e),0.0009547,3,0.15%
glc_D[e],EX_glc(e),0.1984,6,63.20%
gln_L[e],EX_gln_L(e),0.06703,5,17.80%
his_L[e],EX_his_L(e),0.0007734,6,0.25%
hxan[e],EX_hxan(e),0.006195,5,1.64%
ile_L[e],EX_ile_L(e),0.001129,6,0.36%
lac_L[e],EX_lac_L(e),0.02528,3,4.03%

Metabolite,Reaction,Flux,C-Number,C-Flux
igg[g],DM_igg[g],-2.033e-05,95,0.12%
4abut[e],EX_4abut(e),-0.03508,4,8.41%
ac[e],EX_ac(e),-0.00221,2,0.27%
ala_L[e],EX_ala_L(e),-0.06116,3,11.00%
asn_L[e],EX_asn_L(e),-0.004144,4,0.99%
cbasp[e],EX_cbasp(e),-0.02783,5,8.34%
citr_L[e],EX_citr_L(e),-0.000331,6,0.12%
co2[e],EX_co2(e),-0.007347,1,0.44%
for[e],EX_for(e),-0.0003088,1,0.02%
glu_L[e],EX_glu_L(e),-0.005483,5,1.64%


In [56]:
# faster version which looks at standard FBA solutions to find an optimal objective function - still testing this to make sure errors don't occur

%%time

model.objective = {}

# Define qualitative constraints (reaction ID -> expected flux direction)
qualitative_constraints = {
    "igg_formation": 1,   # IgG secretion (positive flux)
    "biomass_cho_prod": 1,   # Biomass secretion (positive flux)
    "EX_nh4(e)": -1,  # Ammonia uptake (negative flux)
    "EX_lac_L(e)": -1   # Glucose uptake (negative flux)
}

# Define qualitative criteria reactions and the reactions to include in the objective
selected_qualitative_reactions = list(qualitative_constraints.keys())  # These are the reactions for qualitative criteria
objective_reactions = ['igg_formation', 'biomass_cho_prod', 'EX_glc(e)', 'EX_gln_L(e)', 'EX_nh4(e)', 'EX_lac_L(e)']  # Reactions that can be included in the objective function

# Run FBA for a given vector of objective coefficients for the reactions above, and compute difference from qualitative success criteria
def qualitative_objective_difference(c):
    """Compute mismatch score between predicted and qualitative fluxes for a given objective function."""
    # Set the objective coefficients for each reaction
    for i, rxn_id in enumerate(objective_reactions):
        model.reactions.get_by_id(rxn_id).objective_coefficient = c[i]  
    
    # Solve the FBA problem for the given objective function
    solution = model.slim_optimize()
    
    # Compute qualitative fluxes
    fluxes = np.array([model.solver.variables[rxn_id].primal for rxn_id in selected_qualitative_reactions])
    qualitative_fluxes = np.fromiter((qualitative_constraints[rxn_id] for rxn_id in selected_qualitative_reactions), dtype=int)
    
    # Compute agreement and return the number of qualitative mismatches
    return np.count_nonzero(np.sign(fluxes) != qualitative_fluxes)

# Define the bounds for each reaction coefficient in the objective functions to test
bounds = [(-1, 1)] * len(objective_reactions)

#Perform Simulated Annealing to find global minimum value for the difference of FBA solutions from the qualitative criteria for all combinations of objective function coefficients, hence the optimal objective function
#See for summary of method -> https://en.wikipedia.org/wiki/Simulated_annealing -> other potential algorithms that could be used here are Bayesian Optimization, Random Search, and Particle Swarm Optimization
result = dual_annealing(qualitative_objective_difference, bounds)

#Scaling optimal objective function so coefficients sum to 1
scaled_result = (result.x / np.sum(np.abs(result.x)))

# Print the result
print("Optimal solution:", list(zip(objective_reactions, scaled_result)))

Optimal solution: [('igg_formation', 0.14741950833371764), ('biomass_cho_prod', 0.2021154938968243), ('EX_glc(e)', -0.14450812913216063), ('EX_gln_L(e)', -0.12102607372055381), ('EX_nh4(e)', -0.00011610625232944216), ('EX_lac_L(e)', -0.3848146886644142)]
CPU times: total: 16min 25s
Wall time: 16min 29s


In [57]:
# summary of the solution from the faster standard FBA script

with model:
    model.objective = {}
    for rxn_id, coef in zip(objective_reactions, scaled_result):
        model.reactions.get_by_id(rxn_id).objective_coefficient = coef
        
    print('the current model objective function is:',model.objective)
    solution = model.optimize()
    
    print('\nigg flux: ', solution.fluxes.get('igg_formation'))
    print('\nbiomass flux: ', solution.fluxes.get('biomass_cho_prod'))
    display(model.summary())

the current model objective function is: Maximize
-0.144508129132161*EX_glc(e) + 0.144508129132161*EX_glc(e)_reverse_bcf3e - 0.121026073720554*EX_gln_L(e) + 0.121026073720554*EX_gln_L(e)_reverse_75782 - 0.384814688664414*EX_lac_L(e) + 0.384814688664414*EX_lac_L(e)_reverse_32b05 - 0.000116106252329442*EX_nh4(e) + 0.000116106252329442*EX_nh4(e)_reverse_db85a + 0.202115493896824*biomass_cho_prod - 0.202115493896824*biomass_cho_prod_reverse_1b5b7 + 0.147419508333718*igg_formation - 0.147419508333718*igg_formation_reverse_7519c

igg flux:  2.0331015943410426e-05

biomass flux:  0.0020123131513649


Metabolite,Reaction,Flux,C-Number,C-Flux
arg_L[e],EX_arg_L(e),0.001198,6,0.38%
asp_L[e],EX_asp_L(e),0.009341,4,1.98%
chol[e],EX_chol(e),0.000162,5,0.04%
cys_L[e],EX_cys_L(e),0.0009549,3,0.15%
glc_D[e],EX_glc(e),0.1984,6,63.20%
gln_L[e],EX_gln_L(e),0.06703,5,17.80%
his_L[e],EX_his_L(e),0.0007735,6,0.25%
hxan[e],EX_hxan(e),0.006195,5,1.64%
ile_L[e],EX_ile_L(e),0.00113,6,0.36%
lac_L[e],EX_lac_L(e),0.02528,3,4.03%

Metabolite,Reaction,Flux,C-Number,C-Flux
igg[g],DM_igg[g],-2.034e-05,95,0.12%
4abut[e],EX_4abut(e),-0.03509,4,8.42%
ac[e],EX_ac(e),-0.00221,2,0.27%
ala_D[e],EX_ala_D(e),-0.003227,3,0.58%
ala_L[e],EX_ala_L(e),-0.05794,3,10.42%
asn_L[e],EX_asn_L(e),-0.00413,4,0.99%
cbasp[e],EX_cbasp(e),-0.02783,5,8.35%
citr_L[e],EX_citr_L(e),-0.0003373,6,0.12%
co2[e],EX_co2(e),-0.007347,1,0.44%
for[e],EX_for(e),-0.0003088,1,0.02%


In [61]:
# this is the same method but looking at pFBA solutions to find an optimal objective function - haven't looked to speed this up yet

%%time

model.objective = {}

# Define qualitative constraints (reaction ID -> expected flux direction)
qualitative_constraints = {
    "igg_formation": 1,   # IgG secretion (positive flux)
    "biomass_cho_prod": 1,   # Biomass secretion (positive flux)
    "EX_nh4(e)": -1,  # Ammonia uptake (negative flux)
    "EX_lac_L(e)": -1   # Glucose uptake (negative flux)
}

# Define qualitative criteria reactions and the reactions to include in the objective
selected_qualitative_reactions = list(qualitative_constraints.keys())  # These are the reactions for qualitative criteria
objective_reactions = ['igg_formation', 'biomass_cho_prod', 'EX_glc(e)', 'EX_gln_L(e)', 'EX_nh4(e)', 'EX_lac_L(e)']  # Reactions that can be included in the objective function

# Run pFBA for a given vector of objective coefficients for the reactions above, and compute difference from qualitative success criteria
def qualitative_objective_difference(c):
    """Compute mismatch score between predicted and qualitative fluxes for a given objective function."""
    # Set the objective coefficients for each reaction
    for rxn_id, coef in zip(objective_reactions, c):
        model.reactions.get_by_id(rxn_id).objective_coefficient = coef
    
    # Solve the pFBA problem for the given objective function
    solution = pfba(model)
    
    # Compute qualitative fluxes (from qualitative constraints)
    fluxes = solution.fluxes[selected_qualitative_reactions]
    qualitative_fluxes = np.array([qualitative_constraints[rxn] for rxn in selected_qualitative_reactions])
    
    # Compute agreement (penalise mismatches)
    difference = np.sum(np.sign(fluxes) != qualitative_fluxes)
    
    # Return the sum of the qualitative mismatch
    return difference

# Define the bounds for each reaction coefficient in the objective functions to test
bounds = [(-1, 1)] * len(objective_reactions)

# Define callback function for the dual annealing search to stop looking if objective function mismatch score reaches 0
def stop_if_zero(x, f, context):
    """Stops the optimization early if f, the function being minimised (qualitative_objective_difference), reaches 0."""
    if f == 0:
        return True  # This signals to stop the optimization
    return False  # Continue searching

#Perform Simulated Annealing to find global minimum value for the difference of pFBA solutions from the qualitative criteria for all combinations of objective function coefficients, hence an optimal objective function
#See for summary of method -> https://en.wikipedia.org/wiki/Simulated_annealing -> other potential algorithms that could be used here are Bayesian Optimization, Random Search, and Particle Swarm Optimization
result = dual_annealing(qualitative_objective_difference, bounds, callback=stop_if_zero)

#Scaling optimal objective function so coefficients sum to 1
scaled_result = (result.x / np.sum(np.abs(result.x)))

# Print the result
print("Optimal solution:", list(zip(objective_reactions, scaled_result)))

Optimal solution: [('igg_formation', 0.2562750745311306), ('biomass_cho_prod', 0.38797738170009), ('EX_glc(e)', -0.2232832188770393), ('EX_gln_L(e)', -0.07415877213130341), ('EX_nh4(e)', -0.0002124527638662757), ('EX_lac_L(e)', -0.05809309999657041)]
CPU times: total: 55min 44s
Wall time: 55min 54s


In [60]:
# summary of the solution from the pFBA script

with model:
    model.objective = {}
    for rxn_id, coef in zip(objective_reactions, scaled_result):
        model.reactions.get_by_id(rxn_id).objective_coefficient = coef
        
    print('the current model objective function is:',model.objective)
    solution = model.optimize()
    
    print('\nigg flux: ', solution.fluxes.get('igg_formation'))
    print('\nbiomass flux: ', solution.fluxes.get('biomass_cho_prod'))
    display(model.summary())

the current model objective function is: Maximize
-0.144508129132161*EX_glc(e) + 0.144508129132161*EX_glc(e)_reverse_bcf3e - 0.121026073720554*EX_gln_L(e) + 0.121026073720554*EX_gln_L(e)_reverse_75782 - 0.384814688664414*EX_lac_L(e) + 0.384814688664414*EX_lac_L(e)_reverse_32b05 - 0.000116106252329442*EX_nh4(e) + 0.000116106252329442*EX_nh4(e)_reverse_db85a + 0.202115493896824*biomass_cho_prod - 0.202115493896824*biomass_cho_prod_reverse_1b5b7 + 0.147419508333718*igg_formation - 0.147419508333718*igg_formation_reverse_7519c

igg flux:  2.033097819338789e-05

biomass flux:  0.0020123131513649


Metabolite,Reaction,Flux,C-Number,C-Flux
arg_L[e],EX_arg_L(e),0.001197,6,0.38%
asp_L[e],EX_asp_L(e),0.009341,4,1.98%
chol[e],EX_chol(e),0.000162,5,0.04%
cys_L[e],EX_cys_L(e),0.0009547,3,0.15%
glc_D[e],EX_glc(e),0.1984,6,63.20%
gln_L[e],EX_gln_L(e),0.06703,5,17.80%
his_L[e],EX_his_L(e),0.0007734,6,0.25%
hxan[e],EX_hxan(e),0.006195,5,1.64%
ile_L[e],EX_ile_L(e),0.001129,6,0.36%
lac_L[e],EX_lac_L(e),0.02528,3,4.03%

Metabolite,Reaction,Flux,C-Number,C-Flux
igg[g],DM_igg[g],-2.033e-05,95,0.12%
4abut[e],EX_4abut(e),-0.03508,4,8.41%
ac[e],EX_ac(e),-0.00221,2,0.27%
ala_L[e],EX_ala_L(e),-0.06116,3,11.00%
asn_L[e],EX_asn_L(e),-0.004145,4,0.99%
cbasp[e],EX_cbasp(e),-0.02783,5,8.34%
citr_L[e],EX_citr_L(e),-0.000331,6,0.12%
co2[e],EX_co2(e),-0.007347,1,0.44%
for[e],EX_for(e),-0.0003088,1,0.02%
glu_L[e],EX_glu_L(e),-0.005483,5,1.64%
