In [2]:
import numpy as np
import pandas as pd
import cobra
from cobra.io import read_sbml_model
from cobra import Model, Reaction, Metabolite
from cobra.flux_analysis import pfba
from scipy.optimize import dual_annealing
from IPython.display import display

In [3]:
# Read the SBML reconstruction from disk; the bare name on the last line
# makes the notebook render the model's overview table.
sbml_path = 'iCHO2441_221-107_producing.xml'
model = read_sbml_model(sbml_path)
model

Set parameter Username
Set parameter LicenseID to value 2611274
Academic license - for non-commercial use only - expires 2026-01-17


0,1
Name,iCHO2441_221107_producing
Memory address,2adae5e7ef0
Number of metabolites,4174
Number of reactions,6337
Number of genes,2441
Number of groups,15
Objective expression,1.0*biomass_cho_prod - 1.0*biomass_cho_prod_reverse_1b5b7
Compartments,"cytosol, lysosome, mitochondria, endoplasmicReticulum, nucleus, extracellularSpace, peroxisome, golgiApparatus, secretoryVesicle"


In [4]:
# Update model bounds to match the experimental early-exponential data.
# Each CSV row supplies a reaction ID ('reaction') together with its
# 'lower bound' and 'upper bound'.
bounds_df = pd.read_csv('early_exponential_bounds.csv')

for rxn_id, lower, upper in zip(
    bounds_df['reaction'], bounds_df['lower bound'], bounds_df['upper bound']
):
    # Assign both bounds in a single step: setting them one after the other
    # can pass through an inconsistent intermediate state (new lower bound
    # above the reaction's current upper bound).
    model.reactions.get_by_id(rxn_id).bounds = (lower, upper)

In [5]:
# Sanity check: every bound listed in the CSV must now be present on the model.
mismatches = []
for _, csv_row in bounds_df.iterrows():
    rxn = model.reactions.get_by_id(csv_row['reaction'])
    lower_matches = rxn.lower_bound == csv_row['lower bound']
    upper_matches = rxn.upper_bound == csv_row['upper bound']
    if not (lower_matches and upper_matches):
        # Record (reaction ID, model lb, model ub, CSV lb, CSV ub) for the report
        mismatches.append((
            csv_row['reaction'],
            rxn.lower_bound,
            rxn.upper_bound,
            csv_row['lower bound'],
            csv_row['upper bound'],
        ))

# Report the outcome; at most the first 10 mismatches are listed
if not mismatches:
    print("All reaction bounds were correctly updated!")
else:
    print(f"{len(mismatches)} reactions have incorrect bounds:")
    for rxn, lb_model, ub_model, lb_csv, ub_csv in mismatches[:10]:
        print(f"{rxn}: Model({lb_model}, {ub_model}) != CSV({lb_csv}, {ub_csv})")

All reaction bounds were correctly updated!


In [6]:
# Lift the non-negative lower bound on the lactate and ammonia exchange
# reactions (lower bound becomes -1000) to match experimental findings.
for exchange_id in ('EX_lac_L(e)', 'EX_nh4(e)'):
    model.reactions.get_by_id(exchange_id).lower_bound = -1000

In [14]:
%%time

# Slower variant of this script: scores standard FBA solutions to find an
# objective function that reproduces the qualitative experimental data.

# Start from an empty objective so that only the coefficients assigned later contribute
model.objective = {}

# Qualitative criteria: reaction ID -> expected flux sign (+1 = net positive flux).
# NOTE(review): the earlier comments described the ammonia and lactate entries
# as "uptake (negative flux)", which contradicts the +1 target used here;
# confirm the intended direction against the experimental data.
qualitative_constraints = dict.fromkeys(
    [
        "igg_formation",     # IgG formation
        "biomass_cho_prod",  # biomass production
        "EX_nh4(e)",         # ammonia exchange
        "EX_lac_L(e)",       # lactate exchange
    ],
    1,
)

# Reactions whose flux sign is checked against the criteria above
selected_qualitative_reactions = list(qualitative_constraints)

# Reactions whose objective coefficients the search is allowed to vary
objective_reactions = [
    'igg_formation',
    'biomass_cho_prod',
    'EX_glc(e)',
    'EX_gln_L(e)',
    'EX_nh4(e)',
    'EX_lac_L(e)',
]

# Run FBA for a given vector of objective coefficients for the reactions above, and compute difference from qualitative success criteria
def qualitative_objective_difference(c):
    """Count the qualitative criteria violated by the FBA optimum for objective weights ``c``.

    Parameters
    ----------
    c : sequence of float
        Objective coefficients, paired positionally with the notebook-level
        ``objective_reactions`` list.

    Returns
    -------
    int
        Number of reactions in ``selected_qualitative_reactions`` whose flux
        sign differs from the expected sign in ``qualitative_constraints``.
        If the solve does not end with status ``"optimal"``, every criterion
        is counted as a mismatch.

    Notes
    -----
    Overwrites the objective coefficients of ``objective_reactions`` on the
    notebook-level ``model``; they are not restored afterwards.
    """
    # Install the candidate objective on the shared model
    for rxn_id, coef in zip(objective_reactions, c):
        model.reactions.get_by_id(rxn_id).objective_coefficient = coef

    # Solve the FBA problem for this objective
    solution = model.optimize()

    expected_signs = np.array(
        [qualitative_constraints[rxn] for rxn in selected_qualitative_reactions]
    )

    # A non-optimal solve carries no meaningful fluxes: score it as a total miss
    if solution.status != "optimal":
        return len(expected_signs)

    fluxes = np.asarray(solution.fluxes[selected_qualitative_reactions], dtype=float)

    # Fluxes within solver tolerance of zero are numerical noise, not a direction
    zero_tol = getattr(model, "tolerance", 1e-9)
    observed_signs = np.where(np.abs(fluxes) > zero_tol, np.sign(fluxes), 0.0)

    # Number of criteria whose observed sign disagrees with the expected one
    return int(np.sum(observed_signs != expected_signs))

# Search box: every objective coefficient may range over [-1, 1]
bounds = [(-1, 1)] * len(objective_reactions)

# Perform Simulated Annealing to find the global minimum of the mismatch between FBA solutions and the qualitative criteria over all combinations of objective coefficients, hence an optimal objective function
# See for summary of method -> https://en.wikipedia.org/wiki/Simulated_annealing -> other potential algorithms that could be used here are Bayesian Optimization, Random Search, and Particle Swarm Optimization
# The search is stochastic, so a fixed seed is passed to make reruns reproducible.
ANNEALING_SEED = 0
result = dual_annealing(qualitative_objective_difference, bounds, seed=ANNEALING_SEED)

# Scale the optimum so the absolute values of its coefficients sum to 1.
# An all-zero vector is left unscaled to avoid a 0/0 division.
coefficient_norm = np.sum(np.abs(result.x))
scaled_result = result.x / coefficient_norm if coefficient_norm > 0 else result.x.copy()

# Print the result (the second value is a fraction in [0, 1], despite the label)
print("Optimal solution:", list(zip(objective_reactions, scaled_result)))
print("\nPercentage accuracy for solution: ", (1 - (qualitative_objective_difference(result.x)/len(selected_qualitative_reactions))))

Optimal solution: [('igg_formation', -0.16148417411006594), ('biomass_cho_prod', 0.2732080194538897), ('EX_glc(e)', 0.026678345696871015), ('EX_gln_L(e)', 0.18120671233647678), ('EX_nh4(e)', 0.02289549746071406), ('EX_lac_L(e)', 0.3345272509419825)]

Percentage accuracy for solution:  0.5
CPU times: total: 30min 52s
Wall time: 30min 57s


In [7]:
# Re-solve with the scaled objective coefficients and summarise the result.
# NOTE(review): relies on `objective_reactions`, `scaled_result`,
# `selected_qualitative_reactions` and `qualitative_constraints` left in the
# kernel by the optimisation cell; rerun that cell first.

with model:
    # Changes made inside the context are reverted when the block exits
    model.objective = {}
    for rxn_id, coef in zip(objective_reactions, scaled_result):
        model.reactions.get_by_id(rxn_id).objective_coefficient = coef
        
    print('the current model objective function is:',model.objective)
    solution = model.optimize()

    # Net fluxes of the criteria reactions. The solver variable named after a
    # reaction is only its forward part (the objective printed above shows a
    # separate `_reverse_` variable), so reading its primal would show a
    # negative net flux as 0.
    print(solution.fluxes[selected_qualitative_reactions].to_numpy())
    # Expected signs, in the same order
    print(np.fromiter((qualitative_constraints[rxn_id] for rxn_id in selected_qualitative_reactions), dtype=int))
    
    print('\nigg flux: ', solution.fluxes.get('igg_formation'))
    print('\nbiomass flux: ', solution.fluxes.get('biomass_cho_prod'))
    # NOTE(review): summary() is called without the solution computed above;
    # confirm whether it should be given `solution=solution`.
    display(model.summary())

the current model objective function is: Maximize
-0.214539773970104*EX_glc(e) + 0.214539773970104*EX_glc(e)_reverse_bcf3e - 0.231063418448931*EX_gln_L(e) + 0.231063418448931*EX_gln_L(e)_reverse_75782 + 0.00326074846618441*EX_lac_L(e) - 0.00326074846618441*EX_lac_L(e)_reverse_32b05 + 0.00332702703955666*EX_nh4(e) - 0.00332702703955666*EX_nh4(e)_reverse_db85a + 0.161442041261726*biomass_cho_prod - 0.161442041261726*biomass_cho_prod_reverse_1b5b7 + 0.386366990813498*igg_formation - 0.386366990813498*igg_formation_reverse_7519c
[0.         0.00230909 0.39988603 0.26800271]
[1 1 1 1]

igg flux:  0.0

biomass flux:  0.0023090879865199


Metabolite,Reaction,Flux,C-Number,C-Flux
arg_L[e],EX_arg_L(e),0.01978,6,4.59%
asn_L[e],EX_asn_L(e),0.04038,4,6.25%
asp_L[e],EX_asp_L(e),0.009341,4,1.45%
chol[e],EX_chol(e),0.002774,5,0.54%
cys_L[e],EX_cys_L(e),0.00522,3,0.61%
fol[e],EX_fol(e),0.0004567,19,0.34%
glc_D[e],EX_glc(e),0.1984,6,46.04%
gln_L[e],EX_gln_L(e),0.06703,5,12.97%
his_L[e],EX_his_L(e),0.003297,6,0.77%
hxan[e],EX_hxan(e),0.006195,5,1.20%

Metabolite,Reaction,Flux,C-Number,C-Flux
2hb[e],EX_2hb(e),-0.0008222,4,0.13%
2hyoxplac[e],EX_2hyoxplac(e),-0.004618,8,1.48%
2mcit[e],EX_2mcit(e),-0.002466,7,0.69%
34dhoxpeg[e],EX_34dhoxpeg(e),-0.001382,8,0.44%
3mob[e],EX_3mob(e),-0.00409,5,0.82%
3mox4hoxm[e],EX_3mox4hoxm(e),-0.00648,9,2.34%
4mop[e],EX_4mop(e),-0.01135,6,2.74%
5mthf[e],EX_5mthf(e),-0.0004567,20,0.37%
acald[e],EX_acald(e),-0.004619,2,0.37%
co2[e],EX_co2(e),-0.09362,1,3.76%


In [8]:
%%time

# Slower variant of this script: scores standard FBA solutions to find an
# objective function that reproduces the qualitative experimental data.

# Start from an empty objective so that only the coefficients assigned later contribute
model.objective = {}

# Qualitative criteria: reaction ID -> expected flux sign (+1 = net positive flux).
# NOTE(review): the earlier comments described the ammonia entry as
# "uptake (negative flux)" and labelled EX_lac_L(e) as "Glucose uptake";
# both contradict the +1 target / reaction ID used here. Confirm the intended
# direction against the experimental data.
qualitative_constraints = dict.fromkeys(
    [
        "igg_formation",     # IgG formation
        "biomass_cho_prod",  # biomass production
        "EX_nh4(e)",         # ammonia exchange
        "EX_lac_L(e)",       # lactate exchange
    ],
    1,
)

# Reactions whose flux sign is checked against the criteria above
selected_qualitative_reactions = list(qualitative_constraints)

# Reactions whose objective coefficients the search is allowed to vary
objective_reactions = [
    'igg_formation',
]

# Run FBA for a given vector of objective coefficients for the reactions above, and compute difference from qualitative success criteria
def qualitative_objective_difference(c):
    """Count the qualitative criteria violated by the FBA optimum for objective weights ``c``.

    Parameters
    ----------
    c : sequence of float
        Objective coefficients, paired positionally with the notebook-level
        ``objective_reactions`` list.

    Returns
    -------
    int
        Number of reactions in ``selected_qualitative_reactions`` whose flux
        sign differs from the expected sign in ``qualitative_constraints``.
        If the solve does not end with status ``"optimal"``, every criterion
        is counted as a mismatch.

    Notes
    -----
    Overwrites the objective coefficients of ``objective_reactions`` on the
    notebook-level ``model``; they are not restored afterwards.
    """
    # Install the candidate objective on the shared model
    for rxn_id, coef in zip(objective_reactions, c):
        model.reactions.get_by_id(rxn_id).objective_coefficient = coef

    # Solve the FBA problem for this objective
    solution = model.optimize()

    expected_signs = np.array(
        [qualitative_constraints[rxn] for rxn in selected_qualitative_reactions]
    )

    # A non-optimal solve carries no meaningful fluxes: score it as a total miss
    if solution.status != "optimal":
        return len(expected_signs)

    fluxes = np.asarray(solution.fluxes[selected_qualitative_reactions], dtype=float)

    # Fluxes within solver tolerance of zero are numerical noise, not a direction
    zero_tol = getattr(model, "tolerance", 1e-9)
    observed_signs = np.where(np.abs(fluxes) > zero_tol, np.sign(fluxes), 0.0)

    # Number of criteria whose observed sign disagrees with the expected one
    return int(np.sum(observed_signs != expected_signs))

# Search box: every objective coefficient may range over [-1, 1]
bounds = [(-1, 1)] * len(objective_reactions)

# Perform Simulated Annealing to find the global minimum of the mismatch between FBA solutions and the qualitative criteria over all combinations of objective coefficients, hence an optimal objective function
# See for summary of method -> https://en.wikipedia.org/wiki/Simulated_annealing -> other potential algorithms that could be used here are Bayesian Optimization, Random Search, and Particle Swarm Optimization
# The search is stochastic, so a fixed seed is passed to make reruns reproducible.
ANNEALING_SEED = 0
result = dual_annealing(qualitative_objective_difference, bounds, seed=ANNEALING_SEED)

# Scale the optimum so the absolute values of its coefficients sum to 1.
# An all-zero vector is left unscaled to avoid a 0/0 division.
coefficient_norm = np.sum(np.abs(result.x))
scaled_result = result.x / coefficient_norm if coefficient_norm > 0 else result.x.copy()

# Print the result (the second value is a fraction in [0, 1], despite the label)
print("Optimal solution:", list(zip(objective_reactions, scaled_result)))
print("\nPercentage accuracy for solution: ", (1 - (qualitative_objective_difference(result.x)/len(selected_qualitative_reactions))))

Optimal solution: [('igg_formation', 1.0)]

Percentage accuracy for solution:  0.5
CPU times: total: 4min
Wall time: 4min 1s


In [9]:
# Re-solve with the scaled objective coefficients and summarise the result.
# NOTE(review): relies on `objective_reactions`, `scaled_result`,
# `selected_qualitative_reactions` and `qualitative_constraints` left in the
# kernel by the optimisation cell; rerun that cell first.

with model:
    # Changes made inside the context are reverted when the block exits
    model.objective = {}
    for rxn_id, coef in zip(objective_reactions, scaled_result):
        model.reactions.get_by_id(rxn_id).objective_coefficient = coef
        
    print('the current model objective function is:',model.objective)
    solution = model.optimize()

    # Net fluxes of the criteria reactions. The solver variable named after a
    # reaction is only its forward part (the objective printed above shows a
    # separate `_reverse_` variable), so reading its primal would show a
    # negative net flux as 0.
    print(solution.fluxes[selected_qualitative_reactions].to_numpy())
    # Expected signs, in the same order
    print(np.fromiter((qualitative_constraints[rxn_id] for rxn_id in selected_qualitative_reactions), dtype=int))
    
    print('\nigg flux: ', solution.fluxes.get('igg_formation'))
    print('\nbiomass flux: ', solution.fluxes.get('biomass_cho_prod'))
    # NOTE(review): summary() is called without the solution computed above;
    # confirm whether it should be given `solution=solution`.
    display(model.summary())

the current model objective function is: Maximize
1.0*igg_formation - 1.0*igg_formation_reverse_7519c
[8.98698352e-05 0.00000000e+00 1.54981036e-01 0.00000000e+00]
[1 1 1 1]

igg flux:  8.98698351718184e-05

biomass flux:  0.0


Metabolite,Reaction,Flux,C-Number,C-Flux
arg_L[e],EX_arg_L(e),0.005845,6,1.70%
asn_L[e],EX_asn_L(e),0.004673,4,0.90%
asp_L[e],EX_asp_L(e),0.004134,4,0.80%
cys_L[e],EX_cys_L(e),0.002876,3,0.42%
fol[e],EX_fol(e),0.0004567,19,0.42%
glc_D[e],EX_glc(e),0.1984,6,57.57%
gln_L[e],EX_gln_L(e),0.06703,5,16.21%
his_L[e],EX_his_L(e),0.003297,6,0.96%
ile_L[e],EX_ile_L(e),0.002516,6,0.73%
leu_L[e],EX_leu_L(e),0.008088,6,2.35%

Metabolite,Reaction,Flux,C-Number,C-Flux
igg[g],DM_igg[g],-8.987e-05,95,0.57%
4abut[e],EX_4abut(e),-0.05295,4,14.24%
5mthf[e],EX_5mthf(e),-0.0004567,20,0.61%
acald[e],EX_acald(e),-0.0002711,2,0.04%
ala_L[e],EX_ala_L(e),-0.0266,3,5.36%
bhb[e],EX_bhb(e),-0.009806,4,2.64%
co2[e],EX_co2(e),-0.0126,1,0.85%
for[e],EX_for(e),-5.396e-05,1,0.00%
h[e],EX_h(e),-0.2891,0,0.00%
h2o[e],EX_h2o(e),-0.3609,0,0.00%


In [10]:
%%time

# Slower variant of this script: scores standard FBA solutions to find an
# objective function that reproduces the qualitative experimental data.

# Start from an empty objective so that only the coefficients assigned later contribute
model.objective = {}

# Qualitative criteria: reaction ID -> expected flux sign (+1 = net positive flux).
# NOTE(review): the earlier comments described the ammonia entry as
# "uptake (negative flux)" and labelled EX_lac_L(e) as "Glucose uptake";
# both contradict the +1 target / reaction ID used here. Confirm the intended
# direction against the experimental data.
qualitative_constraints = dict.fromkeys(
    [
        "igg_formation",     # IgG formation
        "biomass_cho_prod",  # biomass production
        "EX_nh4(e)",         # ammonia exchange
        "EX_lac_L(e)",       # lactate exchange
    ],
    1,
)

# Reactions whose flux sign is checked against the criteria above
selected_qualitative_reactions = list(qualitative_constraints)

# Reactions whose objective coefficients the search is allowed to vary
objective_reactions = [
    'biomass_cho_prod',
]

# Run FBA for a given vector of objective coefficients for the reactions above, and compute difference from qualitative success criteria
def qualitative_objective_difference(c):
    """Count the qualitative criteria violated by the FBA optimum for objective weights ``c``.

    Parameters
    ----------
    c : sequence of float
        Objective coefficients, paired positionally with the notebook-level
        ``objective_reactions`` list.

    Returns
    -------
    int
        Number of reactions in ``selected_qualitative_reactions`` whose flux
        sign differs from the expected sign in ``qualitative_constraints``.
        If the solve does not end with status ``"optimal"``, every criterion
        is counted as a mismatch.

    Notes
    -----
    Overwrites the objective coefficients of ``objective_reactions`` on the
    notebook-level ``model``; they are not restored afterwards.
    """
    # Install the candidate objective on the shared model
    for rxn_id, coef in zip(objective_reactions, c):
        model.reactions.get_by_id(rxn_id).objective_coefficient = coef

    # Solve the FBA problem for this objective
    solution = model.optimize()

    expected_signs = np.array(
        [qualitative_constraints[rxn] for rxn in selected_qualitative_reactions]
    )

    # A non-optimal solve carries no meaningful fluxes: score it as a total miss
    if solution.status != "optimal":
        return len(expected_signs)

    fluxes = np.asarray(solution.fluxes[selected_qualitative_reactions], dtype=float)

    # Fluxes within solver tolerance of zero are numerical noise, not a direction
    zero_tol = getattr(model, "tolerance", 1e-9)
    observed_signs = np.where(np.abs(fluxes) > zero_tol, np.sign(fluxes), 0.0)

    # Number of criteria whose observed sign disagrees with the expected one
    return int(np.sum(observed_signs != expected_signs))

# Search box: every objective coefficient may range over [-1, 1]
bounds = [(-1, 1)] * len(objective_reactions)

# Perform Simulated Annealing to find the global minimum of the mismatch between FBA solutions and the qualitative criteria over all combinations of objective coefficients, hence an optimal objective function
# See for summary of method -> https://en.wikipedia.org/wiki/Simulated_annealing -> other potential algorithms that could be used here are Bayesian Optimization, Random Search, and Particle Swarm Optimization
# The search is stochastic, so a fixed seed is passed to make reruns reproducible.
ANNEALING_SEED = 0
result = dual_annealing(qualitative_objective_difference, bounds, seed=ANNEALING_SEED)

# Scale the optimum so the absolute values of its coefficients sum to 1.
# An all-zero vector is left unscaled to avoid a 0/0 division.
coefficient_norm = np.sum(np.abs(result.x))
scaled_result = result.x / coefficient_norm if coefficient_norm > 0 else result.x.copy()

# Print the result (the second value is a fraction in [0, 1], despite the label)
print("Optimal solution:", list(zip(objective_reactions, scaled_result)))
print("\nPercentage accuracy for solution: ", (1 - (qualitative_objective_difference(result.x)/len(selected_qualitative_reactions))))

Optimal solution: [('biomass_cho_prod', 1.0)]

Percentage accuracy for solution:  0.5
CPU times: total: 3min 47s
Wall time: 3min 48s


In [11]:
# Re-solve with the scaled objective coefficients and summarise the result.
# NOTE(review): relies on `objective_reactions`, `scaled_result`,
# `selected_qualitative_reactions` and `qualitative_constraints` left in the
# kernel by the optimisation cell; rerun that cell first.

with model:
    # Changes made inside the context are reverted when the block exits
    model.objective = {}
    for rxn_id, coef in zip(objective_reactions, scaled_result):
        model.reactions.get_by_id(rxn_id).objective_coefficient = coef
        
    print('the current model objective function is:',model.objective)
    solution = model.optimize()

    # Net fluxes of the criteria reactions. The solver variable named after a
    # reaction is only its forward part (the objective printed above shows a
    # separate `_reverse_` variable), so reading its primal would show a
    # negative net flux as 0.
    print(solution.fluxes[selected_qualitative_reactions].to_numpy())
    # Expected signs, in the same order
    print(np.fromiter((qualitative_constraints[rxn_id] for rxn_id in selected_qualitative_reactions), dtype=int))
    
    print('\nigg flux: ', solution.fluxes.get('igg_formation'))
    print('\nbiomass flux: ', solution.fluxes.get('biomass_cho_prod'))
    # NOTE(review): summary() is called without the solution computed above;
    # confirm whether it should be given `solution=solution`.
    display(model.summary())

the current model objective function is: Maximize
1.0*biomass_cho_prod - 1.0*biomass_cho_prod_reverse_1b5b7
[0.         0.00230909 0.12759586 0.        ]
[1 1 1 1]

igg flux:  0.0

biomass flux:  0.0023090879865199


Metabolite,Reaction,Flux,C-Number,C-Flux
arg_L[e],EX_arg_L(e),0.0007208,6,1.09%
asn_L[e],EX_asn_L(e),0.0006098,4,0.61%
asp_L[e],EX_asp_L(e),0.0011,4,1.11%
chol[e],EX_chol(e),0.0001539,5,0.19%
cys_L[e],EX_cys_L(e),0.0002425,3,0.18%
fol[e],EX_fol(e),0.0004567,19,2.18%
glc_D[e],EX_glc(e),0.0502,6,75.75%
gln_L[e],EX_gln_L(e),0.004179,5,5.25%
his_L[e],EX_his_L(e),0.0007684,6,1.16%
hxan[e],EX_hxan(e),0.0002436,5,0.31%

Metabolite,Reaction,Flux,C-Number,C-Flux
5mthf[e],EX_5mthf(e),-0.0004567,20,3.04%
acald[e],EX_acald(e),-0.001383,2,0.92%
ala_L[e],EX_ala_L(e),-0.001439,3,1.44%
for[e],EX_for(e),-0.0002525,1,0.08%
h[e],EX_h(e),-0.08304,0,0.00%
h2o[e],EX_h2o(e),-0.1138,0,0.00%
hco3[e],EX_hco3(e),-0.005625,1,1.87%
lac_D[e],EX_lac_D(e),-4.952e-05,3,0.05%
lac_L[e],EX_lac_L(e),-0.003405,3,3.40%
mal_L[e],EX_mal_L(e),-0.002877,4,3.83%


In [7]:
%%time

# Slower variant of this script: scores standard FBA solutions to find an
# objective function that reproduces the qualitative experimental data.

# Start from an empty objective so that only the coefficients assigned later contribute
model.objective = {}

# Qualitative criteria: reaction ID -> expected flux sign (+1 = net positive flux).
# NOTE(review): the earlier comments described the ammonia entry as
# "uptake (negative flux)" and labelled EX_lac_L(e) as "Glucose uptake";
# both contradict the +1 target / reaction ID used here. Confirm the intended
# direction against the experimental data.
qualitative_constraints = dict.fromkeys(
    [
        "igg_formation",     # IgG formation
        "biomass_cho_prod",  # biomass production
        "EX_nh4(e)",         # ammonia exchange
        "EX_lac_L(e)",       # lactate exchange
    ],
    1,
)

# Reactions whose flux sign is checked against the criteria above
selected_qualitative_reactions = list(qualitative_constraints)

# Reactions whose objective coefficients the search is allowed to vary
objective_reactions = [
    'igg_formation',
    'biomass_cho_prod',
]

# Run FBA for a given vector of objective coefficients for the reactions above, and compute difference from qualitative success criteria
def qualitative_objective_difference(c):
    """Count the qualitative criteria violated by the FBA optimum for objective weights ``c``.

    Parameters
    ----------
    c : sequence of float
        Objective coefficients, paired positionally with the notebook-level
        ``objective_reactions`` list.

    Returns
    -------
    int
        Number of reactions in ``selected_qualitative_reactions`` whose flux
        sign differs from the expected sign in ``qualitative_constraints``.
        If the solve does not end with status ``"optimal"``, every criterion
        is counted as a mismatch.

    Notes
    -----
    Overwrites the objective coefficients of ``objective_reactions`` on the
    notebook-level ``model``; they are not restored afterwards.
    """
    # Install the candidate objective on the shared model
    for rxn_id, coef in zip(objective_reactions, c):
        model.reactions.get_by_id(rxn_id).objective_coefficient = coef

    # Solve the FBA problem for this objective
    solution = model.optimize()

    expected_signs = np.array(
        [qualitative_constraints[rxn] for rxn in selected_qualitative_reactions]
    )

    # A non-optimal solve carries no meaningful fluxes: score it as a total miss
    if solution.status != "optimal":
        return len(expected_signs)

    fluxes = np.asarray(solution.fluxes[selected_qualitative_reactions], dtype=float)

    # Fluxes within solver tolerance of zero are numerical noise, not a direction
    zero_tol = getattr(model, "tolerance", 1e-9)
    observed_signs = np.where(np.abs(fluxes) > zero_tol, np.sign(fluxes), 0.0)

    # Number of criteria whose observed sign disagrees with the expected one
    return int(np.sum(observed_signs != expected_signs))

# Search box: every objective coefficient may range over [-1, 1]
bounds = [(-1, 1)] * len(objective_reactions)

# Perform Simulated Annealing to find the global minimum of the mismatch between FBA solutions and the qualitative criteria over all combinations of objective coefficients, hence an optimal objective function
# See for summary of method -> https://en.wikipedia.org/wiki/Simulated_annealing -> other potential algorithms that could be used here are Bayesian Optimization, Random Search, and Particle Swarm Optimization
# The search is stochastic, so a fixed seed is passed to make reruns reproducible.
ANNEALING_SEED = 0
result = dual_annealing(qualitative_objective_difference, bounds, seed=ANNEALING_SEED)

# Scale the optimum so the absolute values of its coefficients sum to 1.
# An all-zero vector is left unscaled to avoid a 0/0 division.
coefficient_norm = np.sum(np.abs(result.x))
scaled_result = result.x / coefficient_norm if coefficient_norm > 0 else result.x.copy()

# Print the result (the second value is a fraction in [0, 1], despite the label)
print("Optimal solution:", list(zip(objective_reactions, scaled_result)))
print("\nPercentage accuracy for solution: ", (1 - (qualitative_objective_difference(result.x)/len(selected_qualitative_reactions))))

Optimal solution: [('igg_formation', 0.5397261914970819), ('biomass_cho_prod', 0.46027380850291816)]

Percentage accuracy for solution:  1.0
CPU times: total: 9min 7s
Wall time: 9min 10s


In [8]:
# Re-solve with the scaled objective coefficients and summarise the result.
# NOTE(review): relies on `objective_reactions`, `scaled_result`,
# `selected_qualitative_reactions` and `qualitative_constraints` left in the
# kernel by the optimisation cell; rerun that cell first.

with model:
    # Changes made inside the context are reverted when the block exits
    model.objective = {}
    for rxn_id, coef in zip(objective_reactions, scaled_result):
        model.reactions.get_by_id(rxn_id).objective_coefficient = coef
        
    print('the current model objective function is:',model.objective)
    solution = model.optimize()

    # Net fluxes of the criteria reactions. The solver variable named after a
    # reaction is only its forward part (the objective printed above shows a
    # separate `_reverse_` variable), so reading its primal would show a
    # negative net flux as 0.
    print(solution.fluxes[selected_qualitative_reactions].to_numpy())
    # Expected signs, in the same order
    print(np.fromiter((qualitative_constraints[rxn_id] for rxn_id in selected_qualitative_reactions), dtype=int))
    
    print('\nigg flux: ', solution.fluxes.get('igg_formation'))
    print('\nbiomass flux: ', solution.fluxes.get('biomass_cho_prod'))
    # NOTE(review): summary() is called without the solution computed above;
    # confirm whether it should be given `solution=solution`.
    display(model.summary())

the current model objective function is: Maximize
0.460273808502918*biomass_cho_prod - 0.460273808502918*biomass_cho_prod_reverse_1b5b7 + 0.539726191497082*igg_formation - 0.539726191497082*igg_formation_reverse_7519c
[6.80931304e-05 2.30908799e-03 1.04837436e-01 3.40523264e-03]
[1 1 1 1]

igg flux:  6.809313035805236e-05

biomass flux:  0.0023090879865199


Metabolite,Reaction,Flux,C-Number,C-Flux
arg_L[e],EX_arg_L(e),0.007398,6,2.18%
asn_L[e],EX_asn_L(e),0.004151,4,0.82%
asp_L[e],EX_asp_L(e),0.004232,4,0.83%
chol[e],EX_chol(e),0.0001859,5,0.05%
cys_L[e],EX_cys_L(e),0.002422,3,0.36%
fol[e],EX_fol(e),0.0004567,19,0.43%
glc_D[e],EX_glc(e),0.1984,6,58.44%
gln_L[e],EX_gln_L(e),0.06703,5,16.46%
his_L[e],EX_his_L(e),0.003297,6,0.97%
hxan[e],EX_hxan(e),0.0002436,5,0.06%

Metabolite,Reaction,Flux,C-Number,C-Flux
igg[g],DM_igg[g],-6.809e-05,95,0.43%
34dhoxpeg[e],EX_34dhoxpeg(e),-9.625e-05,8,0.05%
4abut[e],EX_4abut(e),-0.0537,4,14.32%
5mthf[e],EX_5mthf(e),-0.0004567,20,0.61%
ala_L[e],EX_ala_L(e),-0.03065,3,6.13%
bhb[e],EX_bhb(e),-0.006315,4,1.68%
co2[e],EX_co2(e),-0.0126,1,0.84%
for[e],EX_for(e),-0.0003065,1,0.02%
h[e],EX_h(e),-0.3103,0,0.00%
h2o[e],EX_h2o(e),-0.3545,0,0.00%
