### Analyze blocked reactions for ThermoModels under specific regression conditions

In [3]:
import os.path as path
import glob, os
from datetime import datetime
from importlib.metadata import version
import cobra
import thermo_flux
from thermo_flux.io import load_excel as ex
from thermo_flux.core.model import ThermoModel
from equilibrator_api import  Q_
import pandas as pd
from thermo_flux.io import helper_load as hl
import numpy as np
from thermo_flux.io import load_excel as ex
from scripts.logger import write_to_log
import gurobipy as gp
from gurobipy import GRB
from scripts.gen_model import gen_model
from scripts.gen_model import apply_physio_data
from scripts.gen_model import constrain_bounds_fva
from scripts.reaction_utils import list_blocked_reactions
from scripts.reaction_utils import count_blocked_pathways
from scripts.reaction_utils import plot_calc_vs_exp
from thermo_flux.solver.gurobi import compute_IIS


In [5]:
# --- Input files (paths relative to the notebook's working directory) ------
INPUT_MODEL = "datafiles/model.xlsx"
INPUT_KEGGS = "datafiles/ecoli_kegg_id.csv"
INPUT_REED = "regression/reed.csv"
INPUT_INCHI = "regression/InChIs.csv"
INPUT_GAMS = "regression/model_Ecoli_from-gams.xlsx"
INPUT_EXP_DATA = "regression/allPhysioData_formatted_forGSM_20230831.csv"
INPUT_EXP_CONC = "regression/allConcRange_20230912.csv"
INPUT_METABOLOMICS = "regression/metabolomics-Kochanowski_20230925.csv"

MODEL_NAME = "ecoli"

# --- Output locations -------------------------------------------------------
OUTPUT_DIR = "output"
OUTPUT_NAME = "blocked_reactions"
OUTPUT_LOG = path.join(OUTPUT_DIR, f"{OUTPUT_NAME}_log.txt")

# Every condition for which regression data is listed in this notebook.
ALL_CONDITIONS = ["WT-Glc_I", "WT-Gal_I", "WT-Fruc_I", "WT-Mann_I", "dptsG-Glc_I",
                  "WT-Ace_I", "WT-Succ_I", "WT-Fum_I", "WT-Glyc_I", "WT-Pyr_I",
                  "WT-GlyCAA_II"]

# Conditions actually processed by the cells below. Kept as an explicit subset
# so the full list above is not silently overwritten; use
# `CONDITIONS_TO_REGRESS = ALL_CONDITIONS` to process everything.
CONDITIONS_TO_REGRESS = ["WT-Glyc_I"]

# --- Options forwarded to apply_physio_data ---------------------------------
INCLUDE_CO2 = True
INCLUDE_O2 = True
ALLOW_OTHER_EXCRETION = False
RELAX_EXP_FLUX_BOUNDS = 2.0

# The log is opened with mode "w" below; make sure its folder exists first.
# (Whether write_to_log creates missing folders itself is not visible here.)
os.makedirs(OUTPUT_DIR, exist_ok=True)

time = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
write_to_log(OUTPUT_LOG, f"Started analysis at: {time}", "w")

# Write package versions:
modules = ["pandas", "numpy", "equilibrator_api", "cobra"]
write_to_log(OUTPUT_LOG, "Package versions used:")
versions_packages = [f"  {m}: {version(m)}\n" for m in modules]
write_to_log(OUTPUT_LOG, "".join(versions_packages))

In [None]:
# Plot calculated against experimental values for the first configured condition.
# The model name and the physiology table are taken from the configuration
# constants (MODEL_NAME, INPUT_EXP_DATA) instead of being re-typed here, so a
# mistyped copy of the path can no longer diverge from the rest of the notebook.
# NOTE(review): the solution file name is fixed to the WT_Glc_I run while the
# condition comes from CONDITIONS_TO_REGRESS[0] — confirm the two belong together.
plot_calc_vs_exp(
    MODEL_NAME,
    CONDITIONS_TO_REGRESS[0],
    "solutions/ecoli_WT_Glc_I.csv",
    INPUT_EXP_DATA,
)

OSError: [Errno 22] Invalid argument: 'regression\x07llPhysioData_formatted_forGSM_20230831.csv'

In [None]:
# Build the base model `tmodel` with gen_model. The two trailing True values
# are positional options whose meaning is defined in scripts.gen_model.
tmodel = gen_model(
    MODEL_NAME,
    INPUT_MODEL,
    INPUT_KEGGS,
    INPUT_REED,
    INPUT_INCHI,
    INPUT_GAMS,
    OUTPUT_LOG,
    True,
    True,
)

In [None]:
# For every configured condition: build a fresh model, apply the experimental
# data, drop the blocked reactions, solve, and export the result.

# Solution files are written below; create the folder up front so that a long
# solve is not lost to a missing directory.
os.makedirs("solutions", exist_ok=True)

for c in CONDITIONS_TO_REGRESS:
    print(f"STARTING: {c}")
    # A new model is generated per condition, so reactions removed for one
    # condition do not carry over to the next.
    tmodel_p = gen_model(MODEL_NAME, INPUT_MODEL, INPUT_KEGGS, INPUT_REED, INPUT_INCHI, INPUT_GAMS, OUTPUT_LOG, True, True)
    tmodel_p = apply_physio_data(tmodel_p, c, INPUT_EXP_DATA, INPUT_EXP_CONC, INPUT_METABOLOMICS, INPUT_GAMS, RELAX_EXP_FLUX_BOUNDS, INCLUDE_CO2, INCLUDE_O2, ALLOW_OTHER_EXCRETION, OUTPUT_LOG)

    blocked_p = list_blocked_reactions(tmodel_p, c, OUTPUT_LOG)
    print(len(blocked_p))
    # Keep SERASr as model becomes infeasible after removing it
    to_keep = ["SERASr"]
    blocked_p = [x for x in blocked_p if x not in to_keep]
    print(len(blocked_p))

    # Remove the blocked reactions, then re-balance every remaining reaction
    # and refresh the thermodynamic information of the reduced model.
    tmodel_p.remove_reactions(blocked_p)
    for rxn in tmodel_p.reactions:
        thermo_flux.tools.drg_tools.reaction_balance(rxn, balance_charge=True, balance_mg=False)
    tmodel_p.update_thermo_info(fit_unknown_dfG0=True)

    # Reset the solver model, set the objective, and add the TFBA variables.
    tmodel_p.m = None
    tmodel_p.objective = tmodel_p.reactions.biomass_EX
    tmodel_p.add_TFBA_variables()

    tmodel_p.m.Params.TimeLimit = 3600  # seconds
    tmodel_p.m.Params.Threads = 16
    tmodel_p.m.optimize()

    # Without an incumbent (infeasible model, or time limit hit before any
    # feasible point was found) there is nothing to export: report and move on
    # to the next condition instead of aborting the whole loop.
    if tmodel_p.m.SolCount == 0:
        print(f"No solution available for {c} (Gurobi status {tmodel_p.m.Status}); nothing exported")
        continue

    tmodel_p.m.write(f"solutions/ecoli_{c}_SOLUTION.sol")

    sol = tmodel_p.solution()
    sol.to_csv(f"solutions/ecoli_{c}.csv")

In [None]:
# Removal-and-solve pipeline for the first entry of CONDITIONS_TO_REGRESS only;
# nothing is written to disk in this cell.
first_condition = CONDITIONS_TO_REGRESS[0]

tmodel_p = gen_model(
    MODEL_NAME, INPUT_MODEL, INPUT_KEGGS, INPUT_REED, INPUT_INCHI, INPUT_GAMS,
    OUTPUT_LOG, True, True,
)
tmodel_p = apply_physio_data(
    tmodel_p, first_condition,
    INPUT_EXP_DATA, INPUT_EXP_CONC, INPUT_METABOLOMICS, INPUT_GAMS,
    RELAX_EXP_FLUX_BOUNDS, INCLUDE_CO2, INCLUDE_O2, ALLOW_OTHER_EXCRETION,
    OUTPUT_LOG,
)

blocked_p = list_blocked_reactions(tmodel_p, first_condition, OUTPUT_LOG)
print(len(blocked_p))
# SERASr stays in the model: removing it makes the model infeasible.
to_keep = ["SERASr"]
blocked_p = [rxn_id for rxn_id in blocked_p if rxn_id not in to_keep]
print(len(blocked_p))

# Drop the blocked reactions, re-balance what is left, refresh thermo data.
tmodel_p.remove_reactions(blocked_p)
balance_reaction = thermo_flux.tools.drg_tools.reaction_balance
for rxn in tmodel_p.reactions:
    balance_reaction(rxn, balance_charge=True, balance_mg=False)
tmodel_p.update_thermo_info(fit_unknown_dfG0=True)

# Reset the solver model, set the objective, add the TFBA variables.
tmodel_p.m = None
biomass_reaction = tmodel_p.reactions.biomass_EX
tmodel_p.objective = biomass_reaction
tmodel_p.add_TFBA_variables()

# One hour time limit, 16 threads.
for param_name, param_value in (("TimeLimit", 3600), ("Threads", 16)):
    setattr(tmodel_p.m.Params, param_name, param_value)
tmodel_p.m.optimize()

In [None]:
# Fetch the solution of the last solve, show it, and store it as CSV.
# NOTE(review): the file name is fixed to the WT_Glc_I run regardless of which
# condition tmodel_p was built for — confirm.
solution_csv = "solutions/ecoli_WT_Glc_I.csv"
sol = tmodel_p.solution()
print(sol)
sol.to_csv(solution_csv)

In [None]:
# Rebuild tmodel_p from scratch and apply the data of the first condition.
tmodel_p = apply_physio_data(
    gen_model(
        MODEL_NAME, INPUT_MODEL, INPUT_KEGGS, INPUT_REED, INPUT_INCHI,
        INPUT_GAMS, OUTPUT_LOG, True, True,
    ),
    CONDITIONS_TO_REGRESS[0],
    INPUT_EXP_DATA,
    INPUT_EXP_CONC,
    INPUT_METABOLOMICS,
    INPUT_GAMS,
    RELAX_EXP_FLUX_BOUNDS,
    INCLUDE_CO2,
    INCLUDE_O2,
    ALLOW_OTHER_EXCRETION,
    OUTPUT_LOG,
)
#tmodel_p = constrain_bounds_fva(tmodel_p, OUTPUT_LOG) # comparison

In [None]:
# Blocked reactions of the condition-specific model; the count is printed
# before and after the protected reactions are filtered out.
blocked_p = list_blocked_reactions(
    tmodel_p,
    CONDITIONS_TO_REGRESS[0],
    OUTPUT_LOG,
)
print(len(blocked_p))
# SERASr is kept because the model becomes infeasible once it is removed.
to_keep = ["SERASr"]
blocked_p = [rxn_id for rxn_id in blocked_p if rxn_id not in to_keep]
print(len(blocked_p))

In [None]:
# Blocked reactions of the base model `tmodel`, listed under the label "BASE".
blocked = list_blocked_reactions(
    tmodel,
    "BASE",
    OUTPUT_LOG,
)

In [None]:
# Try to find out which reaction is causing the model to become infeasible.
# reaction_feasibility maps a reaction id to 1 (model still solvable after the
# removal) or 0 (model infeasible). Reactions whose solve ends without a
# conclusive answer get no entry and are reported on the console instead.
# NOTE(review): reactions are removed from the same tmodel_p on every pass and
# never restored, so each solve reflects all removals made so far — confirm
# this cumulative behaviour is intended.
reaction_feasibility = dict()

for b in blocked_p:
    #tmodel = gen_model(MODEL_NAME, INPUT_MODEL, INPUT_KEGGS, INPUT_REED, INPUT_INCHI, INPUT_GAMS, OUTPUT_LOG, True, True)
    tmodel_p.remove_reactions([b])
    for rxn in tmodel_p.reactions:
        thermo_flux.tools.drg_tools.reaction_balance(rxn, balance_charge=True, balance_mg=False)
    tmodel_p.update_thermo_info(fit_unknown_dfG0=True)

    # Reset the solver model, set the objective, add the TFBA variables.
    tmodel_p.m = None
    tmodel_p.objective = tmodel_p.reactions.biomass_EX
    tmodel_p.add_TFBA_variables()

    # Short, quiet solves: only the feasibility verdict matters here.
    tmodel_p.m.Params.TimeLimit = 120        # seconds per reaction
    tmodel_p.m.Params.Threads = 16
    tmodel_p.m.Params.Method = 0             # primal simplex

    tmodel_p.m.Params.MIPFocus = 0           # balanced search strategy
    tmodel_p.m.Params.Heuristics = 0.05      # fraction of time spent in heuristics
    tmodel_p.m.Params.Cuts = 0               # no cutting planes
    tmodel_p.m.Params.Presolve = 1           # conservative presolve

    tmodel_p.m.Params.LogToConsole = 0
    tmodel_p.m.Params.OutputFlag = 0

    tmodel_p.m.optimize()

    status = tmodel_p.m.Status
    has_solution = tmodel_p.m.SolCount > 0

    # Hitting the time limit only demonstrates feasibility when an incumbent
    # was found; a time-out without any solution proves nothing.
    if status == GRB.OPTIMAL or (status == GRB.TIME_LIMIT and has_solution):
        print(f"Removing reaction {b} remains feasible")
        reaction_feasibility[b] = 1
    elif status in (GRB.INFEASIBLE, GRB.INF_OR_UNBD):
        print(f"Removing reaction {b} causes infeasibility")
        reaction_feasibility[b] = 0
    else:
        # Time-out without incumbent, numerical trouble, interruption, ...
        print(f"Removing reaction {b} is inconclusive (Gurobi status {status})")



In [None]:
# Count the blocked reactions of the base model ("BASE" label) with
# count_blocked_pathways, passing the model workbook INPUT_MODEL. The model
# name comes from MODEL_NAME rather than a repeated "ecoli" literal.
count_blocked_pathways(blocked, MODEL_NAME, "BASE", INPUT_MODEL)

In [None]:
# Reactions blocked in the condition-specific model but not in the base model.
condition_only = set(blocked_p) - set(blocked)
diff = list(condition_only)
print(diff)

In [None]:
# Update reactions and stoichiometric matrices: remove the blocked reactions
# from the base model, re-balance every remaining reaction, and refresh the
# thermodynamic information.
tmodel.remove_reactions(blocked)
balance_reaction = thermo_flux.tools.drg_tools.reaction_balance
for reaction in tmodel.reactions:
    balance_reaction(reaction, balance_charge=True, balance_mg=False)
tmodel.update_thermo_info(fit_unknown_dfG0=True)

In [None]:
# Repeat the count for the base model ("BASE" label) after the blocked
# reactions were removed from it. The model name comes from MODEL_NAME rather
# than a repeated "ecoli" literal.
count_blocked_pathways(blocked, MODEL_NAME, "BASE", INPUT_MODEL)

In [None]:
# Prepare the base model for solving: clear the solver model, make biomass_EX
# the objective, then add the TFBA variables.
tmodel.m = None
base_biomass = tmodel.reactions.biomass_EX
tmodel.objective = base_biomass
tmodel.add_TFBA_variables()

In [None]:
# Limit the run to one hour on 16 threads, then hand the base model to
# compute_IIS.
for param_name, param_value in (("TimeLimit", 3600), ("Threads", 16)):
    setattr(tmodel.m.Params, param_name, param_value)
compute_IIS(tmodel)

In [None]:
# Remove the blocked reactions from the condition-specific model, re-balance
# every remaining reaction, and refresh the thermodynamic information.
tmodel_p.remove_reactions(blocked_p)
balance_reaction = thermo_flux.tools.drg_tools.reaction_balance
for reaction in tmodel_p.reactions:
    balance_reaction(reaction, balance_charge=True, balance_mg=False)
tmodel_p.update_thermo_info(fit_unknown_dfG0=True)

In [None]:

# Prepare the condition-specific model for solving: clear the solver model,
# make biomass_EX the objective, then add the TFBA variables.
tmodel_p.m = None
condition_biomass = tmodel_p.reactions.biomass_EX
tmodel_p.objective = condition_biomass
tmodel_p.add_TFBA_variables()

In [None]:
# Solve the condition-specific model with a one hour limit on 16 threads.
for param_name, param_value in (("TimeLimit", 3600), ("Threads", 16)):
    setattr(tmodel_p.m.Params, param_name, param_value)
tmodel_p.m.optimize()
#compute_IIS(tmodel_p)

In [None]:
# Write the solver's current solution to a .sol file.
solution_file = "solutions/ecoli_WT_Glc_I_SOLUTION.sol"
tmodel_p.m.write(solution_file)

In [None]:
# Thermodynamic FVA vs normal


In [None]:
# Metabolites of the base model that take part in no reaction at all.
disconnected = [
    met.id
    for met in tmodel.metabolites
    if len(met.reactions) == 0
]
print("Disconnected metabolites:", disconnected)

In [None]:
# Apply the WT-Glc_I experimental data to the base model.
# NOTE(review): whether apply_physio_data modifies `tmodel` in place or returns
# a copy is not visible here — confirm before reusing `tmodel` afterwards.
tmodel_regressed = apply_physio_data(
    tmodel,
    "WT-Glc_I",
    INPUT_EXP_DATA,
    INPUT_EXP_CONC,
    INPUT_METABOLOMICS,
    INPUT_GAMS,
    RELAX_EXP_FLUX_BOUNDS,
    INCLUDE_CO2,
    INCLUDE_O2,
    ALLOW_OTHER_EXCRETION,
    OUTPUT_LOG,
)

In [None]:
# Blocked reactions of the WT-Glc_I model; this overwrites the `blocked` list
# obtained earlier for the base model.
blocked = list_blocked_reactions(
    tmodel_regressed,
    "WT-Glc_I",
    OUTPUT_LOG,
)

In [None]:
# Count the blocked reactions for the WT-Glc_I condition with
# count_blocked_pathways, passing the model workbook INPUT_MODEL. The model
# name comes from MODEL_NAME rather than a repeated "ecoli" literal.
count_blocked_pathways(blocked, MODEL_NAME, "WT-Glc_I", INPUT_MODEL)

In [None]:
# Share of blocked reactions among those whose subsystem mentions "nitrogen".

# Read the "Reactions" sheet of the model workbook and strip stray whitespace
# from the column headers.
rxns_df = pd.read_excel(INPUT_MODEL, sheet_name="Reactions")
rxns_df.columns = rxns_df.columns.str.strip()

blocked_ids = blocked

# Row masks over the reaction table (missing subsystems count as no match).
is_blocked = rxns_df["Abbrevation"].isin(blocked_ids)
is_nitrogen = rxns_df["Subsystem"].str.contains("nitrogen", case=False, na=False)

blocked_info = rxns_df[is_blocked]
nit_all = rxns_df[is_nitrogen]
nit_blocked = rxns_df[is_blocked & is_nitrogen]

n_total = len(nit_all)
n_blocked = len(nit_blocked)
print("Nitrogen total reactions:", n_total)
print("Nitrogen blocked reactions:", n_blocked)
# max(1, ...) keeps the division defined when no nitrogen reaction exists.
print("Fraction blocked:", n_blocked / max(1, n_total))


In [None]:
# Re-check which metabolites of `tmodel` take part in no reaction.
disconnected = [
    metabolite.id
    for metabolite in tmodel.metabolites
    if len(metabolite.reactions) == 0
]
print("Disconnected metabolites:", disconnected)