# Probabilistic Thermodynamic Analysis (PTA) of *i*CH360

In [12]:
import cobra
import pta
import numpy as np
import pandas as pd
from cobra.flux_analysis.variability import find_blocked_reactions
from component_contribution.linalg import LINALG

import enkie
from pta.commons import Q as Q_
import importlib
importlib.reload(pta)
import pickle
from enkie.io.cobra import parse_metabolites


import sys
sys.path.append('../../utils/')
import visualisation_utils



  warn(


## Load model

In [13]:
model=cobra.io.read_sbml_model('../../Model/iCH360/Escherichia_coli_iCH360.xml')

'' is not a valid SBML 'SId'.


## Prepare for PTA

In [14]:
#Make the transhydrogenase reactions reversible for thermodynamic analysis
for r_id in ['NADH17pp','THD2pp']:
    model.reactions.get_by_id(r_id).lower_bound=-1000
#Add lower bound on growth rate based on the measured value from the metabolomic dataset used by PTA
model.reactions.Biomass.lower_bound=0.65

pta.prepare_for_pta(model)

Read LP format model from file C:\Users\marco\AppData\Local\Temp\tmp5pn9ovgf.lp
Reading time = 0.01 seconds
: 304 rows, 698 columns, 2988 nonzeros


## Get list of candidate reactions to which thermodynamic constraints are added

In [15]:
constrained_reactions=pta.get_candidate_thermodynamic_constraints(model)
#Remove reactions with infinite uncertainty, as these free energies are not informative and including them may just cause numerical issues
drg0_prime_cov=pd.read_csv('../../Thermodynamics/free_energy_estimates/drg0_estimates/drg0_cov_df.csv',index_col=0).squeeze()
infinite_uncertainty_reactions=[r_id for r_id in drg0_prime_cov.index if drg0_prime_cov.loc[r_id,r_id]>1e8]
print(f"Removing {infinite_uncertainty_reactions} from the themodynamically constrained reactions since since their uncertainty is virtually infinite")
for r_id in infinite_uncertainty_reactions:
    if r_id in constrained_reactions:
        constrained_reactions.remove(r_id)
constrained_reactions_cobra=[model.reactions.get_by_id(r) for r in constrained_reactions]
constrained_metabolites=list(set([m.id for r in constrained_reactions_cobra for m in r.metabolites.keys()]))

Removing ['GMPS2', 'PRAMPC', 'PRFGS', 'GRXR', 'RNDR1b', 'RNDR2b', 'IG3PS', 'PRAIS'] from the themodynamically constrained reactions since since their uncertainty is virtually infinite


## Load thermodynamic estimates

In [16]:
# Transformed formation energies (with multi-compartment corrections)
dfg0_prime_mean=pd.read_csv('../../Thermodynamics/free_energy_estimates/dfg0_estimates/dfg0_prime_mean_mcc.csv',index_col=0).squeeze()
# Square root of the covariance matrix for the estimates
dfg0_prime_cov_sqrt=pd.read_csv('../../Thermodynamics/free_energy_estimates/dfg0_estimates/dfg0_cov_sqrt.csv',index_col=0)

#Index everything correctly
metabolites_in_model=[m.id for m in model.metabolites]
dfg0_prime_mean=dfg0_prime_mean.loc[constrained_metabolites]
dfg0_prime_cov_sqrt=dfg0_prime_cov_sqrt.loc[constrained_metabolites,:]

#Put together into a tuple
dfg0_estimates=(Q_(dfg0_prime_mean.values.reshape(-1,1),'kJ / mol'),
                Q_(dfg0_prime_cov_sqrt.values,'kJ / mol'))

In [17]:
#Construct the stoichiometric matrix and index correctly
S=cobra.util.array.create_stoichiometric_matrix(model,'DataFrame')
S=S.loc[dfg0_prime_mean.index,constrained_reactions]
S.head()

Unnamed: 0,NDPK5,SHK3Dr,NDPK6,NDPK8,DHORTS,OMPDC,G5SD,CS,ICDHyr,ACALD,...,EAR60y,EAR60x,EAR80x,NADH16pp,NADH17pp,ETOHtrpp,VPAMTr,PIt2rpp,PItpp,SUCCt1pp
glc__D_p,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
anth_c,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3oddecACP_c,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
fdp_c,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
glx_c,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Load compartment parameters and concentration priors (experimental measurements)

In [18]:
parameters = enkie.CompartmentParameters.load('e_coli')
concentrations = pta.ConcentrationsPrior.load('M9_aerobic')
concentrations.add(pta.ConcentrationsPrior.load('ecoli_M9_glc'))

In [19]:
# Parse metabolites from the model.
metabolites = parse_metabolites(
    model, constrained_metabolites, 'bigg.metabolite'
)

#Construct thermodynmic space
thermodynamic_space = pta.ThermodynamicSpace(
    reaction_ids=S.columns.tolist(),
    metabolites=list(metabolites.values()),
    S_constraints=S.values,
    estimator=None,
    parameters=parameters,
    concentrations=concentrations,
    dfg0_estimate=dfg0_estimates
)

## Solve the PTA problem (may take a while)

In [20]:
problem = pta.PmoProblem(model, thermodynamic_space,solver='GUROBI')
problem.solve(verbose=True)

                                     CVXPY                                     
                                     v1.3.1                                    
(CVXPY) Jun 24 11:32:26 AM: Your problem has 1008 variables, 8 constraints, and 0 parameters.
(CVXPY) Jun 24 11:32:26 AM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Jun 24 11:32:26 AM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Jun 24 11:32:26 AM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) Jun 24 11:32:26 AM: Compiling problem (target solver=GUROBI).
(CVXPY) Jun 24 11:32:26 AM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuffin

'optimal'

In [21]:
# Analyze the predicted concentrations and reaction energies, revealing potential
# knowledge gaps and inaccuracies in the model.
pta.QuantitativeAssessment(problem).summary()

Quantitative thermodynamic assessment summary:
------------------------------------------------
concentrations: mM, free energies: kJ/mol

> The following metabolites have been flagged as anomalies because their predicted concentration has an absolute z-score greater than 1.0:
           id   conc  z_log_c
218   aspsa_c  0.004   -2.058
210   glu5p_c  0.005   -1.939
51    5aizc_c  0.007   -1.782
129   5caiz_c  8.324    1.782
164      o2_c  0.008   -1.708
241   acg5p_c  0.008   -1.684
59    acglu_c  6.839    1.684
165     3pg_c  5.600    1.584
256    3php_c  0.010   -1.584
98      coa_c  0.011   -1.533
248   4pasp_c  0.019   -1.244
23   acon_C_c  0.021   -1.193
90    mlthf_c  2.549    1.191
139  fprica_c  0.028   -1.059

> The following non-intracellular metabolites have been flagged as anomalies because they have concentation greater than 10 mM:
       id  conc    z_log_c
235  pi_e  55.7 -1.216e-06

> The following reactions have been flagged as anomalies because their predicted free en

# Save PTA Solution for further analysis

In [22]:
pta_solution=pta.QuantitativeAssessment(problem)
reaction_data=pta_solution.reactions_df
pta_solution.reactions_df.to_csv('out/pta_fluxes.csv')
pta_solution.metabolites_df.to_csv('out/pta_metabolites_concentrations.csv')

                             