# Using thermodynamic analysis to guide metabolic engineering
DO 12-6-2018  
Using Python 3 and eQuilibrator API  


In [1]:
#Changing directory to the github home folder /Ctherm_thermo
import os
os.chdir('/Users/satyakam/Dropbox/work/equilibrator-api-master')
import sys
sys.path.append('/Users/satyakam/Dropbox/work/sbtab-0.9.64')
#sys.path.append('/Users/satyakam/Dropbox/work/equilibrator-api-master')
#sys.path.append('/Users/satyakam/Dropbox/work/equilibrator-api-master/src/equilibrator_api')
sys.path.append('/Users/satyakam/Dropbox/work/equilibrator-api-master/src')
#sys.path.append('/Users/satyakam/Dropbox/work')


In [2]:
import numpy as np
from numpy import array, eye, log, zeros, matrix
from numpy.linalg import inv, solve
import pandas as pd
from equilibrator_api import Reaction, ComponentContribution, ReactionMatcher, CompoundMatcher, ParseError, Pathway
from equilibrator_api.bounds import Bounds
%matplotlib inline

## Set up translator for KEGG IDs
Note: I set these up as dataframes because I was troubleshooting an issue with duplicate KEGG IDs. Eventually I think these should be set up as dictionaries, to make the code more readable

In [3]:
# make a dictionary to translate KEGG IDs into human-readable abbreviations
keggTranslatorDf = pd.read_excel('KEGG_SEED_DO.xls')
kt = keggTranslatorDf #short name for easier typing

# translate KEGG ID to abbreviation
kta = kt.loc[:,['KEGG ID(S)', 'ABBREVIATION']]
kta['KEGG ID(S)'] = kta['KEGG ID(S)'].str.lower() # set to lower case for better matching
kta['ABBREVIATION'] = kta['ABBREVIATION'].str.lower() # set to lower case for better matching
kta.set_index('KEGG ID(S)', inplace = True)

# translate abbreviation to KEGG ID
atk = kt.loc[:,['ABBREVIATION', 'KEGG ID(S)']].sort_values(by = ['KEGG ID(S)'], ascending = True)
atk['KEGG ID(S)'] = atk['KEGG ID(S)'].str.lower() # set to lower case for better matching
atk['ABBREVIATION'] = atk['ABBREVIATION'].str.lower() # set to lower case for better matching
atk = atk.groupby('ABBREVIATION').first() # take the first KEGG ID in each group

atkDict = dict(zip(atk.index, atk['KEGG ID(S)'].str.upper()))
ktaDict = dict(zip(kta.index.str.upper(), kta['ABBREVIATION'].values))

## Set up model
* Choose reactions
* Set fluxes
* Set concentration bounds
* Set pH and ionic strength

In [4]:
os.chdir('/Users/satyakam/Dropbox/work/component_contribution_ctherm')

In [5]:
allRxnDf = pd.read_excel('cth_thermo_model_DO_SD9.xlsx', sheet_name = 'reactions')
allRxnDf

Unnamed: 0,Name,PlaintextFormula,AbbreviationFormula,KeggFormula,Notes,ppi-pfk,atp-pfk,mal-snt,pdc,gapn,pyk,aldh-ndp,adh-ndp,mal-gapn,tsac
0,ATPase1,ATP + H2O <=> ADP + Pi,h2o + atp <=> adp + pi,C00002 + C00001 <=> C00008 + C00009,ATP hydrolysis for cellobiose transport,2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,CBP,Phosphate + Cellobiose <=> D-Glucose + Glucose...,pi + cellb <=> glc-D + g1p,C00009 + C00185 <=> C00031 + C00103,,1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0
2,BGL,Cellobiose + H2O <=> 2 D-Glucose,cellb + h2o <=>2 glc-D,C00185 + C00001 <=> 2 C00031,Beta glucosidase,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,GLK-GTP,D-Glucose + GTP <=> D-Glucose-6-phosphate + GDP,glc-D + gtp <=> g6p + gdp,C00031 + C00044 <=> C00092 + C00035,,1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0
4,GLK-ATP,D-Glucose + ATP <=> D-Glucose-6-phosphate + ADP,glc-D + atp <=> g6p + adp,C00031 + C00002 <=> C00092 + C00008,,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0
5,PGMT,Glucose-1-phosphate <=> D-glucose-6-phosphate,g1p <=> g6p,C00103 <=> C00092,,1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0
6,PGI,D-Glucose-6-phosphate <=> D-Fructose-6-phosphate,g6p <=> f6p,C00092 <=> C00085,,2,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0
7,PFK-PPi,PPi + D-fructose-6-phosphate <=> Phosphate + D...,ppi + f6p <=> pi + fdp + h,C00013 + C00085 <=> C00009 + C00354 + C00080,,2,0.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,0.0
8,PFK-ATP,ATP + D-Fructose-6-phosphate <=> ADP + D-Fruct...,atp + f6p <=> adp + fdp,C00002 + C00085 <=> C00008 + C00354,,0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0
9,FBA,"D-Fructose-1,6-bisphosphate <=> Glycerone-phos...",fdp <=> dhap + g3p,C00354 <=> C00111 + C00118,,2,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0


In [6]:
# choose a flux set, and drop all of the zero-flux reactions
# loop over fluxsets to generate multiple models 
selectedRxnDf={}
fluxSet = 'ppi-pfk'
for f in range(len(fluxSet)):
    selectedRxnDf = allRxnDf.loc[allRxnDf[fluxSet] != 0, ['Name', 'AbbreviationFormula', 'KeggFormula', fluxSet]]
    selectedRxnDf.rename(columns = {fluxSet:'flux'}, inplace = True) # rename the flux columns to 'flux' to simplify subsequent
    #rxnlist[f]= selectedRxnDf  

In [7]:
#Import metabolite data from excel file
Met_bound = pd.read_excel('cth_thermo_model_DO_SD9.xlsx', sheet_name = 'metabolite_bounds')
Alldata = pd.read_excel('./metabolite_data/lt_dataset2_quantified.xlsx')
Met_data = Alldata.loc[Alldata['Replicate'] != 0, ['Timepoint', 'KEGG_ID', 'compound','amount_int', 'Sample','Replicate','Maven filename']]
Met_data[:2]

Unnamed: 0,Timepoint,KEGG_ID,compound,amount_int,Sample,Replicate,Maven filename
0,2,C00197,3PG,217.958918,A,1,A1_1
1,2,C00197,3PG,137.397616,A,2,A1_2


In [8]:
Met_data_WT = pd.read_excel('cth_thermo_model_DO_SD9.xlsx', sheet_name = 'WT_data')
Met_data_WT

Unnamed: 0,Name,KEGG_ID,Abbreviation,Concentration
0,Glucose-6-phosphate,C00092,g6p,0.008192
1,Fructose-6-phosphate,C00085,f6p,0.001494
2,Glucose-1-phosphate,C00103,g1p,0.006663
3,Fructose 1_6-bisphosphate,C00354,fdp,0.001497
4,Malate,C00149,mal-l,0.037807
5,DHAP,C00111,dhap,0.00127
6,GAP,C00118,g3p,0.000102
7,3-phosphoglycerate,C00197,3pg,0.001348
8,Phosphoenolpyruvate,C00074,pep,0.00069
9,pyruvate,C00022,pyr,0.012655


In [9]:
# parse the KeggFormula column to create eQuilibrator Reaction objects
# create reaction list
reactions = []
for i, row in selectedRxnDf.iterrows():
    rxn = Reaction.parse_formula(row['KeggFormula'], rid = row['Name'])
    if (rxn.check_full_reaction_balancing()):
        reactions.append(rxn)
    else:
        print('Error: reaction {} is not balanced'.format(row['AbbreviationFormula']))
        
# create flux list
fluxes = selectedRxnDf['flux'].values

# need to declare pH and ionic strength constants at the beginning
PH = 7.0
IS = 0.1

# calculate dGO_r_primes
dG0_r_primes = []
for r in reactions:
    result = r.dG0_prime(pH = PH, ionic_strength = IS)
    #print(result)
    dG0_r_primes.append(result)

In [10]:
# set up the model
pp = Pathway(reactions = reactions, fluxes = fluxes, dG0_r_primes = dG0_r_primes)

In [11]:
# Check MDF using Daniel's data
all_data={}
Uf = 1.5 #Uncertainity factor 
DEFAULT_LB = 1e-6
DEFAULT_UB = 2e-2 #20 mM, which is 2x higher than the 10 mM default bound suggested in the Noor et al. 2014 paper.
   
for i, cpd in Met_bound.iterrows():  # reset bounds at the start of each iteration
    pp.bounds.SetBounds(cpd['KEGG_ID'],cpd['Concentration:Min'],cpd['Concentration:Max'])
for i, dat in Met_data_WT.iterrows():
    if dat['KEGG_ID'] not in ['C00020','C00035','C00044','C00022','C00103','C00004','C00005','C00006']:
        pp.bounds.SetBounds(dat['KEGG_ID'],dat['Concentration']*.50,dat['Concentration']*1.5) 
    #else:
        #pp.bounds.SetBounds(dat['KEGG_ID'],1e-6,1e-2) 
pp.bounds.SetBounds('C00469', 0.47/46, 0.47/46)
pp.bounds.SetBounds('C00033', 8.61/1000, 8.61/1000)       
mdf_data,con = pp.conc_mdf()
print(mdf_data.mdf)
k=0
atp_ratio=1
gtp_ratio=1
nadh_ratio=1
for r in mdf_data.reaction_data:
    print (r.reaction.reaction_id,r.dGr,r.shadow_price)
for c in mdf_data.compound_data:
    print(ktaDict[c.compound], c.concentration,con[k],mdf_data.model.concentration_bounds.GetBoundTuple(c.compound),c.shadow_price)
    k= k+1
    if c.compound in ['C00002']:
        atp_ratio=atp_ratio*c.concentration
    elif c.compound in ['C00008']:
        atp_ratio=atp_ratio/c.concentration
    elif c.compound in ['C00003']:
        nadh_ratio=nadh_ratio/c.concentration
    elif c.compound in ['C00004']:
        nadh_ratio=nadh_ratio*c.concentration
    elif c.compound in ['C00035']:
        gtp_ratio=gtp_ratio/c.concentration
    elif c.compound in ['C00044']:
        gtp_ratio=gtp_ratio*c.concentration
print('atp_ratio =',atp_ratio)
print('gtp_ratio =',gtp_ratio)
print('nadh_ratio =',nadh_ratio)        

0.9047175479724707
ATPase1 -43.10629668645875 0.0
CBP -0.9047175479724707 0.0
GLK-GTP -16.706740398555315 0.0
PGMT -8.627602452025005 -1.8161543077082945e-17
PGI -1.6911720565429516 -6.721491463453975e-17
PFK-PPi -6.063434676923654 -2.1284722967604256e-16
FBA -2.5011131286095214 -2.4645468699331246e-16
TPI -0.9047175479724725 0.14285714285714274
GAPDH -0.9047175479724654 0.1428571428571427
PGK-GTP -0.9047175479724885 0.14285714285714277
PGM -0.9047175479724707 0.14285714285714252
ENO -0.9047175479724778 0.14285714285714302
PPDK -4.82974137357021 0.0
PFOR -3.2543475057785134 0.0
RNF_PPiase -37.81888267338455 0.0
ALDH-NADH -0.9047175479724601 0.07142857142857147
ADH-NADH -0.9047175479724707 0.07142857142857123
NDK -0.9047175479724712 0.14285714285714282
ATPase2 -50.59428522514596 0.0
PTA -0.9047175479724672 0.0
ACK -9.21209596957128 0.0
h2o 1.0 (1.0, 1.0) (1.0, 1.0) -0.35394664285714317
atp 0.0013522352819870895 (0.0013522352819870882, 0.0013522352819871086) (0.0013522352819870895, 0.004

In [12]:
# impose only one concentration bound at a time to check for infeasible bounds
all_data={}
Uf = 0.3 #Uncertainity factor 
DEFAULT_LB = 1e-6
DEFAULT_UB = 2e-2 #20 mM, which is 2x higher than the 10 mM default bound suggested in the Noor et al. 2014 paper.

ref_conc={}
for i, dat in Met_data.iterrows():
    if dat['Timepoint']==5 and dat['Sample']=='B':
        if dat['KEGG_ID'] not in ref_conc:
            ref_conc[dat['KEGG_ID']]={}
        ref_conc[dat['KEGG_ID']][dat['Replicate']]=dat['amount_int']
for sm in range(12): # divide the samples into datasets
    all_data[sm]={}
    if sm < 6:
        smple = 'A'
    else:
        smple = 'B'
    if (sm)%3==0:
        tym = 2
    elif (sm)%3==1:
        tym = 5
    else:
        tym = 9
    if sm<3 or (sm<9 and sm>5):
        rep = 1
    else:
        rep = 2
    print(smple,tym,rep)
    for i2, dat2 in Met_data_WT.iterrows():
        if dat2['KEGG_ID'] not in ['C00020','C00035','C00044','C00022','C00103','C00004','C00005','C00006']:
            #pp.bounds.SetBounds(dat['KEGG_ID'],dat['Concentration']*.50,dat['Concentration']*1.5)
            try:
                for i, cpd in Met_bound.iterrows():  # reset bounds at the start of each iteration
                    pp.bounds.SetBounds(cpd['KEGG_ID'],cpd['Concentration:Min'],cpd['Concentration:Max'] )
                for i, dat in Met_data.iterrows():
                # skip cofactors and impose bound on metsbolites one at a time    
                    if (dat2['KEGG_ID']==dat['KEGG_ID'])and(dat['Sample']==smple and dat['Timepoint']==tym and dat['Replicate']==rep) and (dat['KEGG_ID'] not in ["C00002","C00008","C00020","C00044","C00035","C00003","C00004","C00005","C00006","C00469"]):
                        all_data[sm][i]={}
                        lb = dat2['Concentration']*dat['amount_int']/(ref_conc[dat['KEGG_ID']][dat['Replicate']]) *(1-Uf)
                        ub = dat2['Concentration']*dat['amount_int']/(ref_conc[dat['KEGG_ID']][dat['Replicate']]) *(1+Uf)
                        if lb < DEFAULT_LB:    
                            lb = DEFAULT_LB

                        if lb > DEFAULT_UB:
                            lb = DEFAULT_UB

                        if ub < DEFAULT_LB:
                            ub = DEFAULT_LB

                        if ub > DEFAULT_UB:
                            ub = DEFAULT_UB

                        pp.bounds.SetBounds(dat['KEGG_ID'], lb, ub)
                        #print(dat['KEGG_ID'], lb, ub)
                        if tym  == 2 and smple == 'A':
                            pp.bounds.SetBounds('C00469', 5.12/46, 5.12/46)
                            pp.bounds.SetBounds('C00033', 2.87/1000, 2.87/1000)
                        elif tym ==5 and smple == 'A':
                            pp.bounds.SetBounds('C00469', 20.21/46, 20.21/46)
                            pp.bounds.SetBounds('C00033', 4.25/1000, 4.25/1000)
                        elif tym ==9 and smple == 'A':
                            pp.bounds.SetBounds('C00469', 40.38/46, 40.38/46)
                            pp.bounds.SetBounds('C00033', 4.39/1000, 4.39/1000)
                        if tym  == 2 and smple == 'B':
                            pp.bounds.SetBounds('C00469', 0.2/46, 0.2/46)
                            pp.bounds.SetBounds('C00033', 3.21/1000, 3.21/1000)
                        elif tym ==5 and smple == 'B':
                            pp.bounds.SetBounds('C00469', 0.47/46, 0.47/46)
                            pp.bounds.SetBounds('C00033', 8.61/1000, 8.61/1000)
                        elif tym ==9 and smple == 'B':
                            pp.bounds.SetBounds('C00469', 0.58/46, 0.58/46)
                            pp.bounds.SetBounds('C00033', 9.21/1000, 9.21/1000)
                        mdf_data = pp.calc_mdf()
                        #print(smple,tym,rep,dat['KEGG_ID'],mdf_data.mdf)
                        atp_ratio=1
                        gtp_ratio=1
                        nadh_ratio=1
                        for c in mdf_data.compound_data:
                            if c.compound in ['C00002']:
                                atp_ratio=atp_ratio*c.concentration
                            elif c.compound in ['C00008']:
                                atp_ratio=atp_ratio/c.concentration
                            elif c.compound in ['C00003']:
                                nadh_ratio=nadh_ratio/c.concentration
                            elif c.compound in ['C00004']:
                                nadh_ratio=nadh_ratio*c.concentration
                            elif c.compound in ['C00035']:
                                gtp_ratio=gtp_ratio/c.concentration
                            elif c.compound in ['C00044']:
                                gtp_ratio=gtp_ratio*c.concentration
                            #print(c.compound, log(mdf_data.model.concentration_bounds.GetBoundTuple(c.compound)), sm,log(all_data[0]['mdf'].model.concentration_bounds.GetBoundTuple(c.compound)))
                        #print('atp_ratio =',atp_ratio)
                        #print('gtp_ratio =',gtp_ratio)
                        #print('nadh_ratio =',nadh_ratio)
                        print(ktaDict[dat['KEGG_ID']],mdf_data.mdf,atp_ratio,gtp_ratio,nadh_ratio)
                        #print("__________________")
                        all_data[sm]['sample']=smple
                        all_data[sm]['timepoint']=tym
                        all_data[sm]['replicate']=rep
                        #all_data[sm][cpd['KEGG_ID']]['metab']=dat['KEGG_ID']
                        all_data[sm]['mdf']=mdf_data
            except:
                print("no ref for", dat2['KEGG_ID'])

A 2 1
g6p 2.9971712482048325 9.999999999999986 90.8685546248938 0.009999999999999962
f6p 3.1167946615713937 10.00000000000001 95.36345778783317 0.00999999999999996
fdp 2.4682707783238484 9.999999999999993 73.40142764815582 0.009999999999999926
mal-l 3.2107686908816575 9.999999999999993 99.04997463072768 0.009999999999999997
dhap 1.630270777677417 10.000000000000002 52.337674074941816 0.009999999999999962
3pg 1.1062024517143314 9.999999999999977 42.3596452942939 0.009999999999999978
pep 1.2353884281580072 9.999999999999993 44.62691572243304 0.009999999999999997
accoa 3.2107686908816575 9.999999999999993 99.04997463072768 0.009999999999999997
coa 2.992768958140296 9.999999999999977 90.7072411437511 0.016954089179735527
A 5 1
g6p 2.9797695160533877 9.999999999999948 90.23257088737147 0.011569649990486661
f6p 3.110194115640919 9.999999999999922 95.10974191105062 0.012195002467234133
fdp 2.963148160645254 9.999999999999977 89.62926440096678 0.018214917941756184
mal-l 3.1261633187608844 10.0

In [52]:
# leave one out concentration bound at a time to check for infeasible bounds
# Concentrations are scaled with respect to the 2nd time point of sample B, each replicate has its own reference
all_data={}
Uf = 0.0 #Uncertainity factor 
DEFAULT_LB = 1e-6
DEFAULT_UB = 2e-2 #20 mM, which is 2x higher than the 10 mM default bound suggested in the Noor et al. 2014 paper.

ref_conc={}
for i, dat in Met_data.iterrows():
    if dat['Timepoint']==5:
        if dat['KEGG_ID'] not in ref_conc:
            ref_conc[dat['KEGG_ID']]={}
            if not ref_conc[dat['KEGG_ID']]:
                ref_conc[dat['KEGG_ID']]=dat['amount_int']
            else:
                ref_conc[dat['KEGG_ID']]=(ref_conc[dat['KEGG_ID']]+dat['amount_int'])/2
print("Compound name ","Pathway MDF","atp_ratio","gtp_ratio","nadh_ratio")
for sm in range(12): # divide the samples into datasets
    all_data[sm]={}
    if sm < 6:
        smple = 'A'
    else:
        smple = 'B'
    if (sm)%3==0:
        tym = 2
    elif (sm)%3==1:
        tym = 5
    else:
        tym = 9
    if sm<3 or (sm<9 and sm>5):
        rep = 1
    else:
        rep = 2
    print(smple,tym,rep)
    for i3, dat3 in Met_data_WT.iterrows():
        if dat3['KEGG_ID'] not in ['C00002','C00008','C00020','C00035','C00044','C00022','C00103','C00003','C00004','C00005','C00006']:
            for i, cpd in Met_bound.iterrows():  # reset bounds at the start of each iteration
                pp.bounds.SetBounds(cpd['KEGG_ID'],cpd['Concentration:Min'],cpd['Concentration:Max'])
            for i2, dat2 in Met_data_WT.iterrows():
                if dat2['KEGG_ID'] not in ['C00002','C00008','C00020','C00035','C00044','C00022','C00103','C00003','C00004','C00005','C00006']:
                    #pp.bounds.SetBounds(dat['KEGG_ID'],dat['Concentration']*.50,dat['Concentration']*1.5)
                    try:
                        for i, dat in Met_data.iterrows():
                        # skip cofactors and impose bound on metabolites one at a time    
                            if (dat2['KEGG_ID']==dat['KEGG_ID']) and (dat['Sample']==smple and dat['Timepoint']==tym and dat['Replicate']==rep) and (dat['KEGG_ID'] not in ['C00002','C00008','C00020','C00035','C00044','C00022','C00103','C00003','C00004','C00005','C00006','C00469']):
                                all_data[sm][i]={}
                                lb = dat2['Concentration']*dat['amount_int']/(ref_conc[dat['KEGG_ID']])*(1-Uf)
                                #print(dat['KEGG_ID'],dat['peakAreaTop'],ref_conc[dat['KEGG_ID']][dat['Replicate']],dat2['Concentration'])
                                ub = dat2['Concentration']*dat['amount_int']/(ref_conc[dat['KEGG_ID']])*(1+Uf)
                                if lb < DEFAULT_LB:    
                                    lb = DEFAULT_LB

                                if lb > DEFAULT_UB:
                                    lb = DEFAULT_UB

                                if ub < DEFAULT_LB:
                                    ub = DEFAULT_LB

                                if ub > DEFAULT_UB:
                                    ub = DEFAULT_UB

                                #leave one out
                                if (dat2['KEGG_ID']!=dat3['KEGG_ID']):
                                    pp.bounds.SetBounds(dat['KEGG_ID'], lb, ub)
                                #print(dat['KEGG_ID'], lb, ub)
                                if tym  == 2 and smple == 'A':
                                    pp.bounds.SetBounds('C00469', 5.12/46, 5.12/46)
                                    pp.bounds.SetBounds('C00033', 2.87/1000, 2.87/1000)
                                elif tym ==5 and smple == 'A':
                                    pp.bounds.SetBounds('C00469', 20.21/46, 20.21/46)
                                    pp.bounds.SetBounds('C00033', 4.25/1000, 4.25/1000)
                                elif tym ==9 and smple == 'A':
                                    pp.bounds.SetBounds('C00469', 40.38/46, 40.38/46)
                                    pp.bounds.SetBounds('C00033', 4.39/1000, 4.39/1000)
                                if tym  == 2 and smple == 'B':
                                    pp.bounds.SetBounds('C00469', 0.2/46, 0.2/46)
                                    pp.bounds.SetBounds('C00033', 3.21/1000, 3.21/1000)
                                elif tym ==5 and smple == 'B':
                                    pp.bounds.SetBounds('C00469', 0.47/46, 0.47/46)
                                    pp.bounds.SetBounds('C00033', 8.61/1000, 8.61/1000)
                                elif tym ==9 and smple == 'B':
                                    pp.bounds.SetBounds('C00469', 0.58/46, 0.58/46)
                                    pp.bounds.SetBounds('C00033', 9.21/1000, 9.21/1000)
                    except:
                        print("no ref for", dat2['KEGG_ID'])
            mdf_data = pp.calc_mdf()
            #print(smple,tym,rep,dat['KEGG_ID'],mdf_data.mdf)
            atp_ratio=1
            gtp_ratio=1
            nadh_ratio=1
            for c in mdf_data.compound_data:
                if c.compound in ['C00002']:
                    atp_ratio=atp_ratio*c.concentration
                elif c.compound in ['C00008']:
                    atp_ratio=atp_ratio/c.concentration
                elif c.compound in ['C00003']:
                    nadh_ratio=nadh_ratio/c.concentration
                elif c.compound in ['C00004']:
                    nadh_ratio=nadh_ratio*c.concentration
                elif c.compound in ['C00035']:
                    gtp_ratio=gtp_ratio/c.concentration
                elif c.compound in ['C00044']:
                    gtp_ratio=gtp_ratio*c.concentration
                #print(c.compound, c.concentration,(mdf_data.model.concentration_bounds.GetBoundTuple(c.compound)))
            print(ktaDict[dat3['KEGG_ID']],mdf_data.mdf,atp_ratio,gtp_ratio,nadh_ratio)

            all_data[sm]['sample']=smple
            all_data[sm]['timepoint']=tym
            all_data[sm]['replicate']=rep

            all_data[sm]['mdf']=mdf_data


Compound name  Pathway MDF atp_ratio gtp_ratio nadh_ratio
A 2 1
g6p -3.4742323442578105 14.99464116629347 9.999999999999993 0.010000000000000031
f6p -3.4742323442578105 14.99464116629347 9.999999999999993 0.010000000000000031
fdp -3.474232344257814 14.994641166293468 9.999999999999977 0.00999999999999998
mal-l -3.4742323442578176 14.994641166293494 9.999999999999977 0.009999999999999962
dhap -2.14536001882907 10.00000000000001 11.402427092543439 0.009999999999999978
g3p -3.474232344257807 14.994641166293508 10.000000000000028 0.009999999999999995
3pg -1.6590330675472238 10.00000000000001 13.87534703540769 0.009999999999999997
pep -3.474232344257821 14.994641166293494 9.999999999999977 0.009999999999999997
accoa -3.4742323442578105 14.99464116629347 9.999999999999993 0.010000000000000031
coa -3.4742323442578105 14.99464116629352 10.00000000000003 0.009999999999999978
A 5 1
g6p -0.8668970462380656 9.999999999999993 19.102693438612345 0.03111893501045608
f6p -0.8668970462380727 9.99999999

In [30]:
# impose only one concentration bound at a time to check for infeasible bounds
all_data={}
Uf = 0.00 #Uncertainity factor 
DEFAULT_LB = 1e-6
DEFAULT_UB = 2e-2 #20 mM, which is 2x higher than the 10 mM default bound suggested in the Noor et al. 2014 paper.

ref_conc={}
for i, dat in Met_data.iterrows():
    if dat['Timepoint']==5 and dat['Sample']=='B':
        if dat['KEGG_ID'] not in ref_conc:
            ref_conc[dat['KEGG_ID']]={}
        if dat['Replicate']==1:
            ref_conc[dat['KEGG_ID']]=dat['amount_int']
        else:
            ref_conc[dat['KEGG_ID']]=(ref_conc[dat['KEGG_ID']]+dat['amount_int'])/2

for sm in range(12): # divide the samples into datasets
    all_data[sm]={}
    if sm < 6:
        smple = 'A'
    else:
        smple = 'B'
    if (sm)%3==0:
        tym = 2
    elif (sm)%3==1:
        tym = 5
    else:
        tym = 9
    if sm<3 or (sm<9 and sm>5):
        rep = 1
    else:
        rep = 2
    #print(smple,tym,rep)
    for i, cpd in Met_bound.iterrows():  # reset bounds at the start of each iteration
        pp.bounds.SetBounds(cpd['KEGG_ID'],cpd['Concentration:Min'],cpd['Concentration:Max'] )
    for i2, dat2 in Met_data_WT.iterrows():
        if dat2['KEGG_ID'] not in ['C00044','C00035','C00002','C00004','C00005','C00008','C00003','C00020','C00197']:
            #pp.bounds.SetBounds(dat['KEGG_ID'],dat['Concentration']*.50,dat['Concentration']*1.5)
            for i, dat in Met_data.iterrows():
                if (dat2['KEGG_ID']==dat['KEGG_ID'])and(dat['Sample']==smple and dat['Timepoint']==tym and dat['Replicate']==rep) and (dat['KEGG_ID'] not in ["C00002","C00008","C00020","C00044","C00035","C00003","C00004","C00005","C00006","C00469"]):
                    all_data[sm][i]={}
                    try:
                        lb = dat2['Concentration']*dat['amount_int']/(ref_conc[dat['KEGG_ID']]) *(1-Uf)
                        ub = dat2['Concentration']*dat['amount_int']/(ref_conc[dat['KEGG_ID']]) *(1+Uf)
                        if lb < DEFAULT_LB:    
                            lb = DEFAULT_LB

                        if lb > DEFAULT_UB:
                            lb = DEFAULT_UB

                        if ub < DEFAULT_LB:
                            ub = DEFAULT_LB

                        if ub > DEFAULT_UB:
                            ub = DEFAULT_UB

                        pp.bounds.SetBounds(dat['KEGG_ID'], lb, ub)
                    except:
                        print("ref not found for",dat['KEGG_ID'])
                    #print(dat['KEGG_ID'], lb, ub)
                    if tym  == 2 and smple == 'A':
                        pp.bounds.SetBounds('C00469', 5.12/46, 5.12/46)
                        #pp.bounds.SetBounds('C00033', 2.87/1000, 2.87/1000)
                    elif tym ==5 and smple == 'A':
                        pp.bounds.SetBounds('C00469', 20.21/46, 20.21/46)
                        #pp.bounds.SetBounds('C00033', 4.25/1000, 4.25/1000)
                    elif tym ==9 and smple == 'A':
                        pp.bounds.SetBounds('C00469', 40.38/46, 40.38/46)
                        #pp.bounds.SetBounds('C00033', 4.39/1000, 4.39/1000)
                    if tym  == 2 and smple == 'B':
                        pp.bounds.SetBounds('C00469', 0.2/46, 0.2/46)
                        #pp.bounds.SetBounds('C00033', 3.21/1000, 3.21/1000)
                    elif tym ==5 and smple == 'B':
                        pp.bounds.SetBounds('C00469', 0.47/46, 0.47/46)
                        #pp.bounds.SetBounds('C00033', 8.61/1000, 8.61/1000)
                    elif tym ==9 and smple == 'B':
                        pp.bounds.SetBounds('C00469', 0.58/46, 0.58/46)
                        #pp.bounds.SetBounds('C00033', 9.21/1000, 9.21/1000)
    mdf_data = pp.calc_mdf()
    #print(smple,tym,rep,dat['KEGG_ID'],mdf_data.mdf)
    atp_ratio=1
    gtp_ratio=1
    nadh_ratio=1
    for c in mdf_data.compound_data:
        if c.compound in ['C00002']:
            atp_ratio=atp_ratio*c.concentration
        elif c.compound in ['C00008']:
            atp_ratio=atp_ratio/c.concentration
        elif c.compound in ['C00003']:
            nadh_ratio=nadh_ratio/c.concentration
        elif c.compound in ['C00004']:
            nadh_ratio=nadh_ratio*c.concentration
        elif c.compound in ['C00035']:
            gtp_ratio=gtp_ratio/c.concentration
        elif c.compound in ['C00044']:
            gtp_ratio=gtp_ratio*c.concentration
        #print(c.compound, c.concentration,(mdf_data.model.concentration_bounds.GetBoundTuple(c.compound)))
    #print('atp_ratio =',atp_ratio)
    #print('gtp_ratio =',gtp_ratio)
    #print('nadh_ratio =',nadh_ratio)
    print(smple,tym,rep,mdf_data.mdf)
    #print("__________________")
    all_data[sm]['sample']=smple
    all_data[sm]['timepoint']=tym
    all_data[sm]['replicate']=rep
    #all_data[sm][cpd['KEGG_ID']]['metab']=dat['KEGG_ID']
    all_data[sm]['mdf']=mdf_data
    #except:
    #print("no ref for", dat2['KEGG_ID'])

A 2 1 0.21785309091100657
A 5 1 0.5189832862631576
A 9 1 0.05731297563113813
A 2 2 0.3675003997356221
A 5 2 0.9553296064694017
A 9 2 0.13065785719337342
B 2 1 0.3693254830665822
B 5 1 0.33345994683751456
B 9 1 0.1551990765240987
B 2 2 0.4607785714705557
B 5 2 -0.13842294955772516
B 9 2 -0.025216676169755203


In [29]:
# impose only one concentration bound at a time to check for infeasible bounds
all_data={}
Uf = 0.00 #Uncertainity factor 
DEFAULT_LB = 1e-6
DEFAULT_UB = 2e-2 #20 mM, which is 2x higher than the 10 mM default bound suggested in the Noor et al. 2014 paper.

ref_conc={}
for i, dat in Met_data.iterrows():
    if dat['Timepoint']==5 and dat['Sample']=='B':
        if dat['KEGG_ID'] not in ref_conc:
            ref_conc[dat['KEGG_ID']]={}
        if dat['Replicate']==1:
            ref_conc[dat['KEGG_ID']]=dat['amount_int']

for sm in range(12): # divide the samples into datasets
    all_data[sm]={}
    if sm < 6:
        smple = 'A'
    else:
        smple = 'B'
    if (sm)%3==0:
        tym = 2
    elif (sm)%3==1:
        tym = 5
    else:
        tym = 9
    if sm<3 or (sm<9 and sm>5):
        rep = 1
    else:
        rep = 2
    #print(smple,tym,rep)
    for i, cpd in Met_bound.iterrows():  # reset bounds at the start of each iteration
        pp.bounds.SetBounds(cpd['KEGG_ID'],cpd['Concentration:Min'],cpd['Concentration:Max'] )
    for i2, dat2 in Met_data_WT.iterrows():
        if dat2['KEGG_ID'] not in ['C00044','C00035','C00002','C00004','C00005','C00008','C00003','C00020','C00197']:
            #pp.bounds.SetBounds(dat['KEGG_ID'],dat['Concentration']*.50,dat['Concentration']*1.5)
            for i, dat in Met_data.iterrows():
                if (dat2['KEGG_ID']==dat['KEGG_ID'])and(dat['Sample']==smple and dat['Timepoint']==tym and dat['Replicate']==rep) and (dat['KEGG_ID'] not in ["C00002","C00008","C00020","C00044","C00035","C00003","C00004","C00005","C00006","C00469"]):
                    all_data[sm][i]={}
                    try:
                        lb = dat2['Concentration']*dat['amount_int']/(ref_conc[dat['KEGG_ID']]) *(1-Uf)
                        ub = dat2['Concentration']*dat['amount_int']/(ref_conc[dat['KEGG_ID']]) *(1+Uf)
                        if lb < DEFAULT_LB:    
                            lb = DEFAULT_LB

                        if lb > DEFAULT_UB:
                            lb = DEFAULT_UB

                        if ub < DEFAULT_LB:
                            ub = DEFAULT_LB

                        if ub > DEFAULT_UB:
                            ub = DEFAULT_UB

                        pp.bounds.SetBounds(dat['KEGG_ID'], lb, ub)
                    except:
                        print("ref not found for",dat['KEGG_ID'])
                    #print(dat['KEGG_ID'], lb, ub)
                    if tym  == 2 and smple == 'A':
                        pp.bounds.SetBounds('C00469', 5.12/46, 5.12/46)
                        #pp.bounds.SetBounds('C00033', 2.87/1000, 2.87/1000)
                    elif tym ==5 and smple == 'A':
                        pp.bounds.SetBounds('C00469', 20.21/46, 20.21/46)
                        #pp.bounds.SetBounds('C00033', 4.25/1000, 4.25/1000)
                    elif tym ==9 and smple == 'A':
                        pp.bounds.SetBounds('C00469', 40.38/46, 40.38/46)
                        #pp.bounds.SetBounds('C00033', 4.39/1000, 4.39/1000)
                    if tym  == 2 and smple == 'B':
                        pp.bounds.SetBounds('C00469', 0.2/46, 0.2/46)
                        #pp.bounds.SetBounds('C00033', 3.21/1000, 3.21/1000)
                    elif tym ==5 and smple == 'B':
                        pp.bounds.SetBounds('C00469', 0.47/46, 0.47/46)
                        #pp.bounds.SetBounds('C00033', 8.61/1000, 8.61/1000)
                    elif tym ==9 and smple == 'B':
                        pp.bounds.SetBounds('C00469', 0.58/46, 0.58/46)
                        #pp.bounds.SetBounds('C00033', 9.21/1000, 9.21/1000)
    mdf_data = pp.calc_mdf()
    #print(smple,tym,rep,dat['KEGG_ID'],mdf_data.mdf)
    atp_ratio=1
    gtp_ratio=1
    nadh_ratio=1
    for c in mdf_data.compound_data:
        if c.compound in ['C00002']:
            atp_ratio=atp_ratio*c.concentration
        elif c.compound in ['C00008']:
            atp_ratio=atp_ratio/c.concentration
        elif c.compound in ['C00003']:
            nadh_ratio=nadh_ratio/c.concentration
        elif c.compound in ['C00004']:
            nadh_ratio=nadh_ratio*c.concentration
        elif c.compound in ['C00035']:
            gtp_ratio=gtp_ratio/c.concentration
        elif c.compound in ['C00044']:
            gtp_ratio=gtp_ratio*c.concentration
        #print(c.compound, c.concentration,(mdf_data.model.concentration_bounds.GetBoundTuple(c.compound)))
    #print('atp_ratio =',atp_ratio)
    #print('gtp_ratio =',gtp_ratio)
    #print('nadh_ratio =',nadh_ratio)
    print(smple,tym,rep,mdf_data.mdf)
    #print("__________________")
    all_data[sm]['sample']=smple
    all_data[sm]['timepoint']=tym
    all_data[sm]['replicate']=rep
    #all_data[sm][cpd['KEGG_ID']]['metab']=dat['KEGG_ID']
    all_data[sm]['mdf']=mdf_data
    #except:
    #print("no ref for", dat2['KEGG_ID'])

A 2 1 0.02921214029979069
A 5 1 0.5913578437365317
A 9 1 0.29225639436427286
A 2 2 0.1554254202551455
A 5 2 1.0976075796793197
A 9 2 0.025696129036212767
B 2 1 0.1806845324553592
B 5 1 0.1448189962263129
B 9 1 0.3172102385854174
B 2 2 0.27213762085933624
B 5 2 -0.32706390016892684
B 9 2 -0.21385762678096043


In [28]:
# impose only one concentration bound at a time to check for infeasible bounds
all_data={}
Uf = 0.00 #Uncertainity factor 
DEFAULT_LB = 1e-6
DEFAULT_UB = 2e-2 #20 mM, which is 2x higher than the 10 mM default bound suggested in the Noor et al. 2014 paper.

ref_conc={}
for i, dat in Met_data.iterrows():
    if dat['Timepoint']==5 and dat['Sample']=='B':
        if dat['KEGG_ID'] not in ref_conc:
            ref_conc[dat['KEGG_ID']]={}
        if dat['Replicate']==2:
            ref_conc[dat['KEGG_ID']]=dat['amount_int']


for sm in range(12): # divide the samples into datasets
    all_data[sm]={}
    if sm < 6:
        smple = 'A'
    else:
        smple = 'B'
    if (sm)%3==0:
        tym = 2
    elif (sm)%3==1:
        tym = 5
    else:
        tym = 9
    if sm<3 or (sm<9 and sm>5):
        rep = 1
    else:
        rep = 2
    #print(smple,tym,rep)
    for i, cpd in Met_bound.iterrows():  # reset bounds at the start of each iteration
        pp.bounds.SetBounds(cpd['KEGG_ID'],cpd['Concentration:Min'],cpd['Concentration:Max'] )
    for i2, dat2 in Met_data_WT.iterrows():
        if dat2['KEGG_ID'] not in ['C00044','C00035','C00002','C00004','C00005','C00008','C00003','C00020','C00197']:
            #pp.bounds.SetBounds(dat['KEGG_ID'],dat['Concentration']*.50,dat['Concentration']*1.5)
            for i, dat in Met_data.iterrows():
                if (dat2['KEGG_ID']==dat['KEGG_ID'])and(dat['Sample']==smple and dat['Timepoint']==tym and dat['Replicate']==rep) and (dat['KEGG_ID'] not in ["C00002","C00008","C00020","C00044","C00035","C00003","C00004","C00005","C00006","C00469"]):
                    all_data[sm][i]={}
                    try:
                        lb = dat2['Concentration']*dat['amount_int']/(ref_conc[dat['KEGG_ID']]) *(1-Uf)
                        ub = dat2['Concentration']*dat['amount_int']/(ref_conc[dat['KEGG_ID']]) *(1+Uf)
                        if lb < DEFAULT_LB:    
                            lb = DEFAULT_LB

                        if lb > DEFAULT_UB:
                            lb = DEFAULT_UB

                        if ub < DEFAULT_LB:
                            ub = DEFAULT_LB

                        if ub > DEFAULT_UB:
                            ub = DEFAULT_UB

                        pp.bounds.SetBounds(dat['KEGG_ID'], lb, ub)
                    except:
                        print("ref not found for",dat['KEGG_ID'])
                    #print(dat['KEGG_ID'], lb, ub)
                    if tym  == 2 and smple == 'A':
                        pp.bounds.SetBounds('C00469', 5.12/46, 5.12/46)
                        #pp.bounds.SetBounds('C00033', 2.87/1000, 2.87/1000)
                    elif tym ==5 and smple == 'A':
                        pp.bounds.SetBounds('C00469', 20.21/46, 20.21/46)
                        #pp.bounds.SetBounds('C00033', 4.25/1000, 4.25/1000)
                    elif tym ==9 and smple == 'A':
                        pp.bounds.SetBounds('C00469', 40.38/46, 40.38/46)
                        #pp.bounds.SetBounds('C00033', 4.39/1000, 4.39/1000)
                    if tym  == 2 and smple == 'B':
                        pp.bounds.SetBounds('C00469', 0.2/46, 0.2/46)
                        #pp.bounds.SetBounds('C00033', 3.21/1000, 3.21/1000)
                    elif tym ==5 and smple == 'B':
                        pp.bounds.SetBounds('C00469', 0.47/46, 0.47/46)
                        #pp.bounds.SetBounds('C00033', 8.61/1000, 8.61/1000)
                    elif tym ==9 and smple == 'B':
                        pp.bounds.SetBounds('C00469', 0.58/46, 0.58/46)
                        #pp.bounds.SetBounds('C00033', 9.21/1000, 9.21/1000)
    mdf_data = pp.calc_mdf()
    #print(smple,tym,rep,dat['KEGG_ID'],mdf_data.mdf)
    atp_ratio=1
    gtp_ratio=1
    nadh_ratio=1
    for c in mdf_data.compound_data:
        if c.compound in ['C00002']:
            atp_ratio=atp_ratio*c.concentration
        elif c.compound in ['C00008']:
            atp_ratio=atp_ratio/c.concentration
        elif c.compound in ['C00003']:
            nadh_ratio=nadh_ratio/c.concentration
        elif c.compound in ['C00004']:
            nadh_ratio=nadh_ratio*c.concentration
        elif c.compound in ['C00035']:
            gtp_ratio=gtp_ratio/c.concentration
        elif c.compound in ['C00044']:
            gtp_ratio=gtp_ratio*c.concentration
        #print(c.compound, c.concentration,(mdf_data.model.concentration_bounds.GetBoundTuple(c.compound)))
    #print('atp_ratio =',atp_ratio)
    #print('gtp_ratio =',gtp_ratio)
    #print('nadh_ratio =',nadh_ratio)
    print(smple,tym,rep,mdf_data.mdf)
    #print("__________________")
    all_data[sm]['sample']=smple
    all_data[sm]['timepoint']=tym
    all_data[sm]['replicate']=rep
    #all_data[sm][cpd['KEGG_ID']]['metab']=dat['KEGG_ID']
    all_data[sm]['mdf']=mdf_data
    #except:
    #print("no ref for", dat2['KEGG_ID'])

A 2 1 0.27614001623279094
A 5 1 0.37265735110795717
A 9 1 0.17256435464254594
A 2 2 0.2530775788869164
A 5 2 0.904996729950927
A 9 2 0.4273989855585647
B 2 1 0.6134720586542102
B 5 1 0.15823787556394464
B 9 1 -0.10286866162882546
B 2 2 0.42563729465652855
B 5 2 0.1448189962263129
B 9 2 0.2580252696142864
