# Using thermodynamic analysis to guide metabolic engineering
DO 12-6-2018  
Using Python 3 and eQuilibrator API  


In [2]:
# Point the notebook at a local eQuilibrator API checkout: make that checkout the working
# directory and add its `src` folder, plus a local sbtab copy, to the module search path.
# NOTE(review): the earlier comment called this "the github home folder /Ctherm_thermo", but the
# path used below is the equilibrator-api-master checkout — confirm which one is intended.
# NOTE(review): these are absolute, machine-specific paths; they must be edited to run elsewhere.
import os
os.chdir('/Users/satyakam/Dropbox/work/equilibrator-api-master')
import sys
sys.path.append('/Users/satyakam/Dropbox/work/sbtab-0.9.64')
# Alternative search paths, currently disabled:
#sys.path.append('/Users/satyakam/Dropbox/work/equilibrator-api-master')
#sys.path.append('/Users/satyakam/Dropbox/work/equilibrator-api-master/src/equilibrator_api')
sys.path.append('/Users/satyakam/Dropbox/work/equilibrator-api-master/src')
#sys.path.append('/Users/satyakam/Dropbox/work')


In [3]:
import numpy as np
from numpy import array, eye, log, zeros, matrix
from numpy.linalg import inv, solve
import pandas as pd
from equilibrator_api import Reaction, ComponentContribution, ReactionMatcher, CompoundMatcher, ParseError, Pathway
from equilibrator_api.bounds import Bounds
%matplotlib inline

## Set up translator for KEGG IDs
Note: these translators were first set up as DataFrames because I was troubleshooting an issue with duplicate KEGG IDs. Eventually I think they should be set up only as dictionaries, to make the code more readable.

In [4]:
# Build lookup tables between KEGG IDs and human-readable abbreviations.
keggTranslatorDf = pd.read_excel('KEGG_SEED_DO.xls')
kt = keggTranslatorDf  # short alias for easier typing

# Column transforms shared by both tables: lower-case both columns for better matching.
lowered = {
    'KEGG ID(S)': lambda frame: frame['KEGG ID(S)'].str.lower(),
    'ABBREVIATION': lambda frame: frame['ABBREVIATION'].str.lower(),
}

# KEGG ID -> abbreviation (DataFrame indexed by lower-case KEGG ID)
kta = (
    kt.loc[:, ['KEGG ID(S)', 'ABBREVIATION']]
    .assign(**lowered)
    .set_index('KEGG ID(S)')
)

# abbreviation -> KEGG ID: rows are sorted by KEGG ID first, then the first KEGG ID
# of every abbreviation group is kept.
atk = (
    kt.loc[:, ['ABBREVIATION', 'KEGG ID(S)']]
    .sort_values(by=['KEGG ID(S)'], ascending=True)
    .assign(**lowered)
    .groupby('ABBREVIATION')
    .first()
)

# Plain dictionaries: keys/values holding KEGG IDs are upper-case, abbreviations stay lower-case.
atkDict = {abbreviation: kegg_id
           for abbreviation, kegg_id in zip(atk.index, atk['KEGG ID(S)'].str.upper())}
ktaDict = {kegg_id: abbreviation
           for kegg_id, abbreviation in zip(kta.index.str.upper(), kta['ABBREVIATION'].values)}

## Set up model
* Choose reactions
* Set fluxes
* Set concentration bounds
* Set pH and ionic strength

In [5]:
# Switch the working directory to the model folder; the workbook and data files read in the
# following cells are given as paths relative to it.
# NOTE(review): absolute, machine-specific path — must be edited to run on another machine.
os.chdir('/Users/satyakam/Dropbox/work/component_contribution_ctherm')

In [46]:
# Reaction table from the model workbook; the first two rows are displayed as a quick check.
allRxnDf = pd.read_excel(io='cth_thermo_model_DO_SD9.xlsx', sheet_name='reactions')
allRxnDf.head(2)

Unnamed: 0,Name,PlaintextFormula,AbbreviationFormula,KeggFormula,Notes,ppi-pfk,atp-pfk,mal-snt,pdc,gapn,pyk,aldh-ndp,adh-ndp,mal-gapn,tsac
0,ATPase1,ATP + H2O <=> ADP + Pi,h2o + atp <=> adp + pi,C00002 + C00001 <=> C00008 + C00009,ATP hydrolysis for cellobiose transport,2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,CBP,Phosphate + Cellobiose <=> D-Glucose + Glucose...,pi + cellb <=> glc-D + g1p,C00009 + C00185 <=> C00031 + C00103,,1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0


In [7]:
# Choose a flux set (a column of allRxnDf) and drop all of the zero-flux reactions.
# The previous version wrapped this in `for f in range(len(fluxSet))`, which iterated over the
# characters of the flux-set name and rebuilt the identical frame each time; the selection is
# now done once. To build several models, loop over a list of flux-set names instead.
fluxSet = 'ppi-pfk'
selectedRxnDf = (
    allRxnDf.loc[allRxnDf[fluxSet] != 0, ['Name', 'AbbreviationFormula', 'KeggFormula', fluxSet]]
    # rename the flux column to 'flux' to simplify subsequent cells; returns a new frame
    # rather than renaming a slice of allRxnDf in place
    .rename(columns={fluxSet: 'flux'})
)

In [37]:
# Import metabolite data from the Excel files.
metabolite_columns = ['Timepoint', 'KEGG_ID', 'compound', 'amount_int',
                      'peakAreaTop', 'Sample', 'Replicate', 'Maven filename']

# Default concentration bounds per compound (model workbook)
Met_bound = pd.read_excel(io='cth_thermo_model_DO_SD9.xlsx', sheet_name='metabolite_bounds')

# Quantified measurements; rows whose Replicate is 0 are dropped
Alldata = pd.read_excel(io='./metabolite_data/lt_dataset2_quantified.xlsx')
keep_row = Alldata['Replicate'].ne(0)
Met_data = Alldata.loc[keep_row, metabolite_columns]
Met_data.head(2)

Unnamed: 0,Timepoint,KEGG_ID,compound,amount_int,peakAreaTop,Sample,Replicate,Maven filename
0,2,C00197,3PG,217.958918,8703433.0,A,1,A1_1
1,2,C00197,3PG,137.397616,5454995.0,A,2,A1_2


In [64]:
# Load the 'WT_data' sheet of the model workbook and display it. Later cells read its
# 'KEGG_ID' and 'Concentration' columns to build concentration bounds.
# NOTE(review): the concentration units are not stated in the notebook — presumably molar,
# since the values are passed straight to the pathway bounds; confirm against the workbook.
Met_data_WT = pd.read_excel('cth_thermo_model_DO_SD9.xlsx', sheet_name = 'WT_data')
Met_data_WT

Unnamed: 0,Name,KEGG_ID,Abbreviation,Concentration
0,Glucose-6-phosphate,C00092,g6p,0.008192
1,Fructose-6-phosphate,C00085,f6p,0.001494
2,Glucose-1-phosphate,C00103,g1p,0.006663
3,Fructose 1_6-bisphosphate,C00354,fdp,0.001497
4,Malate,C00149,mal-l,0.037807
5,DHAP,C00111,dhap,0.00127
6,GAP,C00118,g3p,0.000102
7,3-phosphoglycerate,C00197,3pg,0.001348
8,Phosphoenolpyruvate,C00074,pep,0.00069
9,pyruvate,C00022,pyr,0.012655


In [10]:
# pH and ionic strength used for every standard-energy calculation in this notebook
PH = 7.0
IS = 0.1

# Parse the KeggFormula column to create eQuilibrator Reaction objects.
# Unbalanced reactions are reported and left out of the model. `is_balanced` records, row by
# row, whether the reaction was kept so the flux vector can be filtered the same way;
# previously `fluxes` kept one entry per row, so a single unbalanced reaction left
# `reactions` and `fluxes` with different lengths and misaligned entries.
reactions = []
is_balanced = []
for i, row in selectedRxnDf.iterrows():
    rxn = Reaction.parse_formula(row['KeggFormula'], rid = row['Name'])
    balanced = bool(rxn.check_full_reaction_balancing())
    is_balanced.append(balanced)
    if balanced:
        reactions.append(rxn)
    else:
        print('Error: reaction {} is not balanced'.format(row['AbbreviationFormula']))

# Flux list, restricted to the reactions that were kept (same order as `reactions`)
fluxes = selectedRxnDf['flux'].values[np.array(is_balanced, dtype=bool)]

# Calculate dG0_r_primes, one per kept reaction, at the pH / ionic strength declared above
dG0_r_primes = [r.dG0_prime(pH = PH, ionic_strength = IS) for r in reactions]

In [11]:
# Set up the pathway model from the parsed reactions, their fluxes and the dG0_r_primes
# computed in the previous cell. `pp` is shared by all later cells, which overwrite its
# concentration bounds through `pp.bounds.SetBounds` before each MDF calculation.
pp = Pathway(reactions = reactions, fluxes = fluxes, dG0_r_primes = dG0_r_primes)

In [86]:
# Check the MDF using Daniel's data (the WT concentration sheet).
all_data = {}
Uf = 1.5  # Uncertainty factor (not referenced in this cell; the WT bounds below use fixed 0.5x / 1.5x factors)
DEFAULT_LB = 1e-6
DEFAULT_UB = 2e-2  # 20 mM, which is 2x higher than the 10 mM default bound suggested in the Noor et al. 2014 paper.

# WT compounds whose measured concentration is NOT imposed as a bound
unbounded_ids = ('C00002', 'C00008', 'C00020', 'C00035', 'C00044', 'C00022',
                 'C00103', 'C00003', 'C00004', 'C00005', 'C00006')

# Which compound feeds which ratio, and whether it multiplies (True) or divides (False) it
ratio_terms = {
    'C00002': ('atp', True),
    'C00008': ('atp', False),
    'C00003': ('nadh', False),
    'C00004': ('nadh', True),
    'C00035': ('gtp', False),
    'C00044': ('gtp', True),
}

# Start from the bounds listed in the metabolite_bounds sheet
for _, bound_row in Met_bound.iterrows():
    pp.bounds.SetBounds(bound_row['KEGG_ID'], bound_row['Concentration:Min'], bound_row['Concentration:Max'])

# Constrain every remaining WT metabolite to 0.5x - 1.5x of its listed concentration
for _, wt_row in Met_data_WT.iterrows():
    if wt_row['KEGG_ID'] in unbounded_ids:
        continue
    pp.bounds.SetBounds(wt_row['KEGG_ID'], wt_row['Concentration']*.50, wt_row['Concentration']*1.5)

# Pin C00469 and C00033 to single values (lower bound == upper bound)
c00469_level = 0.47/46
c00033_level = 8.61/1000
pp.bounds.SetBounds('C00469', c00469_level, c00469_level)
pp.bounds.SetBounds('C00033', c00033_level, c00033_level)

mdf_data, con = pp.conc_mdf()
print(mdf_data.mdf)

# Per-reaction results: id, dGr and shadow price
for rxn_result in mdf_data.reaction_data:
    print(rxn_result.reaction.reaction_id, rxn_result.dGr, rxn_result.shadow_price)

# Per-compound results, accumulating the cofactor ratios in the order the compounds appear
ratios = {'atp': 1, 'gtp': 1, 'nadh': 1}
for k, c in enumerate(mdf_data.compound_data):
    print(ktaDict[c.compound], c.concentration, con[k],
          mdf_data.model.concentration_bounds.GetBoundTuple(c.compound), c.shadow_price)
    if c.compound in ratio_terms:
        ratio_name, multiplies = ratio_terms[c.compound]
        if multiplies:
            ratios[ratio_name] = ratios[ratio_name]*c.concentration
        else:
            ratios[ratio_name] = ratios[ratio_name]/c.concentration

atp_ratio = ratios['atp']
gtp_ratio = ratios['gtp']
nadh_ratio = ratios['nadh']
print('atp_ratio =', atp_ratio)
print('gtp_ratio =', gtp_ratio)
print('nadh_ratio =', nadh_ratio)

0.4355419322778715
ATPase1 -43.507631032369446 0.0
CBP -0.4355419322778715 0.0
GLK-GTP -17.108074744466016 0.0
PGMT -8.627602452025007 4.4809943089693173e-17
PGI -1.6911720565429524 -4.4809943089693173e-17
PFK-PPi -6.0634346769236505 -7.631693432463365e-17
FBA -2.35681187837147 -1.344298292690795e-16
TPI -0.7604162977343956 -1.344298292690795e-16
GAPDH -0.43554193227787685 0.33333333333333337
PGK-GTP -0.43554193227787863 0.33333333333333404
PGM -0.4355419322778715 -1.344298292690795e-16
ENO -0.4355419322778751 -1.344298292690795e-16
PPDK -6.230126375770931 0.0
PFOR -3.254347505778501 0.0
RNF_PPiase -36.382776628448056 0.0
ALDH-NADH -0.4355419322778715 -4.480994308969317e-16
ADH-NADH -4.246105253540065 -3.003666497730995e-16
NDK -0.43554193227787286 0.33333333333333404
ATPase2 -49.84663916080911 0.0
PTA -0.4355419322778733 5.37719317076318e-16
ACK -9.279937239355183 0.0
h2o 1.0 (1.0, 1.0) (1.0, 1.0) 2.498001805406602e-16
atp 0.0010000000000000002 (0.0009999999999999994, 0.00100000000000

In [85]:
# Impose only one measured concentration bound at a time to check for infeasible bounds.
# Concentrations are scaled with respect to the 2nd time point (Timepoint == 5) of sample B;
# each replicate has its own reference peak area.
all_data = {}
Uf = 0.1  # Uncertainty factor: each bound is the scaled measurement * (1 -/+ Uf)
DEFAULT_LB = 1e-6
DEFAULT_UB = 2e-2  # 20 mM, which is 2x higher than the 10 mM default bound suggested in the Noor et al. 2014 paper.

# WT metabolites that are never used as the single imposed bound
cofactor_ids = {'C00002', 'C00008', 'C00020', 'C00035', 'C00044', 'C00022',
                'C00103', 'C00003', 'C00004', 'C00005', 'C00006'}
# Measurement rows that are never turned into a bound
row_skip_ids = {"C00002", "C00008", "C00020", "C00044", "C00035",
                "C00003", "C00004", "C00005", "C00006", "C00469"}

# Which compound feeds which ratio, and whether it multiplies (True) or divides (False) it
ratio_terms = {
    'C00002': ('atp', True), 'C00008': ('atp', False),
    'C00003': ('nadh', False), 'C00004': ('nadh', True),
    'C00035': ('gtp', False), 'C00044': ('gtp', True),
}

# (sample, timepoint) -> (C00469 value before /46, C00033 value before /1000).
# Replaces the six-branch if/elif ladder; both compounds are pinned (lb == ub).
product_levels = {
    ('A', 2): (5.12, 2.87), ('A', 5): (20.21, 4.25), ('A', 9): (40.38, 4.39),
    ('B', 2): (0.2, 3.21), ('B', 5): (0.47, 8.61), ('B', 9): (0.58, 9.21),
}

# Reference peak areas: ref_conc[KEGG_ID][Replicate]
ref_conc = {}
for i, dat in Met_data.iterrows():
    if dat['Timepoint'] == 5 and dat['Sample'] == 'B':
        ref_conc.setdefault(dat['KEGG_ID'], {})[dat['Replicate']] = dat['peakAreaTop']

print("Compound name ","Pathway MDF","atp_ratio","gtp_ratio","nadh_ratio")
for sm in range(12):  # divide the samples into datasets
    all_data[sm] = {}
    smple = 'A' if sm < 6 else 'B'
    tym = (2, 5, 9)[sm % 3]
    rep = 1 if (sm // 3) % 2 == 0 else 2  # sm 0-2 and 6-8 -> replicate 1, otherwise 2
    print(smple, tym, rep)

    c00469_level = product_levels[(smple, tym)][0]/46
    c00033_level = product_levels[(smple, tym)][1]/1000

    # Measurement rows of this dataset, selected once instead of rescanning the whole
    # table for every WT metabolite
    sample_rows = Met_data[(Met_data['Sample'] == smple)
                           & (Met_data['Timepoint'] == tym)
                           & (Met_data['Replicate'] == rep)]

    for i2, dat2 in Met_data_WT.iterrows():
        if dat2['KEGG_ID'] in cofactor_ids:
            continue

        # reset bounds before imposing the next single bound
        for i, cpd in Met_bound.iterrows():
            pp.bounds.SetBounds(cpd['KEGG_ID'], cpd['Concentration:Min'], cpd['Concentration:Max'])

        for i, dat in sample_rows.iterrows():
            kegg_id = dat['KEGG_ID']
            if kegg_id != dat2['KEGG_ID'] or kegg_id in row_skip_ids:
                continue
            all_data[sm][i] = {}

            # A missing reference is an expected condition: report it explicitly rather than
            # through a catch-all handler.
            ref_area = ref_conc.get(kegg_id, {}).get(dat['Replicate'])
            if ref_area is None:
                print("no ref for", dat2['KEGG_ID'])
                continue

            # Scale the WT concentration by peak area relative to the reference, then clamp
            scaled = dat2['Concentration']*dat['peakAreaTop']/ref_area
            lb = min(max(scaled*(1-Uf), DEFAULT_LB), DEFAULT_UB)
            ub = min(max(scaled*(1+Uf), DEFAULT_LB), DEFAULT_UB)

            try:
                pp.bounds.SetBounds(kegg_id, lb, ub)
                pp.bounds.SetBounds('C00469', c00469_level, c00469_level)
                pp.bounds.SetBounds('C00033', c00033_level, c00033_level)
                mdf_data = pp.calc_mdf()

                # Accumulate the cofactor ratios in the order the compounds appear
                ratios = {'atp': 1, 'gtp': 1, 'nadh': 1}
                for c in mdf_data.compound_data:
                    if c.compound in ratio_terms:
                        ratio_name, multiplies = ratio_terms[c.compound]
                        if multiplies:
                            ratios[ratio_name] = ratios[ratio_name]*c.concentration
                        else:
                            ratios[ratio_name] = ratios[ratio_name]/c.concentration
                atp_ratio, gtp_ratio, nadh_ratio = ratios['atp'], ratios['gtp'], ratios['nadh']

                print(ktaDict[kegg_id], mdf_data.mdf, atp_ratio, gtp_ratio, nadh_ratio)
            except Exception as err:
                # Keep the scan going (best effort) but report what actually failed; the
                # previous bare `except:` labelled every failure "no ref for" and also
                # swallowed KeyboardInterrupt.
                print("MDF calculation failed for", dat2['KEGG_ID'], "-", repr(err))
                continue

            # Only the result of the last successfully evaluated compound is kept per dataset
            all_data[sm]['sample'] = smple
            all_data[sm]['timepoint'] = tym
            all_data[sm]['replicate'] = rep
            all_data[sm]['mdf'] = mdf_data

Compound name  Pathway MDF atp_ratio gtp_ratio nadh_ratio
A 2 1
g6p 3.006043496039119 9.999999999999977 91.19453336139338 0.00999999999999996
f6p 3.1354457304223047 9.999999999999977 96.08404332863121 0.00999999999999996
fdp 2.6939169223003354 9.999999999999993 80.40021871744362 0.009999999999999943
mal-l 3.2107686908816575 10.00000000000001 99.04997463072802 0.009999999999999978
dhap 1.7470610022436652 9.999999999999993 54.86383583558178 0.009999999999999997
3pg 1.1056906287919688 9.999999999999993 42.350895630763695 0.009999999999999997
pep 1.2135732033611788 9.999999999999993 44.23570551894808 0.010000000000000033
accoa 3.2107686908816575 9.999999999999993 99.04997463072768 0.009999999999999997
coa 2.9845865985488516 9.999999999999993 90.40817465557878 0.017293384847327183
A 5 1
g6p 2.9831436910639724 10.00000000000001 90.35553853238184 0.011585416942474402
f6p 3.1112852233645967 9.999999999999986 95.15163596884568 0.01220037413713771
fdp 3.1210185547534977 10.00000000000002 95.5261

In [84]:
# Leave one measured concentration bound out at a time to check for infeasible bounds.
# Concentrations are scaled with respect to the 2nd time point (Timepoint == 5) of sample B;
# each replicate has its own reference peak area.
all_data = {}
Uf = 0.3  # Uncertainty factor: each bound is the scaled measurement * (1 -/+ Uf)
DEFAULT_LB = 1e-6
DEFAULT_UB = 2e-2  # 20 mM, which is 2x higher than the 10 mM default bound suggested in the Noor et al. 2014 paper.

# WT metabolites that are neither bounded nor left out
cofactor_ids = {'C00002', 'C00008', 'C00020', 'C00035', 'C00044', 'C00022',
                'C00103', 'C00003', 'C00004', 'C00005', 'C00006'}
# Measurement rows that are never turned into a bound
row_skip_ids = cofactor_ids | {'C00469'}

# Which compound feeds which ratio, and whether it multiplies (True) or divides (False) it
ratio_terms = {
    'C00002': ('atp', True), 'C00008': ('atp', False),
    'C00003': ('nadh', False), 'C00004': ('nadh', True),
    'C00035': ('gtp', False), 'C00044': ('gtp', True),
}

# (sample, timepoint) -> (C00469 value before /46, C00033 value before /1000).
# Replaces the six-branch if/elif ladder; both compounds are pinned (lb == ub).
product_levels = {
    ('A', 2): (5.12, 2.87), ('A', 5): (20.21, 4.25), ('A', 9): (40.38, 4.39),
    ('B', 2): (0.2, 3.21), ('B', 5): (0.47, 8.61), ('B', 9): (0.58, 9.21),
}

# Reference peak areas: ref_conc[KEGG_ID][Replicate]
ref_conc = {}
for i, dat in Met_data.iterrows():
    if dat['Timepoint'] == 5 and dat['Sample'] == 'B':
        ref_conc.setdefault(dat['KEGG_ID'], {})[dat['Replicate']] = dat['peakAreaTop']

print("Compound name ","Pathway MDF","atp_ratio","gtp_ratio","nadh_ratio")
for sm in range(12):  # divide the samples into datasets
    all_data[sm] = {}
    smple = 'A' if sm < 6 else 'B'
    tym = (2, 5, 9)[sm % 3]
    rep = 1 if (sm // 3) % 2 == 0 else 2  # sm 0-2 and 6-8 -> replicate 1, otherwise 2
    print(smple, tym, rep)

    c00469_level = product_levels[(smple, tym)][0]/46
    c00033_level = product_levels[(smple, tym)][1]/1000

    # Measurement rows of this dataset, selected once instead of rescanning the whole
    # table for every (left-out, bounded) metabolite pair
    sample_rows = Met_data[(Met_data['Sample'] == smple)
                           & (Met_data['Timepoint'] == tym)
                           & (Met_data['Replicate'] == rep)]

    for i3, dat3 in Met_data_WT.iterrows():  # dat3 is the metabolite whose bound is left out
        if dat3['KEGG_ID'] in cofactor_ids:
            continue

        # reset bounds at the start of each leave-one-out round
        for i, cpd in Met_bound.iterrows():
            pp.bounds.SetBounds(cpd['KEGG_ID'], cpd['Concentration:Min'], cpd['Concentration:Max'])

        for i2, dat2 in Met_data_WT.iterrows():
            if dat2['KEGG_ID'] in cofactor_ids:
                continue
            for i, dat in sample_rows.iterrows():
                kegg_id = dat['KEGG_ID']
                if kegg_id != dat2['KEGG_ID'] or kegg_id in row_skip_ids:
                    continue
                all_data[sm][i] = {}

                # A missing reference is the only expected failure here: report it explicitly
                # instead of hiding every error behind a bare `except:`.
                ref_area = ref_conc.get(kegg_id, {}).get(dat['Replicate'])
                if ref_area is None:
                    print("no ref for", dat2['KEGG_ID'])
                    continue

                # Scale the WT concentration by peak area relative to the reference, then clamp
                scaled = dat2['Concentration']*dat['peakAreaTop']/ref_area
                lb = min(max(scaled*(1-Uf), DEFAULT_LB), DEFAULT_UB)
                ub = min(max(scaled*(1+Uf), DEFAULT_LB), DEFAULT_UB)

                # leave one out: every measured bound is imposed except the one for dat3
                if dat2['KEGG_ID'] != dat3['KEGG_ID']:
                    pp.bounds.SetBounds(kegg_id, lb, ub)

        # The pinned C00469 / C00033 values depend only on (sample, timepoint), so they are
        # set once per round, after the measured bounds. Previously they were set only as a
        # side effect of finding a matching measurement row.
        pp.bounds.SetBounds('C00469', c00469_level, c00469_level)
        pp.bounds.SetBounds('C00033', c00033_level, c00033_level)

        mdf_data = pp.calc_mdf()

        # Accumulate the cofactor ratios in the order the compounds appear
        ratios = {'atp': 1, 'gtp': 1, 'nadh': 1}
        for c in mdf_data.compound_data:
            if c.compound in ratio_terms:
                ratio_name, multiplies = ratio_terms[c.compound]
                if multiplies:
                    ratios[ratio_name] = ratios[ratio_name]*c.concentration
                else:
                    ratios[ratio_name] = ratios[ratio_name]/c.concentration
        atp_ratio, gtp_ratio, nadh_ratio = ratios['atp'], ratios['gtp'], ratios['nadh']

        # The first column names the metabolite whose bound was left out
        print(ktaDict[dat3['KEGG_ID']], mdf_data.mdf, atp_ratio, gtp_ratio, nadh_ratio)

        # Only the result of the last leave-one-out round is kept per dataset
        all_data[sm]['sample'] = smple
        all_data[sm]['timepoint'] = tym
        all_data[sm]['replicate'] = rep
        all_data[sm]['mdf'] = mdf_data


Compound name  Pathway MDF atp_ratio gtp_ratio nadh_ratio
A 2 1
g6p -0.26174170389067086 9.999999999999977 24.387662103520928 0.010000000000000014
f6p -0.26174170389067086 9.999999999999993 24.387662103521 0.010000000000000033
fdp -0.26174170389067086 10.00000000000001 24.387662103521016 0.010000000000000031
mal-l -0.26174170389067086 10.00000000000001 24.387662103521016 0.010000000000000014
dhap 0.21168625613767134 9.999999999999975 29.522674624477823 0.009999999999999997
g3p -0.26174170389067086 10.00000000000001 24.387662103521016 0.010000000000000014
3pg 0.3019516482394202 10.00000000000001 30.618083627925603 0.009999999999999943
pep -0.26174170389067086 10.000000000000028 24.38766210352104 0.010000000000000014
accoa -0.26174170389067086 10.00000000000001 24.387662103521016 0.010000000000000014
coa -0.26174170389066376 9.999999999999993 24.387662103521063 0.009999999999999962
A 5 1
g6p 0.8605809322325868 9.999999999999977 38.36172943197613 0.08031592597977157
f6p 0.8605809322325797

In [73]:
# For every measurement row whose KEGG ID occurs in the WT sheet, print its abbreviation.
# The WT ID column is converted to strings once, outside the loop; matching is the same
# pattern-based `str.contains` test as before.
wt_kegg_ids = Met_data_WT['KEGG_ID'].astype(str)
for _, met_row in Met_data.iterrows():
    kegg_id = met_row['KEGG_ID']
    found_in_wt = wt_kegg_ids.str.contains(kegg_id).any()
    if found_in_wt:
        print(ktaDict[kegg_id])

3pg
3pg
3pg
3pg
3pg
3pg
3pg
3pg
3pg
3pg
3pg
3pg
accoa
accoa
accoa
accoa
accoa
accoa
accoa
accoa
accoa
accoa
accoa
adp
adp
adp
adp
adp
adp
adp
adp
adp
adp
adp
adp
amp
amp
amp
amp
amp
amp
amp
amp
amp
amp
amp
amp
atp
atp
atp
atp
atp
atp
atp
atp
atp
atp
coa
coa
coa
coa
coa
coa
coa
coa
coa
coa
coa
coa
dhap
dhap
dhap
dhap
dhap
dhap
dhap
dhap
dhap
dhap
dhap
dhap
fdp
fdp
fdp
fdp
fdp
fdp
fdp
fdp
fdp
fdp
fdp
fdp
f6p
f6p
f6p
f6p
f6p
f6p
f6p
f6p
f6p
f6p
f6p
g1p
g1p
g1p
g1p
g1p
g1p
g1p
g1p
g1p
g1p
g1p
g1p
g6p
g6p
g6p
g6p
g6p
g6p
g6p
g6p
g6p
g6p
g6p
g6p
gdp
gdp
gdp
gdp
gdp
gdp
gdp
gdp
gdp
gdp
gdp
mal-l
mal-l
mal-l
mal-l
mal-l
mal-l
mal-l
mal-l
mal-l
mal-l
mal-l
mal-l
nad
nad
nad
nad
nad
nad
nad
nad
nad
nad
nad
nad
nadh
nadh
nadh
nadh
nadh
nadp
nadp
nadp
nadp
nadp
nadp
nadp
nadp
nadp
nadp
nadp
nadp
nadph
nadph
nadph
nadph
nadph
nadph
nadph
nadph
nadph
pep
pep
pep
pep
pep
pep
pep
pep
pep
pep
pep
pep
pyr
pyr
pyr
pyr
pyr
pyr
pyr
pyr
pyr
pyr
pyr
pyr
gtp
gtp
gtp
gtp


In [96]:
# Impose all measured concentration bounds at once.
# Reference for scaling: the mean peak area over the replicates of sample B at Timepoint == 5.
all_data = {}
Uf = 0.0  # Uncertainty factor: each bound is value * (1 -/+ Uf); 0 pins every bound to one value
DEFAULT_LB = 1e-6
DEFAULT_UB = 2e-2  # 20 mM, which is 2x higher than the 10 mM default bound suggested in the Noor et al. 2014 paper.

# WT metabolites whose measurement is not imposed
cofactor_ids = {'C00002', 'C00008', 'C00035', 'C00044', 'C00003', 'C00004', 'C00005', 'C00006'}
# Measurement rows that are never turned into a bound
row_skip_ids = cofactor_ids | {'C00469'}

# Which compound feeds which ratio, and whether it multiplies (True) or divides (False) it
ratio_terms = {
    'C00002': ('atp', True), 'C00008': ('atp', False),
    'C00003': ('nadh', False), 'C00004': ('nadh', True),
    'C00035': ('gtp', False), 'C00044': ('gtp', True),
}

# (sample, timepoint) -> (C00469 value before /46, C00033 value before /1000).
# Replaces the six-branch if/elif ladder.
product_levels = {
    ('A', 2): (5.12, 2.87), ('A', 5): (20.21, 4.25), ('A', 9): (40.38, 4.39),
    ('B', 2): (0.2, 3.21), ('B', 5): (0.47, 8.61), ('B', 9): (0.58, 9.21),
}

# Collect the reference peak areas per compound, then average them. The earlier running
# update `(ref + peak)/2` only worked when replicate 1 came first and exactly one more
# replicate followed; otherwise it raised a TypeError or weighted the replicates unevenly.
ref_peaks = {}
for i, dat in Met_data.iterrows():
    if dat['Timepoint'] == 5 and dat['Sample'] == 'B':
        ref_peaks.setdefault(dat['KEGG_ID'], []).append(dat['peakAreaTop'])
ref_conc = {kegg_id: sum(areas)/len(areas) for kegg_id, areas in ref_peaks.items()}

for sm in range(12):  # divide the samples into datasets
    all_data[sm] = {}
    smple = 'A' if sm < 6 else 'B'
    tym = (2, 5, 9)[sm % 3]
    rep = 1 if (sm // 3) % 2 == 0 else 2  # sm 0-2 and 6-8 -> replicate 1, otherwise 2

    # reset bounds at the start of each dataset
    for i, cpd in Met_bound.iterrows():
        pp.bounds.SetBounds(cpd['KEGG_ID'], cpd['Concentration:Min'], cpd['Concentration:Max'])

    # Measurement rows of this dataset, selected once instead of rescanning the whole
    # table for every WT metabolite
    sample_rows = Met_data[(Met_data['Sample'] == smple)
                           & (Met_data['Timepoint'] == tym)
                           & (Met_data['Replicate'] == rep)]

    for i2, dat2 in Met_data_WT.iterrows():
        if dat2['KEGG_ID'] in cofactor_ids:
            continue
        for i, dat in sample_rows.iterrows():
            kegg_id = dat['KEGG_ID']
            if kegg_id != dat2['KEGG_ID'] or kegg_id in row_skip_ids:
                continue
            all_data[sm][i] = {}

            # A missing reference is the only expected failure: test for it explicitly
            # instead of hiding every error behind a bare `except:`.
            if kegg_id not in ref_conc:
                print("ref not found for", dat['KEGG_ID'])
                continue

            # Scale the WT concentration by peak area relative to the reference, then clamp
            scaled = dat2['Concentration']*dat['peakAreaTop']/ref_conc[kegg_id]
            lb = min(max(scaled*(1-Uf), DEFAULT_LB), DEFAULT_UB)
            ub = min(max(scaled*(1+Uf), DEFAULT_LB), DEFAULT_UB)
            pp.bounds.SetBounds(kegg_id, lb, ub)

    # The C00469 / C00033 bounds depend only on (sample, timepoint), so they are set once per
    # dataset, after the measured bounds. Previously they were set only as a side effect of
    # finding a matching measurement row.
    c00469_level, c00033_level = product_levels[(smple, tym)]
    pp.bounds.SetBounds('C00469', c00469_level/46*(1-Uf), c00469_level/46*(1+Uf))
    pp.bounds.SetBounds('C00033', c00033_level/1000*(1-Uf), c00033_level/1000*(1+Uf))

    mdf_data = pp.calc_mdf()

    # Accumulate the cofactor ratios in the order the compounds appear (not printed in this cell)
    ratios = {'atp': 1, 'gtp': 1, 'nadh': 1}
    for c in mdf_data.compound_data:
        if c.compound in ratio_terms:
            ratio_name, multiplies = ratio_terms[c.compound]
            if multiplies:
                ratios[ratio_name] = ratios[ratio_name]*c.concentration
            else:
                ratios[ratio_name] = ratios[ratio_name]/c.concentration
    atp_ratio, gtp_ratio, nadh_ratio = ratios['atp'], ratios['gtp'], ratios['nadh']

    print(smple, tym, rep, mdf_data.mdf)
    all_data[sm]['sample'] = smple
    all_data[sm]['timepoint'] = tym
    all_data[sm]['replicate'] = rep
    all_data[sm]['mdf'] = mdf_data

A 2 1 -0.37048678699357396
A 5 1 0.15059405943372184
A 9 1 -0.03562119516580253
A 2 2 0.2528373524279175
A 5 2 0.3910553676359214
A 9 2 -0.025736436596325518
B 2 1 0.039848751972591856
B 5 1 0.110964697453241
B 9 1 0.09644257135028056
B 2 2 0.1101825899547606
B 5 2 -0.5399730897282282
B 9 2 -0.35049902498941776


In [97]:
# Impose all measured concentration bounds at once.
# Reference for scaling: the replicate-1 peak area of sample B at Timepoint == 5
# (used for both replicates).
all_data = {}
Uf = 0.0  # Uncertainty factor: each bound is value * (1 -/+ Uf); 0 pins every bound to one value
DEFAULT_LB = 1e-6
DEFAULT_UB = 2e-2  # 20 mM, which is 2x higher than the 10 mM default bound suggested in the Noor et al. 2014 paper.

# WT metabolites whose measurement is not imposed
cofactor_ids = {'C00002', 'C00008', 'C00035', 'C00044', 'C00003', 'C00004', 'C00005', 'C00006'}
# Measurement rows that are never turned into a bound
row_skip_ids = cofactor_ids | {'C00469'}

# Which compound feeds which ratio, and whether it multiplies (True) or divides (False) it
ratio_terms = {
    'C00002': ('atp', True), 'C00008': ('atp', False),
    'C00003': ('nadh', False), 'C00004': ('nadh', True),
    'C00035': ('gtp', False), 'C00044': ('gtp', True),
}

# (sample, timepoint) -> (C00469 value before /46, C00033 value before /1000).
# Replaces the six-branch if/elif ladder.
product_levels = {
    ('A', 2): (5.12, 2.87), ('A', 5): (20.21, 4.25), ('A', 9): (40.38, 4.39),
    ('B', 2): (0.2, 3.21), ('B', 5): (0.47, 8.61), ('B', 9): (0.58, 9.21),
}

# Only compounds that actually have a replicate-1 reference row get an entry. The earlier
# version stored an empty dict as a placeholder when replicate 1 was missing, and the
# resulting TypeError was hidden by a bare `except:`.
ref_conc = {}
for i, dat in Met_data.iterrows():
    if dat['Timepoint'] == 5 and dat['Sample'] == 'B' and dat['Replicate'] == 1:
        ref_conc[dat['KEGG_ID']] = dat['peakAreaTop']

for sm in range(12):  # divide the samples into datasets
    all_data[sm] = {}
    smple = 'A' if sm < 6 else 'B'
    tym = (2, 5, 9)[sm % 3]
    rep = 1 if (sm // 3) % 2 == 0 else 2  # sm 0-2 and 6-8 -> replicate 1, otherwise 2

    # reset bounds at the start of each dataset
    for i, cpd in Met_bound.iterrows():
        pp.bounds.SetBounds(cpd['KEGG_ID'], cpd['Concentration:Min'], cpd['Concentration:Max'])

    # Measurement rows of this dataset, selected once instead of rescanning the whole
    # table for every WT metabolite
    sample_rows = Met_data[(Met_data['Sample'] == smple)
                           & (Met_data['Timepoint'] == tym)
                           & (Met_data['Replicate'] == rep)]

    for i2, dat2 in Met_data_WT.iterrows():
        if dat2['KEGG_ID'] in cofactor_ids:
            continue
        for i, dat in sample_rows.iterrows():
            kegg_id = dat['KEGG_ID']
            if kegg_id != dat2['KEGG_ID'] or kegg_id in row_skip_ids:
                continue
            all_data[sm][i] = {}

            # A missing reference is the only expected failure: test for it explicitly
            if kegg_id not in ref_conc:
                print("ref not found for", dat['KEGG_ID'])
                continue

            # Scale the WT concentration by peak area relative to the reference, then clamp
            scaled = dat2['Concentration']*dat['peakAreaTop']/ref_conc[kegg_id]
            lb = min(max(scaled*(1-Uf), DEFAULT_LB), DEFAULT_UB)
            ub = min(max(scaled*(1+Uf), DEFAULT_LB), DEFAULT_UB)
            pp.bounds.SetBounds(kegg_id, lb, ub)

    # The C00469 / C00033 bounds depend only on (sample, timepoint), so they are set once per
    # dataset, after the measured bounds. Previously they were set only as a side effect of
    # finding a matching measurement row.
    c00469_level, c00033_level = product_levels[(smple, tym)]
    pp.bounds.SetBounds('C00469', c00469_level/46*(1-Uf), c00469_level/46*(1+Uf))
    pp.bounds.SetBounds('C00033', c00033_level/1000*(1-Uf), c00033_level/1000*(1+Uf))

    mdf_data = pp.calc_mdf()

    # Accumulate the cofactor ratios in the order the compounds appear (not printed in this cell)
    ratios = {'atp': 1, 'gtp': 1, 'nadh': 1}
    for c in mdf_data.compound_data:
        if c.compound in ratio_terms:
            ratio_name, multiplies = ratio_terms[c.compound]
            if multiplies:
                ratios[ratio_name] = ratios[ratio_name]*c.concentration
            else:
                ratios[ratio_name] = ratios[ratio_name]/c.concentration
    atp_ratio, gtp_ratio, nadh_ratio = ratios['atp'], ratios['gtp'], ratios['nadh']

    print(smple, tym, rep, mdf_data.mdf)
    all_data[sm]['sample'] = smple
    all_data[sm]['timepoint'] = tym
    all_data[sm]['replicate'] = rep
    all_data[sm]['mdf'] = mdf_data

A 2 1 -0.6451786907122425
A 5 1 0.2005195628313423
A 9 1 0.1174646228611067
A 2 2 -0.021854551290754642
A 5 2 0.44098087103353834
A 9 2 -0.10871034961615322
B 2 1 -0.23484315174608028
B 5 1 -0.1637272062654347
B 9 1 0.18915355466607764
B 2 2 -0.16450931376390798
B 5 2 -0.814664993446911
B 9 2 -0.6251909287080899


In [98]:
# Impose all measured concentration bounds at the same time, then compute the
# MDF of pathway `pp` for each of the 12 sample / replicate / timepoint datasets.
all_data = {}
Uf = 0.0  # uncertainty factor: bounds are set to estimate*(1-Uf) .. estimate*(1+Uf)
DEFAULT_LB = 1e-6
DEFAULT_UB = 2e-2 #20 mM, which is 2x higher than the 10 mM default bound suggested in the Noor et al. 2014 paper.

# Compounds whose bounds are never derived from the peak-area data in this cell.
# C00469 is bounded from FIXED_CONC below instead.
EXCLUDED_IDS = ['C00002','C00008','C00035','C00044','C00003','C00004','C00005','C00006','C00469']

# Fixed concentrations for C00469 and C00033, keyed by (sample, timepoint).
# They are applied as-is, i.e. without clamping to DEFAULT_LB / DEFAULT_UB.
# NOTE(review): the /46 and /1000 factors look like unit conversions
# (g/L -> M via the molar mass of ethanol, and mM -> M) — confirm against the raw data.
FIXED_CONC = {
    ('A', 2): {'C00469': 5.12/46,  'C00033': 2.87/1000},
    ('A', 5): {'C00469': 20.21/46, 'C00033': 4.25/1000},
    ('A', 9): {'C00469': 40.38/46, 'C00033': 4.39/1000},
    ('B', 2): {'C00469': 0.2/46,   'C00033': 3.21/1000},
    ('B', 5): {'C00469': 0.47/46,  'C00033': 8.61/1000},
    ('B', 9): {'C00469': 0.58/46,  'C00033': 9.21/1000},
}

# Reference peak area per compound, taken from sample B, timepoint 5, replicate 2.
# Compounds without such a row are simply absent, so a missing reference shows up
# as a KeyError below instead of a placeholder value.
ref_conc = {}
ref_rows = Met_data[(Met_data['Sample'] == 'B')
                    & (Met_data['Timepoint'] == 5)
                    & (Met_data['Replicate'] == 2)]
for _, ref_row in ref_rows.iterrows():
    ref_conc[ref_row['KEGG_ID']] = ref_row['peakAreaTop']

# Dataset order (index sm = 0..11): sample A then B; within a sample replicate 1
# then 2; within a replicate timepoints 2, 5, 9.
sample_sets = [(s, r, t) for s in ('A', 'B') for r in (1, 2) for t in (2, 5, 9)]

for sm, (smple, rep, tym) in enumerate(sample_sets):
    all_data[sm] = {}

    # Reset every bound to its baseline so nothing carries over from the previous dataset.
    for _, cpd in Met_bound.iterrows():
        pp.bounds.SetBounds(cpd['KEGG_ID'], cpd['Concentration:Min'], cpd['Concentration:Max'])

    # Measurements belonging to this dataset only (selected once instead of
    # rescanning all of Met_data for every wild-type compound).
    sample_rows = Met_data[(Met_data['Sample'] == smple)
                           & (Met_data['Timepoint'] == tym)
                           & (Met_data['Replicate'] == rep)]

    for _, wt_row in Met_data_WT.iterrows():
        kegg_id = wt_row['KEGG_ID']
        if kegg_id in EXCLUDED_IDS:
            continue
        for i, met_row in sample_rows[sample_rows['KEGG_ID'] == kegg_id].iterrows():
            all_data[sm][i] = {}  # placeholder entry keyed by the Met_data row index
            try:
                # Scale the wild-type concentration by this dataset's peak area
                # relative to the reference peak area.
                estimate = wt_row['Concentration'] * met_row['peakAreaTop'] / ref_conc[kegg_id]
            except (KeyError, ZeroDivisionError):
                # No usable reference peak area: keep the baseline bounds for this compound.
                print("ref not found for", kegg_id)
            else:
                # Clamp both ends of the interval into [DEFAULT_LB, DEFAULT_UB].
                lb = min(max(estimate * (1 - Uf), DEFAULT_LB), DEFAULT_UB)
                ub = min(max(estimate * (1 + Uf), DEFAULT_LB), DEFAULT_UB)
                pp.bounds.SetBounds(kegg_id, lb, ub)

    # Apply the fixed C00469 / C00033 bounds last so they override any bound set
    # from the peak-area data above. They depend only on (sample, timepoint), so
    # they are set once per dataset rather than after every matched metabolite.
    for fixed_id, conc in FIXED_CONC.get((smple, tym), {}).items():
        pp.bounds.SetBounds(fixed_id, conc * (1 - Uf), conc * (1 + Uf))

    mdf_data = pp.calc_mdf()

    # Concentration ratios taken from the MDF solution: C00002/C00008,
    # C00044/C00035 and C00004/C00003. They are not stored or printed; they are
    # kept as variables for ad-hoc inspection.
    atp_ratio = 1
    gtp_ratio = 1
    nadh_ratio = 1
    for c in mdf_data.compound_data:
        if c.compound == 'C00002':
            atp_ratio = atp_ratio * c.concentration
        elif c.compound == 'C00008':
            atp_ratio = atp_ratio / c.concentration
        elif c.compound == 'C00003':
            nadh_ratio = nadh_ratio / c.concentration
        elif c.compound == 'C00004':
            nadh_ratio = nadh_ratio * c.concentration
        elif c.compound == 'C00035':
            gtp_ratio = gtp_ratio / c.concentration
        elif c.compound == 'C00044':
            gtp_ratio = gtp_ratio * c.concentration

    print(smple,tym,rep,mdf_data.mdf)
    all_data[sm]['sample'] = smple
    all_data[sm]['timepoint'] = tym
    all_data[sm]['replicate'] = rep
    all_data[sm]['mdf'] = mdf_data

A 2 1 -0.016704161423845676
A 5 1 0.10832533565565683
A 9 1 0.08361495632936311
A 2 2 0.24241726314645007
A 5 2 0.34878664385785285
A 9 2 0.19938312507273537
B 2 1 0.4160946354353783
B 5 1 0.005627908719660013
B 9 1 -0.19982891049150453
B 2 2 0.10279492609196694
B 5 2 -0.16372720626543114
B 9 2 0.02574685847337932
