# Module import

In [1]:
from ETGEMs_function import *

# Data initialization

In [2]:
import pandas as pd
import cobra
# Input files, all read from the local "Basic Data" folder.
# Genome-scale model
model_file='./Basic Data/iML1515.xml'
# Reaction G0 values
reaction_g0_file='./Basic Data/reaction_g0.txt'
# Metabolite concentration data
metabolites_lnC_file = './Basic Data/metabolites_lnC.txt'
# Reaction kcat data from ECMpy
reaction_kcat_MW_file='./Basic Data/ID_kcat_MW_file.csv'

# Bundle everything the later model-building calls need into one object.
Concretemodel_Need_Data=Get_Concretemodel_Need_Data(
    reaction_g0_file,
    metabolites_lnC_file,
    model_file,
    reaction_kcat_MW_file,
)

# 1.MDF calculation
Preset a growth rate (biomass_value) and calculate the corresponding MDF. For example, setting "biomass_value = 0.3" gives an MDF value of 2.667, and setting "biomass_value = 0.7074" gives an MDF value of -14.571.

In [3]:
# Settings for the MDF calculation at a preset growth rate.
biomass_id='BIOMASS_Ec_iML1515_core_75p37M'
biomass_value=0.7074
substrate_name,substrate_value='EX_glc__D_e_reverse',10
K_value=1249
E_total=0.114 # equal to e_pool (0.228) * saturation (0.5)

# The returned value is kept as B_value and reused by the following steps.
B_value=MDF_Calculation(
    Concretemodel_Need_Data,biomass_value,biomass_id,
    substrate_name,substrate_value,K_value,E_total,'gurobi',
)
print(f"B value : {B_value!s}")

B value : -14.57128001843502


# 2.Maximum growth rate calculation
The MDF obtained above is used as the lower bound of the thermodynamic constraints.

In [4]:
# Settings for the growth-rate maximization; B_value comes from the MDF step.
obj_name,obj_target='BIOMASS_Ec_iML1515_core_75p37M','maximize'
substrate_name,substrate_value='EX_glc__D_e_reverse',10
K_value=1249
E_total=0.114 # equal to e_pool (0.228) * saturation (0.5)

max_biomass_under_mdf=Max_Growth_Rate_Calculation(
    Concretemodel_Need_Data,obj_name,obj_target,
    substrate_name,substrate_value,K_value,E_total,B_value,'gurobi',
)
print(f"Max biomass value : {max_biomass_under_mdf!s}")

Max biomass value : 0.7073808171275947


# 3.Minimum enzyme cost calculation
The MDF and the maximum growth rate obtained above are fixed.

In [5]:
# Settings for the minimum enzyme cost calculation.
# B_value (step 1) and max_biomass_under_mdf (step 2) are reused as-is from
# the earlier cells, so B_value is not re-assigned here.
biomass_id='BIOMASS_Ec_iML1515_core_75p37M'
E_total=0.114 # equal to e_pool (0.228) * saturation (0.5)
substrate_name='EX_glc__D_e_reverse'
substrate_value=10
biomass_value=max_biomass_under_mdf
K_value=1249

min_E=Min_Enzyme_Cost_Calculation(Concretemodel_Need_Data,biomass_value,biomass_id,substrate_name,substrate_value,K_value,E_total,B_value,'gurobi')
print("Min enzyme amount : " +str(min_E))

Min enzyme amount : 0.11399266407250759


# 4.Minimum flux sum calculation（pFBA）
Used to simplify the output file below

In [6]:
# Settings for the minimum flux sum (pFBA) calculation.
# B_value (step 1) and max_biomass_under_mdf (step 2) are reused as-is from
# the earlier cells, so B_value is not re-assigned here.
biomass_id='BIOMASS_Ec_iML1515_core_75p37M'
E_total=0.114 # equal to e_pool (0.228) * saturation (0.5)
substrate_name='EX_glc__D_e_reverse'
substrate_value=10
biomass_value=max_biomass_under_mdf
K_value=1249

# The call returns two values: the minimum flux sum and the solved model
# object, which the pathway-output step reads as Concretemodel.
min_V,Concretemodel=Min_Flux_Sum_Calculation(Concretemodel_Need_Data,biomass_value,biomass_id,substrate_name,substrate_value,K_value,E_total,B_value,'gurobi')
print("Min flux amount : " +str(min_V))

Min flux amount : 678.1337758616216


# 5.Pathway information output
The result table written here is used to extract the various lists in the following steps.

In [7]:
# Unpack the entries of the data bundle needed to extract the results.
model=Concretemodel_Need_Data['model']
reaction_kcat_MW=Concretemodel_Need_Data['reaction_kcat_MW']
reaction_g0=Concretemodel_Need_Data['reaction_g0']
coef_matrix=Concretemodel_Need_Data['coef_matrix']
metabolite_list=Concretemodel_Need_Data['metabolite_list']

# Result table for the solved model from step 4 (Concretemodel).
use_result = Get_Results_Thermodynamics(model,Concretemodel,reaction_kcat_MW,reaction_g0,coef_matrix,metabolite_list)
# Keep only reactions that carry flux, largest flux first.
use_result = use_result[use_result['flux'] > 1e-10]
use_result = use_result.sort_values(by = 'flux',axis = 0,ascending = False)

# The index labels are reaction ids. Look each reaction up once and read both
# annotations from it; a plain list also works when no reaction passed the
# flux filter, where a row-wise apply would not yield a single column.
active_reactions = [model.reactions.get_by_id(reaction_id) for reaction_id in use_result.index]
use_result["reaction"] = [rxn.reaction for rxn in active_reactions]
use_result["gpr"] = [rxn.gene_reaction_rule for rxn in active_reactions]

# File name encodes growth rate, B value and minimum enzyme amount (3 decimals).
use_result.to_csv('./Analysis Result/' + str(round(max_biomass_under_mdf,3)) + '_' + str(round(B_value,3)) + '_' + str(round(min_E,3)) + '_EcoTCM_6-1.csv', sep=',', header=True, index=True,mode='w')

# 6.List extraction of candidate bottleneck reactions
Standard: the thermodynamic driving force (f) is equal to the MDF value (B) obtained above.

In [8]:
# Drop rows whose f is not above -K_value. The cutoff is derived from K_value
# (1249 in the cells above) instead of repeating the number.
# NOTE(review): presumably f == -K_value marks reactions without a real
# driving-force value - confirm against Get_Results_Thermodynamics.
use_result_tmp=use_result[use_result['f']>-K_value]
# Candidate bottlenecks: driving force equal to B_value within 1e-05.
use_result_select=use_result_tmp[(use_result_tmp['f']-B_value).abs()<=1e-05]
use_result_select.head(10)

Unnamed: 0,flux,z,f,enz,met_concentration,reaction,gpr
GAPD,16.837222,1.0,-14.57128,0.000121,;nadh_c : 0.0007084583731630467;pi_c : 4.99999...,g3p_c + nad_c + pi_c --> 13dpg_c + h_c + nadh_c,b1779
TPI,7.823839,1.0,-14.57128,0.001902,;g3p_c : 0.0019372800476507063;dhap_c : 5.7493...,dhap_c --> g3p_c,b3919
MDH,3.554583,1.0,-14.57128,6.9e-05,;oaa_c : 0.0013300845862784436;mal__L_c : 0.02...,mal__L_c + nad_c --> h_c + nadh_c + oaa_c,b3236
TKT1,1.320979,1.0,-14.57128,0.000461,;g3p_c : 0.0019372800476507063;s7p_c : 4.99999...,r5p_c + xu5p__D_c --> g3p_c + s7p_c,b2935 or b2465
PGCD,1.216711,1.0,-14.57128,0.001086,;3pg_c : 0.02000000000856292;nadh_c : 0.000708...,3pg_c + nad_c --> 3php_c + h_c + nadh_c,b2913
PRPPS,0.66023,1.0,-14.57128,0.000296,;prpp_c : 0.0012362301025760926;amp_c : 5.4119...,atp_c + r5p_c --> amp_c + h_c + prpp_c,b1207
CBMKr,0.443236,1.0,-14.57128,0.000106,;h_c : 1.0;atp_c : 5.411968104158758e-05;cbp_c...,atp_c + co2_c + nh4_c --> adp_c + cbp_c + 2.0 h_c,b0521
IPMD,0.318697,1.0,-14.57128,0.000106,;3c4mop_c : 0.009661213454617063;3c2hmp_c : 0....,3c2hmp_c + nad_c --> 3c4mop_c + h_c + nadh_c,b0073 or b1800
DHQTi,0.269596,1.0,-14.57128,0.000146,;3dhq_c : 4.999999992621096e-07;h2o_c : 1.0;3d...,3dhq_c --> 3dhsk_c + h2o_c,b1693
ACACT5r,0.251743,1.0,-14.57128,0.000501,;dcacoa_c : 4.999999992621096e-07;3oddcoa_c : ...,accoa_c + dcacoa_c --> 3oddcoa_c + coa_c,b3845 or b2342


In [9]:
# Index labels of the candidate table, as a plain list.
path_reac_list=use_result_select.index.tolist()

# 7.Determination of bottleneck reaction
Calculate the maximum thermodynamic driving force for each reaction in the above list; if the value is still equal to B, the reaction is a bottleneck reaction.

In [10]:
import pandas as pd
import numpy as np
import datetime
from pyomo.environ import *
from pyomo.opt import SolverFactory
import pyomo.environ as pyo
from concurrent.futures import ProcessPoolExecutor, as_completed
starttime = datetime.datetime.now()
max_min_Df_list_fixed=pd.DataFrame()

# Submit one 'maximize' job per candidate reaction to a process pool.
with ProcessPoolExecutor() as executor:
    futures = {}
    for eachreaction in path_reac_list:
        job = executor.submit(
            Get_Max_Min_Df_Complete,Concretemodel_Need_Data,eachreaction,'maximize',K_value,B_value,
            max_biomass_under_mdf,biomass_id,E_total,substrate_name,substrate_value,'gurobi',
        )
        futures[job] = eachreaction
    # Collect each result as soon as its worker finishes.
    for future in as_completed(futures):
        tmp = future.result()
        for eachindex, max_value in tmp['max_value'].items():
            max_min_Df_list_fixed.loc[eachindex,'max_Df_complete']=max_value

endtime = datetime.datetime.now()
# Elapsed wall-clock time of the parallel run.
print(endtime - starttime)
max_min_Df_list_fixed.to_csv('./Analysis Result/max_min_Df_complete_for_specific_reaction.csv', sep=',', header=True, index=True,mode='w')

0:00:30.960323


In [11]:
# Order by maximum driving force, smallest first (ascending is the default).
max_min_Df_list_fixed=max_min_Df_list_fixed.sort_values('max_Df_complete')
max_min_Df_list_fixed.head(10)

Unnamed: 0,max_Df_complete
DXYLTD_reverse,-14.571279
CBMKr,-9.48638
PGCD,1.567088
MDH,5.767088
GLU5K,6.467088
PDX5PO2,10.767088
IPMD,15.467088
GAPD,18.777981
DHORTS_reverse,20.828721
TPI,21.828721


In [12]:
# Keep reactions whose maximum driving force exceeds B_value by at most 0.001.
Bottleneck_reaction=max_min_Df_list_fixed.loc[
    max_min_Df_list_fixed['max_Df_complete'].sub(B_value).le(0.001)
]
Bottleneck_reaction

Unnamed: 0,max_Df_complete
DXYLTD_reverse,-14.571279


In [13]:
# Metabolite concentration string recorded for the first bottleneck reaction.
use_result_select['met_concentration'].loc[Bottleneck_reaction.index[0]]

';2ddara_c : 0.019999992253620126;h2o_c : 1.0;dxylnt_c : 4.999999992621096e-07'

# 8.List extraction of candidate limiting metabolites
Standard: metabolites involved in the bottleneck reactions, except for water (h2o_c) and protons (h_c).

In [14]:
Bottleneck_reaction_lsit=list(Bottleneck_reaction.index)
# Set of ids for fast membership tests while scanning the model reactions.
bottleneck_reaction_ids=set(Bottleneck_reaction_lsit)
# Water and protons are excluded from the candidate limiting metabolites.
excluded_met_ids={'h_c','h2o_c'}

# Read the metabolites of each bottleneck reaction directly from the reaction.
# This replaces probing every model metabolite with get_coefficient() and a
# bare except. The set removes duplicates shared between reactions.
Bottleneck_reaction_met=list({
    met.id
    for rea in model.reactions
    if rea.id in bottleneck_reaction_ids
    for met in rea.metabolites
    if met.id not in excluded_met_ids
})
Bottleneck_reaction_met

['2ddara_c', 'dxylnt_c']

# 9.Determination of limiting metabolites
Calculate the maximum and minimum concentrations for each metabolite in the above list; if the two values are equal, the metabolite is a limiting metabolite.

In [15]:
import pandas as pd
import numpy as np
import datetime
from pyomo.environ import *
from pyomo.opt import SolverFactory
import pyomo.environ as pyo
from concurrent.futures import ProcessPoolExecutor, as_completed
starttime = datetime.datetime.now()
max_min_concentration_list_fixed = pd.DataFrame()

# Two passes over the candidate metabolites, run one after the other:
# (objective, column written in this table, column read from the result).
for obj_sense, target_col, value_col in (
    ('maximize', 'max_concentration', 'max_value'),
    ('minimize', 'min_concentration', 'min_value'),
):
    with ProcessPoolExecutor() as executor:
        futures = {}
        for eachmet in Bottleneck_reaction_met:
            job = executor.submit(
                Get_Max_Min_Met_Concentration,Concretemodel_Need_Data,eachmet,obj_sense,K_value,B_value,
                max_biomass_under_mdf,biomass_id,E_total,substrate_name,substrate_value,
                list(Bottleneck_reaction.index),'gurobi',
            )
            futures[job] = eachmet
        # Collect each result as soon as its worker finishes.
        for future in as_completed(futures):
            tmp = future.result()
            for eachindex, value in tmp[value_col].items():
                max_min_concentration_list_fixed.loc[eachindex,target_col] = value

endtime = datetime.datetime.now()
# Elapsed wall-clock time of both passes.
print(endtime - starttime)
max_min_concentration_list_fixed.to_csv('./Analysis Result/max_min_concentration_for_specific_metabolite.csv', sep=',', header=True, index=True,mode='w')


0:00:14.717242


In [16]:
# Display the table of 'max_concentration' / 'min_concentration' values
# filled in by the previous cell.
max_min_concentration_list_fixed

Unnamed: 0,max_concentration,min_concentration
2ddara_c,-3.912023,-3.912023
dxylnt_c,-14.508657,-14.508658


In [17]:
# Keep metabolites whose max and min concentration differ by at most 0.001.
Limiting_metabolite = max_min_concentration_list_fixed.loc[
    max_min_concentration_list_fixed['max_concentration']
    .sub(max_min_concentration_list_fixed['min_concentration'])
    .le(0.001)
]
Limiting_metabolite

Unnamed: 0,max_concentration,min_concentration
2ddara_c,-3.912023,-3.912023
dxylnt_c,-14.508657,-14.508658


# 10.List extraction of candidate key enzymes
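Standard: the amount of enzyme usage is more than 0.228 mg/gDW (above 1% of e_pool). Note: the code cell below applies a cutoff equal to 1% of E_total (0.00114, with E_total = 0.114 = e_pool 0.228 * saturation 0.5), which does not match the value stated here; confirm which threshold is intended.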

In [18]:
# Re-read the table written in step 5; the file name is rebuilt from the same
# three rounded values used when it was saved.
step5_csv_path = './Analysis Result/' + str(round(max_biomass_under_mdf,3)) + '_' + str(round(B_value,3)) + '_' + str(round(min_E,3)) + '_EcoTCM_6-1.csv'
step5_file = pd.read_csv(step5_csv_path, index_col=0)
# Largest enzyme usage first.
step5_file_sort = step5_file.sort_values('enz', ascending=False)
step5_file_sort.head(10)

Unnamed: 0,flux,z,f,enz,met_concentration,reaction,gpr
PDH,12.298751,1.0,8.691833,0.00602,;pyr_c : 1.3223265105347658e-05;nadh_c : 0.000...,coa_c + nad_c + pyr_c --> accoa_c + co2_c + na...,b0116 and b0115 and b0114
PPND,0.097703,1.0,16.263114,0.005071,;34hpp_c : 0.02000000000856292;nadh_c : 0.0007...,nad_c + pphn_c --> 34hpp_c + co2_c + nadh_c,b2600
ADSS,0.211722,1.0,31.37357,0.004065,;imp_c : 0.02000000000856292;dcamp_c : 4.99999...,asp__L_c + gtp_c + imp_c --> dcamp_c + gdp_c +...,b4177
CS,2.172745,1.0,60.42453,0.003952,;h2o_c : 1.0;cit_c : 0.007337184970509802;acco...,accoa_c + h2o_c + oaa_c --> cit_c + coa_c + h_c,b0720
DDPA,0.269596,1.0,123.557442,0.003413,;pep_c : 0.02000000000856292;h2o_c : 1.0;e4p_c...,e4p_c + h2o_c + pep_c --> 2dda7p_c + pi_c,b0754 or b2601 or b1704
ACONTa,2.172745,1.0,10.171279,0.003202,;h2o_c : 1.0;cit_c : 0.007337184970509802;acon...,cit_c --> acon_C_c + h2o_c,b0118 or b1276
ICL,1.404758,1.0,24.231666,0.00305,;succ_c : 4.999999992621096e-07;icit_c : 4.999...,icit_c --> glx_c + succ_c,b4015
GND,4.381726,1.0,22.772542,0.0028,;nadph_c : 0.02000000000856292;nadp_c : 0.0020...,6pgc_c + nadp_c --> co2_c + nadph_c + ru5p__D_c,b2029
PGK_reverse,16.837222,1.0,-14.567088,0.00266,;3pg_c : 0.02000000000856292;adp_c : 5.4119681...,13dpg_c + adp_c --> 3pg_c + atp_c,b2926
PFK,7.923796,1.0,12.223281,0.002304,;adp_c : 5.411968104158752e-06;f6p_c : 4.99999...,atp_c + f6p_c --> adp_c + fdp_c + h_c,b3916 or b1723


In [19]:
# Cutoff for candidate key enzymes: 1% of E_total. With E_total = 0.114 this
# matches the previously hard-coded 0.00114, and it now follows E_total if
# that setting is changed.
enz_use_threshold = 0.01 * E_total
enz_use_reaction_list = list(step5_file_sort[step5_file_sort['enz'] > enz_use_threshold].index)

# 11.Determination of key enzymes.
Calculate the minimum enzyme cost for each reaction in the above list; the higher the value, the more critical (key) the enzyme.

In [20]:
import pandas as pd
import numpy as np
import datetime
from pyomo.environ import *
from pyomo.opt import SolverFactory
import pyomo.environ as pyo
from concurrent.futures import ProcessPoolExecutor, as_completed
starttime = datetime.datetime.now()
max_min_E_list_fixed = pd.DataFrame()

# Two passes over the candidate reactions, run one after the other:
# (objective, column written in this table, column read from the result).
for obj_sense, target_col, value_col in (
    ('maximize', 'max_E', 'max_value'),
    ('minimize', 'min_E', 'min_value'),
):
    with ProcessPoolExecutor() as executor:
        futures = {}
        for eachreaction in enz_use_reaction_list:
            job = executor.submit(
                Get_Max_Min_E,Concretemodel_Need_Data,eachreaction,obj_sense,K_value,B_value,
                max_biomass_under_mdf,biomass_id,E_total,substrate_name,substrate_value,'gurobi',
            )
            futures[job] = eachreaction
        # Collect each result as soon as its worker finishes.
        for future in as_completed(futures):
            tmp = future.result()
            for eachindex, value in tmp[value_col].items():
                max_min_E_list_fixed.loc[eachindex,target_col]=value

endtime = datetime.datetime.now()
# Elapsed wall-clock time of both passes.
print(endtime - starttime)
max_min_E_list_fixed.to_csv('./Analysis Result/max_min_E.csv', sep=',', header=True, index=True,mode='w')


0:01:17.426127


In [21]:
# Show the enzyme table ordered by maximum enzyme amount, largest first.
max_min_E_list_fixed.sort_values('max_E', ascending=False)

Unnamed: 0,max_E,min_E
PDH,0.006024,0.006009
PPND,0.005076,0.005071
ADSS,0.004071,0.004065
CS,0.003954,0.003927
DDPA,0.003415,0.003413
ACONTa,0.003204,0.003182
ICL,0.003069,0.003036
GND,0.002832,0.0028
PGK_reverse,0.002667,0.002657
PFK,0.002305,0.002299
