# Module import

In [1]:
from ETGEMs_function_N import *

# Data initialization

In [24]:
import pandas as pd
import cobra

# Paths of the three data files handed to Get_Concretemodel_Need_Data below.
reaction_g0_file = './Basic Data/reaction_g0_N.txt'
metabolites_lnC_file = './Basic Data/metabolites_lnC.txt'
reaction_kcat_MW_file = './Basic Data/ID_kcat_MW_file_N.csv'

# Load the SBML model and split reversible reactions in place; the
# "*_reverse" reaction ids used below exist only after this conversion.
model = cobra.io.read_sbml_model('./Basic Data/iML1515.xml')
cobra.manipulation.modify.convert_to_irreversible(model)

# Disabled in the original notebook and kept here for reference:
# model.reactions.get_by_id('ATPM').bounds = (0.0, 1000)

# Close these reactions (both bounds zero) before adding the lumped ACONT pair.
for blocked_id in ('TRPS2', 'TRPS3',
                   'ACONTa', 'ACONTb',
                   'ACONTa_reverse', 'ACONTb_reverse'):
    model.reactions.get_by_id(blocked_id).bounds = (0.0, 0.0)

# Add ACONT and ACONT_reverse as two irreversible reactions.
# The reaction is added to the model first, then built from its equation string.
for new_id, equation in (('ACONT', 'cit_c --> icit_c'),
                         ('ACONT_reverse', 'icit_c --> cit_c')):
    R = cobra.Reaction(new_id)
    model.add_reaction(R)
    R.build_reaction_from_string(equation, fwd_arrow="-->")
    model.reactions.get_by_id(new_id).bounds = (0.0, 1000)

# Close these reactions before adding SERA.
for blocked_id in ('PGCD', 'AHGDx', 'AHGDx_reverse'):
    model.reactions.get_by_id(blocked_id).bounds = (0.0, 0.0)

R = cobra.Reaction('SERA')
model.add_reaction(R)
R.build_reaction_from_string('3pg_c + akg_c --> 3php_c + S2hglut_c', fwd_arrow="-->")
model.reactions.get_by_id('SERA').bounds = (0.0, 1000)

# Bundle the modified model and the data files into the structure that every
# later calculation step receives as its first argument.
Concretemodel_Need_Data = Get_Concretemodel_Need_Data(
    reaction_g0_file,
    metabolites_lnC_file,
    model,
    reaction_kcat_MW_file
)

# 1.MDF calculation
Preset a lower growth rate and compute the MDF at that rate. For example, setting "product_value = 0.3" gives an MDF value of 2.667, and setting "product_value = 0.7074" gives an MDF value of -14.571.

In [96]:
# Inputs for MDF_Calculation.
# Alternative target ids that were left commented out in the original cell:
# 'DM_4hpro_LT_c', 'DM_ser__L_c'.
product_id = 'BIOMASS_Ec_iML1515_core_75p37M'
product_value = 0.74554952
substrate_name = 'EX_glc__D_e_reverse'
substrate_value = 10
K_value = 1249
# Author's note: equal to e_pool (0.228) * saturation (0.5).
# NOTE(review): 0.228 * 0.5 is 0.114, not 0.13 -- confirm the intended figures.
E_total = 0.13

B_value = MDF_Calculation(
    Concretemodel_Need_Data,
    product_value,
    product_id,
    substrate_name,
    substrate_value,
    K_value,
    E_total,
    'gurobi'
)
print("B value :", B_value)

B value : -23.017829311460016


# 2.Maximum growth rate calculation
Uses the MDF obtained above as the lower bound of the thermodynamic constraints.

In [93]:
# Inputs for Max_Growth_Rate_Calculation; B_value comes from the MDF step.
obj_name = 'BIOMASS_Ec_iML1515_core_75p37M'
obj_target = 'maximize'
substrate_name = 'EX_glc__D_e_reverse'
substrate_value = 10
K_value = 1249
E_total = 0.13

max_product_under_mdf = Max_Growth_Rate_Calculation(
    Concretemodel_Need_Data,
    obj_name,
    obj_target,
    substrate_name,
    substrate_value,
    K_value,
    E_total,
    B_value,
    'gurobi'
)
print("Max biomass value :", max_product_under_mdf)

Max biomass value : 0.7455495177835006


# 3.Minimum enzyme cost calculation
Fixes the MDF and the maximum growth rate obtained above.

In [70]:
# Inputs for Min_Enzyme_Cost_Calculation. The growth rate is the result of
# step 2, and B_value is carried over unchanged from step 1 (the original cell
# also kept a disabled `B_value=0` override at this point).
product_id = 'BIOMASS_Ec_iML1515_core_75p37M'
product_value = max_product_under_mdf
substrate_name = 'EX_glc__D_e_reverse'
substrate_value = 10
K_value = 1249
# Author's note: equal to e_pool (0.228) * saturation (0.5).
# NOTE(review): 0.228 * 0.5 is 0.114, not 0.13 -- confirm the intended figures.
E_total = 0.13

min_E = Min_Enzyme_Cost_Calculation(
    Concretemodel_Need_Data,
    product_value,
    product_id,
    substrate_name,
    substrate_value,
    K_value,
    E_total,
    B_value,
    'gurobi'
)
print("Min enzyme amount :", min_E)

Min enzyme amount : 0.12999999675932342


# 4.Minimum flux sum calculation（pFBA）
Used to simplify the output file below

In [71]:
# Inputs for Min_Flux_Sum_Calculation. The growth rate is the result of step 2,
# and B_value is carried over unchanged from step 1 (the original cell also
# kept a disabled `B_value=B_0` override at this point).
product_id = 'BIOMASS_Ec_iML1515_core_75p37M'
product_value = max_product_under_mdf
substrate_name = 'EX_glc__D_e_reverse'
substrate_value = 10
K_value = 1249
# Author's note: equal to e_pool (0.228) * saturation (0.5).
# NOTE(review): 0.228 * 0.5 is 0.114, not 0.13 -- confirm the intended figures.
E_total = 0.13

# The call returns a pair: the minimal flux sum and the solved model object
# that the pathway-output step reads its results from.
min_V, Concretemodel = Min_Flux_Sum_Calculation(
    Concretemodel_Need_Data,
    product_value,
    product_id,
    substrate_name,
    substrate_value,
    K_value,
    E_total,
    B_value,
    'gurobi'
)
print("Min flux amount :", min_V)

Min flux amount : 678.9170044802833


# 5.Pathway information output
It is used to extract the following various lists.

In [72]:
# Unpack the pieces of Concretemodel_Need_Data that the result extractor needs.
model, reaction_kcat_MW, reaction_g0, coef_matrix, metabolite_list = (
    Concretemodel_Need_Data[key]
    for key in ('model', 'reaction_kcat_MW', 'reaction_g0', 'coef_matrix', 'metabolite_list')
)

# Keep only reactions carrying flux (> 1e-10), largest flux first.
use_result = (
    Get_Results_Thermodynamics(model, Concretemodel, reaction_kcat_MW,
                               reaction_g0, coef_matrix, metabolite_list)
    .loc[lambda frame: frame['flux'] > 1e-10]
    .sort_values(by='flux', axis=0, ascending=False)
)

# Annotate each row (indexed by reaction id) with its equation and GPR rule.
reaction_by_id = model.reactions.get_by_id
use_result["reaction"] = use_result.apply(lambda row: reaction_by_id(row.name).reaction, axis=1)
use_result["gpr"] = use_result.apply(lambda row: reaction_by_id(row.name).gene_reaction_rule, axis=1)

# The file name encodes growth rate, MDF and enzyme cost, each rounded to 3 decimals.
pathway_csv = './Analysis Result/Biomass/' + '_'.join([
    'glc10',
    str(round(max_product_under_mdf, 3)),
    str(round(B_value, 3)),
    str(round(min_E, 3)),
    'pathway_3.csv',
])
use_result.to_csv(pathway_csv, sep=',', header=True, index=True, mode='w')

# 6.List extraction of candidate bottleneck reactions
Standard: the thermodynamic driving force (f) is equal to the MDF value (B) mentioned above.

In [73]:
# Drop rows whose driving force is at or below -1249, then keep the reactions
# whose driving force matches B_value to within 1e-5.
use_result_tmp = use_result.loc[use_result['f'] > -1249]
gap_to_mdf = (use_result_tmp['f'] - B_value).abs()
use_result_select = use_result_tmp.loc[gap_to_mdf <= 1e-5]
use_result_select.head(10)

Unnamed: 0,flux,z,f,enz,met_concentration,reaction,gpr
GAPD,17.110492,1.0,-14.57128,0.001443,;g3p_c : 0.0019372800476507063;pi_c : 4.999999...,g3p_c + nad_c + pi_c --> 13dpg_c + h_c + nadh_c,b1779
TPI,8.150345,1.0,-14.57128,1.4e-05,;dhap_c : 5.74937878586431e-05;g3p_c : 0.00193...,dhap_c --> g3p_c,b3919
MDH,3.448829,1.0,-14.57128,6e-05,;oaa_c : 0.0013300845862784436;h_c : 1.0;mal__...,mal__L_c + nad_c --> h_c + nadh_c + oaa_c,b3236
TKT1,0.948329,1.0,-14.57128,0.000374,;g3p_c : 0.0019372800476507063;r5p_c : 4.99999...,r5p_c + xu5p__D_c --> g3p_c + s7p_c,b2935 or b2465
PRPPS,0.695854,1.0,-14.57128,0.000827,;h_c : 1.0;prpp_c : 0.0012362301025760926;amp_...,atp_c + r5p_c --> amp_c + h_c + prpp_c,b1207
CBMKr,0.467152,1.0,-14.57128,0.000399,;h_c : 1.0;co2_c : 1.3922614394250121e-05;atp_...,atp_c + co2_c + nh4_c --> adp_c + cbp_c + 2.0 h_c,b0521
IPMD,0.335893,1.0,-14.57128,0.000752,;3c4mop_c : 0.009661213454617063;h_c : 1.0;nad...,3c2hmp_c + nad_c --> 3c4mop_c + h_c + nadh_c,b0073 or b1800
DHQTi,0.284143,1.0,-14.57128,3.7e-05,;3dhsk_c : 0.001109745272374513;h2o_c : 1.0;3d...,3dhq_c --> 3dhsk_c + h2o_c,b1693
ACACT5r,0.265326,1.0,-14.57128,0.000221,;3oddcoa_c : 1.770996265282406e-05;accoa_c : 0...,accoa_c + dcacoa_c --> 3oddcoa_c + coa_c,b3845 or b2342
ACACT4r,0.265326,1.0,-14.57128,0.000258,;3odcoa_c : 1.913801709758091e-05;accoa_c : 0....,accoa_c + occoa_c --> 3odcoa_c + coa_c,b2342 or b3845


In [74]:
# Reaction ids of the candidate bottleneck reactions selected above.
path_reac_list = use_result_select.index.tolist()

# 7.Determination of bottleneck reaction
Calculate the maximum thermodynamic driving force for each reaction in the above list; if the value is still equal to B, the reaction is a bottleneck reaction.

In [75]:
import pandas as pd
import numpy as np
import datetime
from pyomo.environ import *
from pyomo.opt import SolverFactory
import pyomo.environ as pyo
from concurrent.futures import ProcessPoolExecutor, as_completed
starttime = datetime.datetime.now()
max_min_Df_list_fixed = pd.DataFrame()

# One worker job per candidate reaction; results are collected in completion order.
with ProcessPoolExecutor() as executor:
    futures = {}
    for candidate_reaction in path_reac_list:
        job = executor.submit(
            Get_Max_Min_Df_Complete, Concretemodel_Need_Data, candidate_reaction, 'maximize',
            K_value, B_value, max_product_under_mdf, product_id, E_total,
            substrate_name, substrate_value, 'gurobi'
        )
        futures[job] = candidate_reaction

    for finished in as_completed(futures):
        tmp = finished.result()
        # Copy each returned 'max_value' into the summary table, keyed by the row label.
        for eachindex, max_df in tmp['max_value'].items():
            max_min_Df_list_fixed.loc[eachindex, 'max_Df_complete'] = max_df

endtime = datetime.datetime.now()
print(endtime - starttime)
max_min_Df_list_fixed.to_csv(
    './Analysis Result/Biomass/max_min_Df_complete_for_specific_reaction_3.csv',
    sep=',', header=True, index=True, mode='w'
)

0:00:21.014734


In [76]:
# Smallest maximal driving force first (ascending is the sort_values default).
max_min_Df_list_fixed = max_min_Df_list_fixed.sort_values('max_Df_complete')
max_min_Df_list_fixed.head(100)

Unnamed: 0,max_Df_complete
DXYLTD_reverse,-14.571279
CBMKr,-9.48638
2DDARAA_reverse,4.310894
MDH,5.767088
GLU5K,6.467088
PDX5PO2,10.767088
IPMD,15.467088
GAPD,18.777981
DHORTS_reverse,20.828721
TPI,21.828721


In [77]:
# A reaction is kept when its maximal driving force exceeds B_value by at most 0.001.
excess_over_mdf = max_min_Df_list_fixed['max_Df_complete'] - B_value
Bottleneck_reaction = max_min_Df_list_fixed.loc[excess_over_mdf <= 0.001]
Bottleneck_reaction

Unnamed: 0,max_Df_complete
DXYLTD_reverse,-14.571279


In [78]:
# Show the metabolite-concentration string recorded for the first bottleneck reaction.
first_bottleneck_id = Bottleneck_reaction.index[0]
use_result_select.loc[first_bottleneck_id, 'met_concentration']

';2ddara_c : 0.019999992253620126;dxylnt_c : 4.999999992621096e-07;h2o_c : 1.0'

# 8.List extraction of candidate limiting metabolites
Standard: involved in bottleneck reactions, except for water (h2o_c) and protons (h_c).

In [79]:
# Collect the ids of every metabolite that takes part in a bottleneck reaction,
# leaving out protons (h_c) and water (h2o_c).
#
# The participants are read directly from `rea.metabolites`. The earlier version
# probed every metabolite of the model with `rea.get_coefficient` inside a bare
# `except:`, which was slow and could hide unrelated errors.
Bottleneck_reaction_lsit = list(Bottleneck_reaction.index)
bottleneck_reaction_ids = set(Bottleneck_reaction_lsit)
excluded_metabolite_ids = {'h_c', 'h2o_c'}

# The set comprehension removes duplicates shared by several bottleneck
# reactions; as before, the order of the resulting list is not defined.
Bottleneck_reaction_met = list({
    met.id
    for rea in model.reactions
    if rea.id in bottleneck_reaction_ids
    for met in rea.metabolites
    if met.id not in excluded_metabolite_ids
})
Bottleneck_reaction_met

['dxylnt_c', '2ddara_c']

# 9.Determination of limiting metabolites
Calculate the maximum and minimum concentrations for each metabolite in the above list; if the two values are equal, it is a limiting metabolite.

In [80]:
import pandas as pd
import numpy as np
import datetime
from pyomo.environ import *
from pyomo.opt import SolverFactory
import pyomo.environ as pyo
from concurrent.futures import ProcessPoolExecutor, as_completed
starttime = datetime.datetime.now()
max_min_concentration_list_fixed = pd.DataFrame()


def _collect_concentration_bounds(frame, direction, value_column, target_column):
    """Run Get_Max_Min_Met_Concentration for every candidate metabolite.

    One job per entry of ``Bottleneck_reaction_met`` is submitted to a fresh
    process pool. As each job finishes, the ``value_column`` entries of the
    table it returns are copied into ``target_column`` of ``frame``, keyed by
    the returned row labels.

    Parameters:
        frame: DataFrame that is filled in place.
        direction: objective sense passed to the worker ('maximize' or 'minimize').
        value_column: column to read from each returned table.
        target_column: column of ``frame`` to write to.
    """
    with ProcessPoolExecutor() as executor:
        futures = [
            executor.submit(
                Get_Max_Min_Met_Concentration, Concretemodel_Need_Data, eachmet, direction,
                K_value, B_value, max_product_under_mdf, product_id, E_total,
                substrate_name, substrate_value, list(Bottleneck_reaction.index), 'gurobi'
            )
            for eachmet in Bottleneck_reaction_met
        ]
        for future in as_completed(futures):
            tmp = future.result()
            for eachindex in tmp.index:
                frame.loc[eachindex, target_column] = tmp.loc[eachindex, value_column]


# The two passes run one after the other, as before: all maxima first, then all minima.
_collect_concentration_bounds(max_min_concentration_list_fixed, 'maximize', 'max_value', 'max_concentration')
_collect_concentration_bounds(max_min_concentration_list_fixed, 'minimize', 'min_value', 'min_concentration')

endtime = datetime.datetime.now()
print(endtime - starttime)
max_min_concentration_list_fixed.to_csv(
    './Analysis Result/Biomass/max_min_concentration_for_specific_metabolite_3.csv',
    sep=',', header=True, index=True, mode='w'
)


0:00:10.897105


In [81]:
# Display the max/min concentration table filled in by the previous cell.
max_min_concentration_list_fixed

Unnamed: 0,max_concentration,min_concentration
dxylnt_c,-14.508657,-14.508658
2ddara_c,-3.912023,-3.912023


In [82]:
# A metabolite is kept when its max and min concentration values differ by at most 0.001.
concentration_span = (
    max_min_concentration_list_fixed['max_concentration']
    - max_min_concentration_list_fixed['min_concentration']
)
Limiting_metabolite = max_min_concentration_list_fixed.loc[concentration_span <= 0.001]
Limiting_metabolite

Unnamed: 0,max_concentration,min_concentration
dxylnt_c,-14.508657,-14.508658
2ddara_c,-3.912023,-3.912023


# 10.List extraction of candidate key enzymes
Standard: The amount of enzyme usage was more than 0.0015 mg/gDW （above 1% of e_pool）

In [83]:
# Re-read the pathway table written in step 5; the file name is rebuilt from the
# same rounded growth rate, MDF and enzyme cost values.
step5_csv = './Analysis Result/Biomass/' + '_'.join([
    'glc10',
    str(round(max_product_under_mdf, 3)),
    str(round(B_value, 3)),
    str(round(min_E, 3)),
    'pathway_3.csv',
])
step5_file = pd.read_csv(step5_csv, index_col=0)

# Largest enzyme usage first.
step5_file_sort = step5_file.sort_values('enz', ascending=False)
step5_file_sort.head(10)

Unnamed: 0,flux,z,f,enz,met_concentration,reaction,gpr
NADH16pp,31.351785,1.0,-9999.0,0.024198,;h_c : 1.0;nadh_c : 0.0007084583731630467;h_p ...,4.0 h_c + nadh_c + q8_c --> 3.0 h_p + nad_c + ...,b2280 and b2281 and b2287 and b2288 and b2282 ...
FBA,8.255695,1.0,-7.412169,0.008402,;g3p_c : 0.0019372800476507063;dhap_c : 5.7493...,fdp_c --> dhap_c + g3p_c,b2925 or b2097
KARA2,0.216604,1.0,20.11657,0.004338,;2ahbut_c : 0.0011251874271059493;nadp_c : 0.0...,2ahbut_c + h_c + nadph_c --> 23dhmp_c + nadp_c,b3774
KARA1_reverse,0.651884,1.0,-3.552714e-15,0.004257,;alac__S_c : 4.999999992621096e-07;nadp_c : 0....,alac__S_c + h_c + nadph_c --> 23dhmb_c + nadp_c,b3774
GHMT2r,0.790937,1.0,14.57128,0.003111,;mlthf_c : 0.002311008755908189;h2o_c : 1.0;gl...,ser__L_c + thf_c --> gly_c + h2o_c + mlthf_c,b2551
SHGO,1.282362,1.0,113.4713,0.002643,;akg_c : 4.999999992621096e-07;o2_c : 4.999999...,S2hglut_c + o2_c --> akg_c + h2o2_c,b2660
CYTBO3_4pp,34.269712,1.0,-9999.0,0.002403,;h_c : 1.0;h2o_c : 1.0;h_p : 1.0;q8h2_c : 4.99...,4.0 h_c + 0.5 o2_c + q8h2_c --> h2o_c + 4.0 h_...,b0429 and b0432 and b0431 and b0430
GND,3.286375,1.0,27.85744,0.002136,;nadp_c : 0.0020000000008562916;6pgc_c : 0.020...,6pgc_c + nadp_c --> co2_c + nadph_c + ru5p__D_c,b2029
SERA,1.282362,1.0,0.2861609,0.001985,;akg_c : 4.999999992621096e-07;3pg_c : 0.02000...,3pg_c + akg_c --> 3php_c + S2hglut_c,
AKGDH,2.671376,1.0,24.55891,0.001976,;co2_c : 1.3922614394250121e-05;coa_c : 4.9999...,akg_c + coa_c + nad_c --> co2_c + nadh_c + suc...,b0116 and b0727 and b0726


In [84]:
# Reaction ids whose enzyme usage exceeds the cutoff.
# NOTE(review): the section text states a 0.0015 threshold, while the cutoff
# applied here is 0.0000013 -- confirm which one is intended.
uses_enough_enzyme = step5_file_sort['enz'] > 0.0000013
enz_use_reaction_list = step5_file_sort.loc[uses_enough_enzyme].index.tolist()

# 11.Determination of key enzymes.
Calculate the minimum enzyme cost for reactions in above list, and the higher the value, the more critical (key) the enzyme.

In [85]:
import pandas as pd
import numpy as np
import datetime
from pyomo.environ import *
from pyomo.opt import SolverFactory
import pyomo.environ as pyo
from concurrent.futures import ProcessPoolExecutor, as_completed
starttime = datetime.datetime.now()
max_min_E_list_fixed = pd.DataFrame()


def _collect_enzyme_bounds(frame, direction, value_column, target_column):
    """Run Get_Max_Min_E for every candidate reaction.

    One job per entry of ``enz_use_reaction_list`` is submitted to a fresh
    process pool. As each job finishes, the ``value_column`` entries of the
    table it returns are copied into ``target_column`` of ``frame``, keyed by
    the returned row labels.

    Parameters:
        frame: DataFrame that is filled in place.
        direction: objective sense passed to the worker ('maximize' or 'minimize').
        value_column: column to read from each returned table.
        target_column: column of ``frame`` to write to.
    """
    with ProcessPoolExecutor() as executor:
        futures = [
            executor.submit(
                Get_Max_Min_E, Concretemodel_Need_Data, eachreaction, direction,
                K_value, B_value, max_product_under_mdf, product_id, E_total,
                substrate_name, substrate_value, 'gurobi'
            )
            for eachreaction in enz_use_reaction_list
        ]
        for future in as_completed(futures):
            tmp = future.result()
            for eachindex in tmp.index:
                frame.loc[eachindex, target_column] = tmp.loc[eachindex, value_column]


# The two passes run one after the other, as before: all maxima first, then all minima.
_collect_enzyme_bounds(max_min_E_list_fixed, 'maximize', 'max_value', 'max_E')
_collect_enzyme_bounds(max_min_E_list_fixed, 'minimize', 'min_value', 'min_E')

endtime = datetime.datetime.now()
print(endtime - starttime)
max_min_E_list_fixed.to_csv(
    './Analysis Result/Biomass/max_min_E_3.csv',
    sep=',', header=True, index=True, mode='w'
)


0:08:01.962289


In [86]:
# Display the enzyme bounds, largest maximal enzyme usage first.
max_min_E_list_fixed.sort_values('max_E', ascending=False)

Unnamed: 0,max_E,min_E
NADH16pp,0.024198,0.024198
FBA,0.008402,0.008402
KARA2,0.004338,0.004338
KARA1_reverse,0.004257,0.004257
GHMT2r,0.003111,0.003111
...,...,...
FESR,0.000002,0.000002
ADCL,0.000002,0.000002
DPR,0.000002,0.000002
SCYSDS,0.000002,0.000002


# Congratulations！

Now you have completed all the calculations at the first turning point. 
To obtain all the turning point data, 
you need to go back to "1. MDF calculation" 
and change the "product_value=0.1" to "product_value=0.7125" (a value slightly larger than the product_value you just calculated), 
you will get the second turning point data. 

Note：
(1) If the new "product_value" is set too high, you may miss some turning point data, but don't worry: the solver gurobi has good differentiation, and a slight increase is the best choice (adding only 1‰ or 1% to the "product_value" of the last round is often enough).
(2) Don't forget to change the suffix of each output file name, such as _1, _2, _3, etc.

In the manuscript "Improving pathway prediction accuracy of constraints-based metabolic network models by treating enzymes as microcompartments", 
all the 3 "product_value"s used by the authors are: 0.1, 0.7125 and 0.7444, respectively, 
for reference.

In [1]:
pip list

Package             Version  
------------------- ---------
appdirs             1.4.4    
astroid             2.4.2    
backcall            0.2.0    
certifi             2018.8.24
cobra               0.13.3   
colorama            0.4.4    
cycler              0.10.0   
decorator           4.4.2    
depinfo             1.5.4    
et-xmlfile          1.0.1    
future              0.18.2   
ipykernel           5.3.4    
ipython             7.9.0    
ipython-genutils    0.2.0    
isort               4.3.21   
jdcal               1.4.1    
jedi                0.17.2   
jupyter-client      6.1.6    
jupyter-core        4.6.3    
kiwisolver          1.1.0    
lazy-object-proxy   1.4.3    
matplotlib          3.0.3    
mccabe              0.6.1    
mpmath              1.1.0    
nose                1.3.7    
numpy               1.18.5   
openpyxl            2.6.4    
optlang             1.4.4    
pandas              0.25.3   
parso               0.7.1    
pexpect             4.8.0    
pickleshar

You are using pip version 10.0.1, however version 20.3.4 is available.
You should consider upgrading via the 'python -m pip install --upgrade pip' command.
