# Module import

In [1]:
from ETGEMs_function import *

# Data initialization

In [2]:
import pandas as pd
import cobra
# Input files, all read from the local './Basic Data' folder.
model_file='./Basic Data/iML1515.xml'                            # model file
reaction_g0_file='./Basic Data/reaction_g0.txt'                  # reaction G0 values
metabolites_lnC_file = './Basic Data/metabolites_lnC.txt'        # metabolite concentration data
reaction_kcat_MW_file='./Basic Data/ID_kcat_MW_file.csv'         # reaction kcat data from ECMpy

# Bundle everything the later calculation steps read from into one object.
Concretemodel_Need_Data = Get_Concretemodel_Need_Data(
    reaction_g0_file,
    metabolites_lnC_file,
    model_file,
    reaction_kcat_MW_file,
)

# Data output files

In [3]:
# CSV files written by the bottleneck-reaction, limiting-metabolite and
# key-enzyme steps further down in this notebook.
max_min_E_file = './Analysis Result/max_min_E.csv'
max_min_concentration_for_specific_metabolite_file = './Analysis Result/max_min_concentration_for_specific_metabolite.csv'
max_min_Df_for_specific_reaction_file = './Analysis Result/max_min_Df_complete_for_specific_reaction.csv'

# 1.MDF calculation
Preset a lower growth rate. For example, setting "biomass_value = 0.3" gives an MDF value of 2.667, and setting "biomass_value = 0.7074" gives an MDF value of -14.571.

In [4]:
# Settings for the MDF calculation at a preset growth rate.
substrate_name = 'EX_glc__D_e_reverse'
substrate_value = 10
biomass_id = 'BIOMASS_Ec_iML1515_core_75p37M'
biomass_value = 0.7074
E_total = 0.13  # equal to e_pool (0.19) * saturation (0.68)
K_value = 1249

# B_value is reused by every later step of the notebook.
B_value = MDF_Calculation(
    Concretemodel_Need_Data,
    biomass_value,
    biomass_id,
    substrate_name,
    substrate_value,
    K_value,
    E_total,
    'gurobi',
)
print(f"B value : {B_value}")

B value : -0.4818674321377183


# 2.Maximum growth rate calculation
Use the MDF obtained above as the lower bound of the thermodynamic constraints.

In [5]:
# Settings for maximizing the biomass reaction; B_value comes from step 1.
substrate_name = 'EX_glc__D_e_reverse'
substrate_value = 10
obj_name = 'BIOMASS_Ec_iML1515_core_75p37M'
obj_target = 'maximize'
E_total = 0.13  # equal to e_pool (0.19) * saturation (0.68)
K_value = 1249

max_biomass_under_mdf = Max_Growth_Rate_Calculation(
    Concretemodel_Need_Data,
    obj_name,
    obj_target,
    substrate_name,
    substrate_value,
    K_value,
    E_total,
    B_value,
    'gurobi',
)
print(f"Max biomass value : {max_biomass_under_mdf}")

Max biomass value : 0.7313735882567505


# 3.Minimum enzyme cost calculation
Fix the MDF and the maximum growth rate obtained above.

In [6]:
# Settings for the minimum enzyme cost calculation. The growth rate is the
# maximum found in step 2; B_value from step 1 is passed through unchanged.
substrate_name = 'EX_glc__D_e_reverse'
substrate_value = 10
biomass_id = 'BIOMASS_Ec_iML1515_core_75p37M'
biomass_value = max_biomass_under_mdf
E_total = 0.13  # equal to e_pool (0.19) * saturation (0.68)
K_value = 1249

min_E = Min_Enzyme_Cost_Calculation(
    Concretemodel_Need_Data,
    biomass_value,
    biomass_id,
    substrate_name,
    substrate_value,
    K_value,
    E_total,
    B_value,
    'gurobi',
)
print(f"Min enzyme amount : {min_E}")

Min enzyme amount : 0.1299999967830606


# 4.Minimum flux sum calculation（pFBA）
Used to simplify the output file below

In [7]:
# Settings for the minimum flux sum calculation. The growth rate is the
# maximum found in step 2; B_value from step 1 is passed through unchanged.
substrate_name = 'EX_glc__D_e_reverse'
substrate_value = 10
biomass_id = 'BIOMASS_Ec_iML1515_core_75p37M'
biomass_value = max_biomass_under_mdf
E_total = 0.13  # equal to e_pool (0.19) * saturation (0.68)
K_value = 1249

# The call returns two items: the minimum flux sum and the object that the
# pathway output step passes to Get_Results_Thermodynamics as Concretemodel.
min_V, Concretemodel = Min_Flux_Sum_Calculation(
    Concretemodel_Need_Data,
    biomass_value,
    biomass_id,
    substrate_name,
    substrate_value,
    K_value,
    E_total,
    B_value,
    'gurobi',
)
print(f"Min flux amount : {min_V}")

Min flux amount : 668.3961096998348


# 5.Pathway information output
The pathway information is used to extract the various lists in the following steps.

In [8]:
# Unpack the entries of the prepared data that the result extraction needs.
model, reaction_kcat_MW, reaction_g0, coef_matrix, metabolite_list = (
    Concretemodel_Need_Data[key]
    for key in ('model', 'reaction_kcat_MW', 'reaction_g0', 'coef_matrix', 'metabolite_list')
)

use_result = Get_Results_Thermodynamics(model, Concretemodel, reaction_kcat_MW, reaction_g0, coef_matrix, metabolite_list)

# Keep only rows with a flux above 1e-10, largest flux first.
carries_flux = use_result['flux'] > 1e-10
use_result = use_result[carries_flux].sort_values(by='flux', axis=0, ascending=False)


def _model_reaction(row):
    """Return the model reaction whose ID is the row's index label."""
    return model.reactions.get_by_id(row.name)


# Annotate each row with the reaction equation and its gene-reaction rule.
use_result["reaction"] = use_result.apply(lambda row: _model_reaction(row).reaction, axis=1)
use_result["gpr"] = use_result.apply(lambda row: _model_reaction(row).gene_reaction_rule, axis=1)

# The file name encodes growth rate, B value and minimum enzyme amount (3 decimals each).
result_tag = f"{round(max_biomass_under_mdf,3)}_{round(B_value,3)}_{round(min_E,3)}"
use_result.to_csv('./Analysis Result/' + result_tag + '_EcoTCM_6-1.csv', sep=',', header=True, index=True, mode='w')

# 6.List extraction of candidate bottleneck reactions
Criterion: the thermodynamic driving force (f) is equal to the MDF value (B) mentioned above.

In [9]:
# First drop rows whose f is not above -1249, then keep the rows whose f
# matches B_value within 1e-05.
# NOTE(review): -1249 presumably mirrors -K_value (K_value = 1249) - confirm.
above_cutoff = use_result['f'] > -1249
use_result_tmp = use_result.loc[above_cutoff]
matches_mdf = (use_result_tmp['f'] - B_value).abs() <= 1e-05
use_result_select = use_result_tmp.loc[matches_mdf]
use_result_select.head(10)

Unnamed: 0,flux,z,f,enz,met_concentration,reaction,gpr
PGK_reverse,17.072314,1.0,-0.481867,0.001186,;3pg_c : 0.009036352331199865;atp_c : 2.600959...,13dpg_c + adp_c --> 3pg_c + atp_c,b2926
GAPD,17.072314,1.0,-0.481867,0.00144,;nadh_c : 4.999999992621078e-07;pi_c : 0.02000...,g3p_c + nad_c + pi_c --> 13dpg_c + h_c + nadh_c,b1779
FBA,8.195743,1.0,-0.481859,0.008341,;g3p_c : 0.0012150780459455337;dhap_c : 0.0085...,fdp_c --> dhap_c + g3p_c,b2925 or b2097
TPI,8.092395,1.0,-0.481867,1.3e-05,;g3p_c : 0.0012150780459455337;dhap_c : 0.0085...,dhap_c --> g3p_c,b3919
PGI,6.496757,1.0,-0.481867,0.000529,;g6p_c : 3.376421801088202e-05;f6p_c : 1.52601...,g6p_c --> f6p_c,b4025
MDH,2.782968,1.0,-0.481867,4.8e-05,;mal__L_c : 0.001773078669043697;nadh_c : 4.99...,mal__L_c + nad_c --> h_c + nadh_c + oaa_c,b3236
FUM,2.782968,1.0,-0.481867,6.8e-05,;mal__L_c : 0.001773078669043697;fum_c : 0.000...,fum_c + h2o_c --> mal__L_c,b4122 or b1612 or b1611
PPA,2.561443,1.0,-0.481867,0.000166,;ppi_c : 6.207153639009681e-07;pi_c : 0.020000...,h2o_c + ppi_c --> h_c + 2.0 pi_c,b2502 or b2744 or b4226
ASPTA_reverse,2.141332,1.0,-0.481867,0.001233,;glu__L_c : 0.0013516071481578973;oaa_c : 4.99...,glu__L_c + oaa_c --> akg_c + asp__L_c,b0928 or b4054
RPE,1.768092,1.0,-0.481867,1.9e-05,;ru5p__D_c : 0.000136003714097665;xu5p__D_c : ...,ru5p__D_c --> xu5p__D_c,b3386


In [10]:
# Index labels (reaction IDs) of the candidate bottleneck reactions.
path_reac_list = [reaction_id for reaction_id in use_result_select.index]

# 7.Determination of bottleneck reaction
Calculate the maximum thermodynamic driving force for each reaction in the above list; if the value is still equal to B, the reaction is a bottleneck reaction.

In [11]:
import pandas as pd
import numpy as np
import datetime
from pyomo.environ import *
from pyomo.opt import SolverFactory
import pyomo.environ as pyo
from concurrent.futures import ProcessPoolExecutor, as_completed
starttime = datetime.datetime.now()
max_min_Df_list_fixed = pd.DataFrame()

# One job per candidate reaction, run in worker processes; results are
# stored in whatever order the jobs finish.
with ProcessPoolExecutor() as pool:
    jobs = {}
    for reaction_id in path_reac_list:
        job = pool.submit(
            Get_Max_Min_Df_Complete, Concretemodel_Need_Data, reaction_id, 'maximize', K_value, B_value,
            max_biomass_under_mdf, biomass_id, E_total, substrate_name, substrate_value, 'gurobi',
        )
        jobs[job] = reaction_id
    for finished in as_completed(jobs):
        solved = finished.result()
        for row_label in solved.index:
            max_min_Df_list_fixed.loc[row_label, 'max_Df_complete'] = solved.loc[row_label, 'max_value']

endtime = datetime.datetime.now()
print(endtime - starttime)  # elapsed wall-clock time
max_min_Df_list_fixed.to_csv(max_min_Df_for_specific_reaction_file, sep=',', header=True, index=True, mode='w')

0:00:37.795067


In [12]:
# Smallest maximum driving force first.
max_min_Df_list_fixed = max_min_Df_list_fixed.sort_values('max_Df_complete')
max_min_Df_list_fixed.head(10)

Unnamed: 0,max_Df_complete
GAPD,-0.481863
TPI,-0.481859
FBA,-0.481859
PGCD,-0.481819
PGK_reverse,-0.481812
ACGK,2.667088
MDH,5.767088
GLU5K,6.467088
FUM,9.648955
IPPMIa_reverse,10.030823


In [13]:
# A reaction counts as a bottleneck when its maximum driving force exceeds
# B_value by at most 0.001.
gap_to_mdf = max_min_Df_list_fixed['max_Df_complete'] - B_value
Bottleneck_reaction = max_min_Df_list_fixed.loc[gap_to_mdf <= 0.001]
Bottleneck_reaction

Unnamed: 0,max_Df_complete
GAPD,-0.481863
TPI,-0.481859
FBA,-0.481859
PGCD,-0.481819
PGK_reverse,-0.481812


In [14]:
# Show the metabolite concentration string of the first bottleneck reaction.
first_bottleneck_id = Bottleneck_reaction.index[0]
use_result_select.loc[first_bottleneck_id, 'met_concentration']

';nadh_c : 4.999999992621078e-07;pi_c : 0.020000000008563314;nad_c : 4.999999992621083e-06;13dpg_c : 5.319055209562076e-05;h_c : 1.0;g3p_c : 0.0012150780459455337'

# 8.List extraction of candidate limiting metabolites
Criterion: involved in bottleneck reactions, except for water (h2o_c) and protons (h_c).

In [15]:
# Collect the IDs of all metabolites that take part in a bottleneck reaction,
# leaving out protons (h_c) and water (h2o_c).
#
# The participants are read straight from each reaction's `metabolites`
# mapping. The earlier version probed every model metabolite with
# get_coefficient() inside a bare `except`, which was slow and could hide
# unrelated errors.
Bottleneck_reaction_lsit = list(Bottleneck_reaction.index)  # name (sic) kept for compatibility
bottleneck_reaction_ids = set(Bottleneck_reaction_lsit)
excluded_metabolite_ids = {'h_c', 'h2o_c'}

# sorted() gives a deterministic order, so re-runs produce the same list.
Bottleneck_reaction_met = sorted({
    met.id
    for rea in model.reactions
    if rea.id in bottleneck_reaction_ids
    for met in rea.metabolites
    if met.id not in excluded_metabolite_ids
})
Bottleneck_reaction_met

['3php_c',
 '3pg_c',
 '13dpg_c',
 'pi_c',
 'nad_c',
 'fdp_c',
 'atp_c',
 'adp_c',
 'nadh_c',
 'dhap_c',
 'g3p_c']

# 9.Determination of limiting metabolites
Calculate the maximum and minimum concentrations for each metabolite in the above list; if the two values are equal, the metabolite is a limiting metabolite.

In [16]:
import pandas as pd
import numpy as np
import datetime
from pyomo.environ import *
from pyomo.opt import SolverFactory
import pyomo.environ as pyo
from concurrent.futures import ProcessPoolExecutor, as_completed
starttime = datetime.datetime.now()
max_min_concentration_list_fixed = pd.DataFrame()

# Every job receives the same list of bottleneck reaction IDs.
bottleneck_reaction_ids = list(Bottleneck_reaction.index)


def _scan_metabolite_concentrations(direction, result_column, target_column):
    """Run one job per candidate metabolite and store the results.

    direction     -- 'maximize' or 'minimize', passed on to Get_Max_Min_Met_Concentration
    result_column -- column read from each returned frame
    target_column -- column of max_min_concentration_list_fixed that is filled
    """
    with ProcessPoolExecutor() as pool:
        jobs = {}
        for met_id in Bottleneck_reaction_met:
            job = pool.submit(
                Get_Max_Min_Met_Concentration, Concretemodel_Need_Data, met_id, direction, K_value, B_value,
                max_biomass_under_mdf, biomass_id, E_total, substrate_name, substrate_value,
                bottleneck_reaction_ids, 'gurobi',
            )
            jobs[job] = met_id
        # Results are stored in whatever order the jobs finish.
        for finished in as_completed(jobs):
            solved = finished.result()
            for row_label in solved.index:
                max_min_concentration_list_fixed.loc[row_label, target_column] = solved.loc[row_label, result_column]


# Maximization pass first, then minimization pass.
_scan_metabolite_concentrations('maximize', 'max_value', 'max_concentration')
_scan_metabolite_concentrations('minimize', 'min_value', 'min_concentration')

endtime = datetime.datetime.now()
print(endtime - starttime)  # elapsed wall-clock time
max_min_concentration_list_fixed.to_csv(max_min_concentration_for_specific_metabolite_file, sep=',', header=True, index=True, mode='w')


0:00:22.858933


In [17]:
# Display the max_concentration / min_concentration columns collected for each candidate metabolite.
max_min_concentration_list_fixed

Unnamed: 0,max_concentration,min_concentration
3pg_c,-4.706498,-4.7065
13dpg_c,-9.841628,-9.84163
pi_c,-3.912023,-3.912025
3php_c,-14.508656,-14.508658
nad_c,-3.912023,-12.206073
atp_c,-3.912023,-12.206073
fdp_c,-3.912023,-3.912026
adp_c,-6.214608,-14.508658
nadh_c,-6.214608,-14.508658
dhap_c,-4.767177,-4.76718


In [18]:
# A metabolite counts as limiting when its maximum and minimum values
# differ by at most 0.001.
concentration_span = (
    max_min_concentration_list_fixed['max_concentration']
    - max_min_concentration_list_fixed['min_concentration']
)
Limiting_metabolite = max_min_concentration_list_fixed.loc[concentration_span <= 0.001]
Limiting_metabolite

Unnamed: 0,max_concentration,min_concentration
3pg_c,-4.706498,-4.7065
13dpg_c,-9.841628,-9.84163
pi_c,-3.912023,-3.912025
3php_c,-14.508656,-14.508658
fdp_c,-3.912023,-3.912026
dhap_c,-4.767177,-4.76718
g3p_c,-6.712945,-6.712947


# 10.List extraction of candidate key enzymes
Criterion: the enzyme usage is more than 0.0013 g/gDW (above 1% of e_pool,eff).

In [19]:
# Re-open the CSV written in step 5; its name is rebuilt from the same three rounded values.
step5_tag = f"{round(max_biomass_under_mdf,3)}_{round(B_value,3)}_{round(min_E,3)}"
step5_file = pd.read_csv('./Analysis Result/' + step5_tag + '_EcoTCM_6-1.csv', index_col=0)

# Largest enzyme usage first.
step5_file_sort = step5_file.sort_values('enz', ascending=False)
step5_file_sort.head(10)

Unnamed: 0,flux,z,f,enz,met_concentration,reaction,gpr
NADH16pp,31.346475,1.0,-9999.0,0.024194,;nadh_c : 4.999999992621078e-07;h_p : 1.0;nad_...,4.0 h_c + nadh_c + q8_c --> 3.0 h_p + nad_c + ...,b2280 and b2281 and b2287 and b2288 and b2282 ...
FBA,8.195743,1.0,-0.481859,0.008341,;g3p_c : 0.0012150780459455337;dhap_c : 0.0085...,fdp_c --> dhap_c + g3p_c,b2925 or b2097
CBPS,0.458269,1.0,7.797594,0.005718,;hco3_c : 3.3502907600691315e-07;gln__L_c : 4....,2.0 atp_c + gln__L_c + h2o_c + hco3_c --> 2.0 ...,b0032 and b0033
KARA2,0.212485,1.0,6.027153,0.004255,;nadp_c : 0.0020000000008562916;23dhmp_c : 0.0...,2ahbut_c + h_c + nadph_c --> 23dhmp_c + nadp_c,b3774
KARA1_reverse,0.639489,1.0,15.73564,0.004176,;h_c : 1.0;nadph_c : 0.02000000000856292;alac_...,alac__S_c + h_c + nadph_c --> 23dhmb_c + nadp_c,b3774
GHMT2r,0.775898,1.0,2.244747,0.003052,;ser__L_c : 4.999999992621096e-07;thf_c : 0.00...,ser__L_c + thf_c --> gly_c + h2o_c + mlthf_c,b2551
CYTBO3_4pp,33.608635,1.0,-9999.0,0.002357,;h_p : 1.0;o2_c : 4.999999992621096e-07;h_c : ...,4.0 h_c + 0.5 o2_c + q8h2_c --> h2o_c + 4.0 h_...,b0429 and b0432 and b0431 and b0430
GND,3.503243,1.0,27.662496,0.002277,;6pgc_c : 0.0007978297528964933;ru5p__D_c : 0....,6pgc_c + nadp_c --> co2_c + nadph_c + ru5p__D_c,b2029
PGM_reverse,15.855908,1.0,15.397356,0.001864,;2pg_c : 4.706599742980262e-06;3pg_c : 0.00903...,3pg_c --> 2pg_c,b0755 or b3612
ATPS4rpp,55.181813,1.0,-9999.0,0.001857,;h_p : 1.0;atp_c : 2.6009595205285837e-05;adp_...,adp_c + 4.0 h_p + pi_c --> atp_c + h2o_c + 3.0...,( b3734 and b3733 and b3731 and b3735 and b373...


In [20]:
# Index labels of the rows whose 'enz' value is above 0.0013.
uses_much_enzyme = step5_file_sort['enz'] > 0.0013
enz_use_reaction_list = list(step5_file_sort.loc[uses_much_enzyme].index)

# 11.Determination of key enzymes.
Calculate the minimum enzyme cost for each reaction in the above list; the higher the value, the more critical (key) the enzyme.

In [21]:
import pandas as pd
import numpy as np
import datetime
from pyomo.environ import *
from pyomo.opt import SolverFactory
import pyomo.environ as pyo
from concurrent.futures import ProcessPoolExecutor, as_completed
starttime = datetime.datetime.now()
max_min_E_list_fixed = pd.DataFrame()


def _scan_enzyme_usage(direction, result_column, target_column):
    """Run one job per candidate reaction and store the results.

    direction     -- 'maximize' or 'minimize', passed on to Get_Max_Min_E
    result_column -- column read from each returned frame
    target_column -- column of max_min_E_list_fixed that is filled
    """
    with ProcessPoolExecutor() as pool:
        jobs = {}
        for reaction_id in enz_use_reaction_list:
            job = pool.submit(
                Get_Max_Min_E, Concretemodel_Need_Data, reaction_id, direction, K_value, B_value,
                max_biomass_under_mdf, biomass_id, E_total, substrate_name, substrate_value, 'gurobi',
            )
            jobs[job] = reaction_id
        # Results are stored in whatever order the jobs finish.
        for finished in as_completed(jobs):
            solved = finished.result()
            for row_label in solved.index:
                max_min_E_list_fixed.loc[row_label, target_column] = solved.loc[row_label, result_column]


# Maximization pass first, then minimization pass.
_scan_enzyme_usage('maximize', 'max_value', 'max_E')
_scan_enzyme_usage('minimize', 'min_value', 'min_E')

endtime = datetime.datetime.now()
print(endtime - starttime)  # elapsed wall-clock time
max_min_E_list_fixed.to_csv(max_min_E_file, sep=',', header=True, index=True, mode='w')


0:00:31.940128


In [22]:
# Display the enzyme table, largest max_E first.
max_min_E_list_fixed.sort_values('max_E', ascending=False)

Unnamed: 0,max_E,min_E
NADH16pp,0.024194,0.024194
FBA,0.008341,0.008341
CBPS,0.005718,0.005718
KARA2,0.004255,0.004255
KARA1_reverse,0.004176,0.004176
GHMT2r,0.003052,0.003052
CYTBO3_4pp,0.002357,0.002357
GND,0.002277,0.002277
PGM_reverse,0.001864,0.001864
ATPS4rpp,0.001857,0.001857
