# Import related packages

In [1]:
import sys

import cobra
import pandas as pd

# The project helpers live in ./code/, so that directory must be on sys.path
# before the wildcard import below.
sys.path.append(r'./code/')
from cobrapy_ec_model_function import *

# Input files

In [2]:
# --- Model ---------------------------------------------------------------
# Genome-scale metabolic model (SBML) from which the enzyme-constrained
# model is constructed.
model_name = "./model/iML1515_new.xml"

# --- Reaction kcat values (unit: 1/s) --------------------------------------
# Row format, e.g.: AADDGT,49389.2889
# Alternative kcat tables that can be swapped in instead:
#   "./data/reaction_autopacmen_ori_noadj.csv"
#   "./data/reaction_kappori.csv"
#   "./data/reaction_autopacmen_ori_adj.csv"
#   "./data/reaction_kappori_change.csv"
reaction_kcat_file = "./data/reaction_autopacmen_ori_adj_change.csv"

# --- Gene abundance ----------------------------------------------------------
# Row format, e.g.: b0789,1.1
gene_abundance_file = "./data/gene_abundance.csv"

# --- Gene molecular weight ---------------------------------------------------
# Row format, e.g.: b0001,thrL,2.13846
gene_molecular_weight_file = "./data/gene_molecular_weight.csv"


# Step1: Preprocessing of model

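Each reversible reaction in the GEM is divided into two irreversible reactions, and reactions catalyzed by isoenzymes are then split per isoenzyme. The input is iML1515 with 2712 reactions. The output is a model with 3375 irreversible reactions (the model summary below reports 5883 reactions once the isoenzymes have been split).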

In [3]:
# Load the SBML model whose path was set in the input-files cell.
model = cobra.io.read_sbml_model(model_name)
# Called for its effect on `model`; the return value is not used here.
convert_to_irreversible(model)
# Split isoenzymes; the returned model replaces `model` for the rest of the notebook.
model = isoenzyme_split_only(model)
# Bare expression so the notebook displays the model summary as the cell output.
model

0,1
Name,iML1515
Memory address,0x019b6f8a00b8
Number of metabolites,1877
Number of reactions,5883
Number of groups,38
Objective expression,1.0*BIOMASS_Ec_iML1515_core_75p37M - 1.0*BIOMASS_Ec_iML1515_core_75p37M_reverse_35685
Compartments,"cytosol, extracellular space, periplasm"


# Step2: Retrieving enzyme kinetics and proteomics data

The input is the GEM. The outputs are the 'genes' and 'gpr_relationship' data of iML1515.

In [4]:
# Unpack the two results returned for the preprocessed model: the gene data
# and the gene-protein-reaction (GPR) relationships.
genes, gpr_relationship = get_genes_and_gpr(model)

Get the molecular weight (MW) of each enzyme according to the all_reaction_GPR.csv file obtained in the previous step (gpr_relationship, ./analysis/all_reaction_GPR.csv). The gene_reaction_rule of a small number of reactions in iML1515 must be corrected manually (see Supplementary Table S1 for details), and the subunit composition of each protein must also be obtained manually from EcoCyc.

In [5]:
# Load the reaction/gene/subunit/MW table; its first column is used as the index.
# The DataFrame gets its own *_df name because `reaction_gene_subunit_MW` is
# (re)bound to a CSV *path string* in the following cell; sharing one name for
# a DataFrame and a path makes the notebook state depend on which cell ran last.
reaction_gene_subunit_MW_df = pd.read_csv(
    "./data/reaction_gene_subunit_MW_20201123.csv",
    index_col=0,
)
# Called for its side effects only; the return value is not captured.
# NOTE(review): presumably this produces the isoenzyme-split table that the
# next cell reads -- confirm in ./code/cobrapy_ec_model_function.py.
reaction_gene_subunit_MW_split_only(reaction_gene_subunit_MW_df)

In [7]:
# Path of the reaction-gene-subunit-MW file.
# Row format, e.g.: ALATA_D2,D-alanine transaminase,b2551 or b0870,45.31659 or 36.49471 ,2 or 4
# Note: this name now holds a path string (the previous cell used it for a DataFrame).
reaction_gene_subunit_MW = "./data/reaction_gene_subunit_MW.csv"
# Per-reaction molecular-weight data derived from that file; used below when
# computing kcat/MW.
reaction_mw = calculate_reaction_mw(reaction_gene_subunit_MW)

Calculate kcat/MW. The inputs are 'reaction_kcat' and 'reaction_MW' data for calculating the kcat/MW (When the reaction is catalyzed by several isozymes, the maximum is retained).

In [8]:
# Path handed to calculate_reaction_kcat_mw as its `save_file` argument; later
# cells read the kcat/MW table back from this same path.
save_file = "./analysis/reaction_kcat_mw.csv"

# Combine the reaction kcat file with the per-reaction MW data.
reaction_kcat_mw = calculate_reaction_kcat_mw(
    reaction_kcat_file,
    reaction_mw,
    save_file,
)

Calculate f. The inputs are the 'genes' data, 'gene_abundance.csv' and 'gene_molecular_weight.csv'.

In [9]:
# Enzyme mass fraction f, computed from the model genes plus the abundance and
# molecular-weight tables configured in the input-files cell.
f = calculate_f(
    genes,
    gene_abundance_file,
    gene_molecular_weight_file,
)
# Bare expression so the computed value is shown as the cell output.
f

0.4059986079578236

# Step3: Save enzyme concentration constraint model as json file.

In [13]:
# --- Files -------------------------------------------------------------------
# Source GEM, the kcat/MW table saved in Step 2, and the JSON file that will
# hold the enzyme-constrained model.
model_file = "./model/iML1515_new.xml"
# Author's scratch notes kept from the original cell (meaning not documented):
#   *2
#   PDH,1459824,608.26,2400
#   SUCOAS_reverse,1447200,142.3403,10167.1839
reaction_kcat_mw_file = "./analysis/reaction_kcat_mw.csv"
json_output_file = "./model/iML1515_irr_enz_constraint_autopacmen.json"

# --- Enzyme-pool parameters --------------------------------------------------
# Enzyme mass fraction. Hard-coded; it equals the calculate_f output displayed
# above (0.4059986...) rounded to three decimals and replaces that value in `f`.
f = 0.406
# Total protein fraction in the cell.
ptot = 0.56
# Approximated average enzyme saturation: 1 for kapp data (0.5 was the
# alternative setting).
sigma = 1
# Bounds of the enzyme concentration constraint.
lowerbound = 0
upperbound = round(ptot * f * sigma, 3)

trans_model2enz_json_model_split_isoenzyme_only(
    model_file,
    reaction_kcat_mw_file,
    f,
    ptot,
    sigma,
    lowerbound,
    upperbound,
    json_output_file,
)

In [14]:
# --- Enzyme-constrained model --------------------------------------------------
json_model_path = "./model/iML1515_irr_enz_constraint_autopacmen.json"
fluxes_outfile = "./analysis/ECMpy_ori_solution_df_pfba.csv"
reaction_kcat_mw_file = "./analysis/reaction_kcat_mw.csv"

enz_model = get_enzyme_constraint_model(json_model_path)
enz_model_pfba_solution = get_fluxes_detail_in_model(
    enz_model,
    fluxes_outfile,
    reaction_kcat_mw_file,
)
# Biomass flux of the enzyme-constrained solution.
print(enz_model_pfba_solution.fluxes["BIOMASS_Ec_iML1515_core_75p37M"])

# --- Same JSON loaded as a plain COBRA model -----------------------------------
# Loading through cobra directly (not get_enzyme_constraint_model) gives the
# reference solution; its fluxes are also written to CSV.
norm_model = cobra.io.json.load_json_model(json_model_path)
norm_model_pfba_solution = cobra.flux_analysis.pfba(norm_model)
norm_model_pfba_solution_df = norm_model_pfba_solution.to_frame()
norm_model_pfba_solution_df.to_csv("./analysis/Orimodel_solution_df_pfba.csv")
# Biomass flux of the reference solution.
print(norm_model_pfba_solution_df.fluxes["BIOMASS_Ec_iML1515_core_75p37M"])

0.6333359344090235
0.869772642032012


# Step4: Calibration parameters

Compare with C13 data

In [15]:
# Compare the current enzyme-constrained solution against the C13 reaction file
# and print the reaction IDs returned by the helper.
# NOTE(review): presumably these are the C13 reactions the solution fails to
# match -- confirm against get_diff_reaction_use_c13.
c13reaction_file = "./data/C13reaction.csv"
c13reaction_2_enz_model_diff = get_diff_reaction_use_c13(
    c13reaction_file,
    enz_model_pfba_solution,
)
print(c13reaction_2_enz_model_diff)

['PPC', 'FBA', 'PFK', 'ME2', 'TPI', 'SUCOAS_reverse', 'PYK', 'PGI', 'TKT2', 'AKGDH']


Select calibration reactions according to the biomass difference

In [16]:
# --- Input / output files ------------------------------------------------------
reaction_kcat_mw_file = "./analysis/reaction_kcat_mw.csv"
kcat_data_colect_file = "./analysis/reaction_max_df.csv"
json_model_path = "./model/iML1515_irr_enz_constraint_autopacmen.json"
model_file = "./model/iML1515_new.xml"
json_output_file = "./model/iML1515_irr_enz_constraint_adj_round1.json"
reaction_biomass_outfile = "./analysis/reaction_biomass.csv"

# --- Selection thresholds ------------------------------------------------------
percentage = 0.1  # 10%
select_percentage = 0.8

# --- Enzyme-pool parameters ----------------------------------------------------
# Enzyme mass fraction.
f = 0.406
# Total protein fraction in the cell.
ptot = 0.56
# Approximated average enzyme saturation: kcapp data, so saturation is 1
# (0.5 was the alternative setting).
sigma = 1
# Bounds of the enzyme concentration constraint.
lowerbound = 0
upperbound = round(ptot * f * sigma, 3)

# Round 1: the helper returns the selected reactions and the adjusted model.
df_reaction_select, enz_model = get_enz_model_use_biomass_diff(
    reaction_kcat_mw_file,
    json_model_path,
    percentage,
    reaction_biomass_outfile,
    select_percentage,
    kcat_data_colect_file,
    model_file,
    f,
    ptot,
    sigma,
    lowerbound,
    upperbound,
    json_output_file,
)

# Only when a DataFrame of selected reactions came back is the kcat/MW table
# switched to the biomass-adjusted file; otherwise the original table is kept.
if isinstance(df_reaction_select, pd.DataFrame):
    reaction_kcat_mw_file = "./analysis/reaction_change_by_biomass.csv"

fluxes_outfile = "./analysis/ECMpy_adj_round1_solution_df_pfba.csv"
enz_model_pfba_solution = get_fluxes_detail_in_model(
    enz_model,
    fluxes_outfile,
    reaction_kcat_mw_file,
)
# Biomass flux after round 1.
print(enz_model_pfba_solution.fluxes["BIOMASS_Ec_iML1515_core_75p37M"])

['KARA1_reverse']
0.6871016739244613


In [17]:
# Re-run the C13 comparison on the round-1 solution and print the reaction IDs
# returned by the helper.
c13reaction_file = "./data/C13reaction.csv"
c13reaction_2_enz_model_diff = get_diff_reaction_use_c13(
    c13reaction_file,
    enz_model_pfba_solution,
)
print(c13reaction_2_enz_model_diff)


['PPC', 'ME2', 'TPI', 'SUCOAS_reverse', 'PYK', 'PGI', 'AKGDH']


Calibrate kcat according to the C13 reaction list

In [18]:
# --- Input / output files ------------------------------------------------------
c13reaction_file = "./data/C13reaction.csv"
reaction_kcat_mw_file = "./analysis/reaction_kcat_mw.csv"
model_file = "./model/iML1515_new.xml"
json_output_file = "./model/iML1515_irr_enz_constraint_adj_round2.json"

# --- Enzyme-pool parameters (same values as in the earlier cells) --------------
f = 0.406
ptot = 0.56
sigma = 1
lowerbound = 0
upperbound = round(ptot * f * sigma, 3)
percentage = 0.1

# Round 2. `df_reaction_select` and `kcat_data_colect_file` are not set in this
# cell; they come from the biomass-difference calibration cell above.
enz_model = get_enz_model_use_c13(
    reaction_kcat_mw_file,
    c13reaction_file,
    percentage,
    df_reaction_select,
    kcat_data_colect_file,
    model_file,
    f,
    ptot,
    sigma,
    lowerbound,
    upperbound,
    json_output_file,
)

# Evaluate the round-2 model against the C13-adjusted kcat/MW table.
reaction_kcat_mw_file = "./analysis/reaction_change_by_c13.csv"
fluxes_outfile = "./analysis/ECMpy_adj_round2_solution_df_pfba.csv"
enz_model_pfba_solution = get_fluxes_detail_in_model(
    enz_model,
    fluxes_outfile,
    reaction_kcat_mw_file,
)
# Biomass flux after round 2.
print(enz_model_pfba_solution.fluxes["BIOMASS_Ec_iML1515_core_75p37M"])

[]
[]
0.6871016739244613


In [19]:
# Re-run the C13 comparison on the round-2 solution and print the reaction IDs
# returned by the helper.
c13reaction_file = "./data/C13reaction.csv"
c13reaction_2_enz_model_diff = get_diff_reaction_use_c13(
    c13reaction_file,
    enz_model_pfba_solution,
)
print(c13reaction_2_enz_model_diff)


['PPC', 'ME2', 'TPI', 'SUCOAS_reverse', 'PYK', 'PGI', 'AKGDH']


Calibrate kcat according to enzyme usage

In [25]:
# --- Enzyme-pool parameters (same values as in the earlier cells) --------------
f = 0.406
ptot = 0.56
sigma = 1
lowerbound = 0
upperbound = round(ptot * f * sigma, 3)
# Enzyme-usage threshold handed to the helper as its first argument.
enz_ratio = 0.01

# --- Input / output files ------------------------------------------------------
# The flux table and kcat/MW table are the round-2 paths used in the C13
# calibration cell.
reaction_flux_file = "./analysis/ECMpy_adj_round2_solution_df_pfba.csv"
reaction_kcat_mw_file = "./analysis/reaction_change_by_c13.csv"
reaction_enz_usage_file = "./analysis/ECMpy_adj_round2_reaction_enz_usage_df.csv"
kcat_data_colect_file = "./analysis/reaction_max_df.csv"
model_file = "./model/iML1515_new.xml"
json_output_file = "./model/iML1515_irr_enz_constraint_adj_round3.json"

# Round 3.
enz_model = get_enz_model_use_enz_usage(
    enz_ratio,
    reaction_flux_file,
    reaction_kcat_mw_file,
    reaction_enz_usage_file,
    kcat_data_colect_file,
    model_file,
    f,
    ptot,
    sigma,
    lowerbound,
    upperbound,
    json_output_file,
)

# Evaluate the round-3 model against the enzyme-usage-adjusted kcat/MW table.
reaction_kcat_mw_file = "./analysis/reaction_change_by_enzuse.csv"
fluxes_outfile = "./analysis/ECMpy_adj_round3_solution_df_pfba.csv"
enz_model_pfba_solution = get_fluxes_detail_in_model(
    enz_model,
    fluxes_outfile,
    reaction_kcat_mw_file,
)
# Biomass flux after round 3.
print(enz_model_pfba_solution.fluxes["BIOMASS_Ec_iML1515_core_75p37M"])

['ATPS4rpp_num2', 'NADH16pp', 'PSERT', 'ACCOAC', 'PDH', 'PAPSR2_num2', 'KARA2', 'CS', 'ACGS', 'ENO', 'GAPD', 'SADT2', 'ADSS', 'PGL', 'IPPMIa_reverse', 'IPPMIb_reverse']
['ATPS4rpp_num2', 'NADH16pp', 'PSERT', 'ACCOAC', 'PAPSR2_num2', 'KARA2', 'CS', 'ACGS', 'ENO', 'SADT2', 'ADSS', 'IPPMIa_reverse', 'IPPMIb_reverse']
0.8697726420320072


In [26]:
# Re-run the C13 comparison on the round-3 solution and print the reaction IDs
# returned by the helper.
c13reaction_file = "./data/C13reaction.csv"
c13reaction_2_enz_model_diff = get_diff_reaction_use_c13(
    c13reaction_file,
    enz_model_pfba_solution,
)
print(c13reaction_2_enz_model_diff)


['ME2', 'TALA', 'PTAr', 'ACKr_reverse', 'PYK', 'PFL', 'ICL']


In [30]:
# --- 1. Rescale the kcat of hand-picked reactions ------------------------------
# NOTE(review): this input table is not written by any cell visible in this
# notebook (round 3 uses reaction_change_by_enzuse.csv) -- confirm where it
# comes from before re-running.
reaction_kcat_mw_file = "./analysis/reaction_kapp_change_c13_enzuse.csv"
reaction_kapp_change_file = "./analysis/reaction_kapp_change_c13_enzuse_manual.csv"
# Reactions to change and the fold passed for each (paired lists).
select_reaction = ["PYK_num1"]
# Alternative candidate list kept from the original cell:
# c13reaction_selecet=['CS','ACONTa','ACONTb','ICDHyr','MALS', 'MDH', 'ICL', 'SUCOAS_reverse', 'SUCDi', 'AKGDH']
change_fold = [2]
change_reaction_list_round1 = change_reaction_kcat_by_foldlist(
    select_reaction,
    change_fold,
    reaction_kcat_mw_file,
    reaction_kapp_change_file,
)
print(change_reaction_list_round1)

# --- 2. Rebuild the enzyme-constrained JSON model from the adjusted table ------
model_file = "./model/iML1515_new.xml"
reaction_kcat_mw_file = "./analysis/reaction_kapp_change_c13_enzuse_manual.csv"
json_output_file = "./model/iML1515_irr_enz_constraint_adj_manual.json"
# Enzyme mass fraction.
f = 0.406
# Total protein fraction in the cell.
ptot = 0.56
# Approximated average enzyme saturation (0.5 was the alternative setting).
sigma = 1
# Bounds of the enzyme concentration constraint.
lowerbound = 0
upperbound = round(ptot * f * sigma, 3)

trans_model2enz_json_model_split_isoenzyme_only(
    model_file,
    reaction_kcat_mw_file,
    f,
    ptot,
    sigma,
    lowerbound,
    upperbound,
    json_output_file,
)

# --- 3. Solve the rebuilt model -------------------------------------------------
json_model_path = "./model/iML1515_irr_enz_constraint_adj_manual.json"
fluxes_outfile = "./analysis/ECMpy_kapp_adj_manual_solution_df_pfba.csv"
reaction_kcat_mw_file = "./analysis/reaction_kapp_change_c13_enzuse_manual.csv"
enz_model = get_enzyme_constraint_model(json_model_path)

enz_model_pfba_solution = get_fluxes_detail_in_model(
    enz_model,
    fluxes_outfile,
    reaction_kcat_mw_file,
)
# Biomass flux of the manually adjusted model.
print(enz_model_pfba_solution.fluxes["BIOMASS_Ec_iML1515_core_75p37M"])

# --- 4. Compare with the C13 data -----------------------------------------------
c13reaction_file = "./data/C13reaction.csv"
c13reaction_2_enz_model_diff = get_diff_reaction_use_c13(
    c13reaction_file,
    enz_model_pfba_solution,
)
print(c13reaction_2_enz_model_diff)

['PYK_num1']
0.6755599180355496
['ICL', 'ME2', 'PFL', 'SUCOAS_reverse']


# Step5: Solving the enzyme-constrained model with COBRApy.

In [21]:
# Run pFBA on the enzyme-constrained model.
# NOTE(review): this loads the round-2 JSON although round-3 and manual models
# are produced above, and the cell's execution count (21) precedes those cells --
# confirm which model is intended and re-run in order.
json_model_path = "./model/iML1515_irr_enz_constraint_adj_round2.json"
enz_model = get_enzyme_constraint_model(json_model_path)
pfba_solution = cobra.flux_analysis.pfba(enz_model)
pfba_solution_df = pfba_solution.to_frame()
# Optional export of the flux table:
# pfba_solution_df.to_csv('./analysis/ECMpy_solution_df_pfba.csv')

# Bare expression: the biomass flux is shown as the cell output.
pfba_solution.fluxes["BIOMASS_Ec_iML1515_core_75p37M"]

0.7901116510198356

In [22]:
# Run pFBA on the same JSON loaded as a plain COBRA model.
# `json_model_path` is not set here; it is the value left by the previous cell.
norm_model = cobra.io.json.load_json_model(json_model_path)
pfba_solution = cobra.flux_analysis.pfba(norm_model)
pfba_solution_df = pfba_solution.to_frame()
# Optional export of the flux table:
# pfba_solution_df.to_csv('./analysis/Orimodel_solution_df_pfba.csv')

# Bare expression: the biomass flux is shown as the cell output.
pfba_solution.fluxes["BIOMASS_Ec_iML1515_core_75p37M"]


0.8769972144269698