# Import related packages

In [1]:
import cobra
import sys
sys.path.append(r'./code/')
from cobrapy_ec_model_function import *

# Input files

In [2]:
# Genome-scale metabolic model (SBML) used to construct the enzyme-constrained model.
model_name = './data/iML1515.xml'

# Reaction-kcat file (kcat unit: s-1), one "reaction_id,kcat" row per reaction.
# e.g. AADDGT,49389.2889
reaction_kcat_file = "./data/reaction_autopacmenori.csv"

# Gene-abundance file, one "gene_id,abundance" row per gene.
# e.g. b0789,1.1
gene_abundance_file = "./data/gene_abundance.csv"

# Gene-molecular_weight file, one "gene_id,gene_name,molecular_weight" row per gene.
# e.g. b0001,thrL,2.13846
gene_molecular_weight_file = "./data/gene_molecular_weight.csv"


# Step1: Preprocessing of model

The reversible reactions in the GEM model are divided into two irreversible reactions. The input is iML1515 with 2712 reactions. The output is a model with 3375 irreversible reactions.

In [3]:
# Load the SBML model, then split its reversible reactions; the return value of
# convert_to_irreversible is not used, the displayed `model` is the converted one.
model = cobra.io.read_sbml_model(model_name)
convert_to_irreversible(model)
# Bare expression: show the model summary as the cell output.
model

0,1
Name,iML1515
Memory address,0x0281f41d70b8
Number of metabolites,1877
Number of reactions,3375
Number of groups,38
Objective expression,1.0*BIOMASS_Ec_iML1515_core_75p37M - 1.0*BIOMASS_Ec_iML1515_core_75p37M_reverse_35685
Compartments,"cytosol, extracellular space, periplasm"


# Step2: Retrieving enzyme kinetics and proteomics data

The input is the GEM. The outputs are the 'genes' and 'gpr_relationship' data of iML1515.

In [4]:
# Unpack the two results returned for the irreversible model.
genes, gpr_relationship = get_genes_and_gpr(model)

Get the molecular weight (MW) of each enzyme according to the file all_reaction_GPR.csv, which was obtained in the previous step (gpr_relationship, ./analysis/all_reaction_GPR.csv). We need to manually correct the gene_reaction_rule errors of a small number of reactions in iML1515 (see Supplementary Table S1 for details), and also need to manually obtain the subunit composition of each protein from EcoCyc.

In [5]:
# Reaction-gene-subunit-MW file (manually curated, see the paragraph above).
# Isozymes are separated by " or " in the gene, MW and subunit columns.
# e.g. ALATA_D2,D-alanine transaminase,b2551 or b0870,45.31659 or 36.49471 ,2 or 4
reaction_gene_subunit_MW = "./data/reaction_gene_subunit_MW.csv"
reaction_mw = calculate_reaction_mw(reaction_gene_subunit_MW)

Calculate kcat/MW. The inputs are 'reaction_kcat' and 'reaction_MW' data for calculating the kcat/MW (When the reaction is catalyzed by several isozymes, the maximum is retained).

In [6]:
# Output path for the kcat/MW table; later cells read this file back.
save_file="./analysis/reaction_kcat_mw.csv"
reaction_kcat_mw = calculate_reaction_kcat_mw(reaction_kcat_file, reaction_mw, save_file)

Calculate f. The inputs are the 'genes' data, 'gene_abundance.csv' and 'gene_molecular_weight.csv'.

In [7]:
# Compute f from the model genes, their abundances and their molecular weights.
f = calculate_f(genes, gene_abundance_file, gene_molecular_weight_file)
# Bare expression: show the computed value as the cell output.
f

0.4059986079578236

# Step3: Save enzyme concentration constraint model as json file.

In [32]:
model_file = './data/iML1515.xml'
reaction_kcat_mw_file="./analysis/reaction_kcat_mw.csv"
json_output_file="./model/iML1515_irr_enz_constraint_autopacmen.json"
# The enzyme mass fraction. This overwrites the value computed by calculate_f
# above (0.4059986...) with a hand-rounded constant.
f = 0.406
# The total protein fraction in cell.
ptot = 0.56
# The approximated average saturation of enzyme.
sigma = 0.5
# Use sigma = 1 instead when the kcat values are kapp data.
# Lower bound of the enzyme concentration constraint.
lowerbound = 0
# Upper bound of the enzyme concentration constraint: ptot * f * sigma, rounded to 3 decimals.
upperbound = round(ptot * f * sigma, 3)

trans_model2enz_json_model(model_file, reaction_kcat_mw_file, f, ptot, sigma, lowerbound, upperbound, json_output_file)

In [34]:
# pFBA on the enzyme-constrained model loaded from the JSON file; the flux table
# is saved and the biomass flux printed.
json_model_path="./model/iML1515_irr_enz_constraint_autopacmen.json"
enz_model=get_enzyme_constraint_model(json_model_path)
pfba_solution = cobra.flux_analysis.pfba(enz_model)
pfba_solution_df = pfba_solution.to_frame()
pfba_solution_df.to_csv('./analysis/ECMpy_autopacmenori_solution_df_pfba.csv')
print(pfba_solution.fluxes['BIOMASS_Ec_iML1515_core_75p37M'])

# Same JSON file loaded directly with cobra (not via get_enzyme_constraint_model)
# as the reference model; its pFBA flux table is saved and biomass flux printed.
norm_model = cobra.io.json.load_json_model(json_model_path)
norm_model_pfba_solution = cobra.flux_analysis.pfba(norm_model)
norm_model_pfba_solution_df = norm_model_pfba_solution.to_frame()
norm_model_pfba_solution_df.to_csv('./analysis/Orimodel_solution_df_pfba.csv')
# `.fluxes` here is the 'fluxes' column of the DataFrame produced by to_frame().
print(norm_model_pfba_solution_df.fluxes['BIOMASS_Ec_iML1515_core_75p37M'])

0.3438611093118405
0.8769972144269698


# Step4: Parameter calibration

Select calibration reaction

In [35]:
reaction_kcat_mw_file = "./analysis/reaction_kcat_mw.csv"
json_model_path = "./model/iML1515_irr_enz_constraint_autopacmen.json"
# Hidden dependency: `upperbound` comes from the Step3 cell, which must have run first.
enzyme_amount = upperbound
percentage = 0.1 # 10%
reaction_biomass_outfile = './analysis/reaction_biomass_autopacmen.csv'
# Selection threshold passed to select_calibration_reaction.
select_value = 0.001
df_reaction_select = select_calibration_reaction(reaction_kcat_mw_file, json_model_path, enzyme_amount, percentage, reaction_biomass_outfile, select_value)
# Bare expression: show the selected reactions as the cell output.
df_reaction_select

Unnamed: 0,biomass_diff,biomass_diff_ratio
KARA1_reverse,0.56605,0.645441
ATPS4rpp,0.515047,0.587284
PDH,0.015506,0.017681
PSERT,0.008481,0.00967
AKGDH,0.001017,0.00116


Calibration kcat

In [37]:
def calibration_kcat_kapp(need_change_reaction, reaction_kcat_mw_file, json_model_path, change_kapp_file, reaction_kapp_change_file,
                          kcat_data_colect_file="./data/kcat_data_colect.csv", enzyme_bound_factor=0.0228):
    """Raise selected kcat values to their kapp-derived values and log the biomass effect.

    For every reaction in ``need_change_reaction`` the 'kcat' entry of the
    kcat/MW table is replaced by ``kapp * 3600`` when that value is larger, and
    'kcat_MW' is recomputed as ``kcat / MW``. If the reaction exists in the
    model, its bounds are temporarily set to ``(0, kcat_MW * enzyme_bound_factor)``
    and the resulting objective value is recorded; the bound change is made
    inside a model context so it applies only to that one optimisation.

    Parameters
    ----------
    need_change_reaction : iterable of str
        Reaction ids to calibrate. Each id must be present in the index of both
        CSV tables, otherwise the ``.loc`` lookup raises ``KeyError``.
    reaction_kcat_mw_file : str
        CSV with reaction ids as index and 'kcat', 'MW', 'kcat_MW' columns.
    json_model_path : str
        cobra JSON model used for the biomass evaluation.
    change_kapp_file : str
        Output CSV with one row per calibrated reaction that exists in the model
        (kcat_ori, kcat_change, MW, kcat_mw_new, norm_biomass, new_biomass).
    reaction_kapp_change_file : str
        Output CSV with the full, updated kcat/MW table.
    kcat_data_colect_file : str, optional
        CSV with reaction ids as index and a 'kapp' column. Defaults to the
        path that was previously hard-coded.
    enzyme_bound_factor : float, optional
        Factor multiplied with kcat_MW to obtain the temporary upper flux bound.
        NOTE(review): 0.0228 was a hard-coded literal; the calibration-selection
        cell uses enzyme_amount * percentage instead — confirm the intended value.

    Returns
    -------
    None
        Results are written to the two output CSV files.
    """
    reaction_kappori = pd.read_csv(reaction_kcat_mw_file, index_col=0)
    kcat_data_colect = pd.read_csv(kcat_data_colect_file, index_col=0)
    norm_model = cobra.io.json.load_json_model(json_model_path)
    # Objective value of the unmodified model, stored next to each new value.
    norm_biomass = norm_model.slim_optimize()
    round_1_reaction_kapp_change = pd.DataFrame()

    for eachreaction in need_change_reaction:
        kcat_ori = reaction_kappori.loc[eachreaction, 'kcat']
        # presumably converts a per-second kapp to per-hour — TODO confirm units
        kcat_smoment_adj = kcat_data_colect.loc[eachreaction, 'kapp'] * 3600
        # Only ever raise kcat; a kapp-derived value below the current kcat is ignored.
        if kcat_ori < kcat_smoment_adj:
            reaction_kappori.loc[eachreaction, 'kcat'] = kcat_smoment_adj
        reaction_kappori.loc[eachreaction, 'kcat_MW'] = (
            reaction_kappori.loc[eachreaction, 'kcat'] / reaction_kappori.loc[eachreaction, 'MW']
        )

        # Direct id lookup instead of scanning (and opening a context for) every
        # reaction in the model. Reactions missing from the model are still
        # updated in the table but get no row in the change summary.
        if not norm_model.reactions.has_id(eachreaction):
            continue
        target_reaction = norm_model.reactions.get_by_id(eachreaction)

        # Bounds set inside the context are reverted when the block exits.
        with norm_model as model:
            target_reaction.bounds = (0, reaction_kappori.loc[eachreaction, 'kcat_MW'] * enzyme_bound_factor)
            round_1_reaction_kapp_change.loc[eachreaction, 'kcat_ori'] = kcat_ori
            round_1_reaction_kapp_change.loc[eachreaction, 'kcat_change'] = reaction_kappori.loc[eachreaction, 'kcat']
            round_1_reaction_kapp_change.loc[eachreaction, 'MW'] = reaction_kappori.loc[eachreaction, 'MW']
            round_1_reaction_kapp_change.loc[eachreaction, 'kcat_mw_new'] = reaction_kappori.loc[eachreaction, 'kcat_MW']
            round_1_reaction_kapp_change.loc[eachreaction, 'norm_biomass'] = norm_biomass
            round_1_reaction_kapp_change.loc[eachreaction, 'new_biomass'] = model.slim_optimize()

    round_1_reaction_kapp_change.to_csv(change_kapp_file)
    reaction_kappori.to_csv(reaction_kapp_change_file)


In [38]:
reaction_kcat_mw_file = "./analysis/reaction_kcat_mw.csv"
# Not used by the calibration_kcat_kapp call in this cell.
# Other column names that appeared here: 'smoment_no_adj_kcat', 'kcat_GO', 'smoment_adj_kcat'.
adj_kcat_title = 'kapp'
change_kapp_file = "./analysis/reaction_autopacmen_change_round_1_cb.csv"
reaction_kapp_change_file = "./analysis/reaction_autopacmen_change.csv"
# Calibrate exactly the reactions chosen by select_calibration_reaction above.
need_change_reaction=list(df_reaction_select.index)

# `json_model_path` is the value set in the calibration-selection cell above.
calibration_kcat_kapp(need_change_reaction, reaction_kcat_mw_file, json_model_path, change_kapp_file, reaction_kapp_change_file)


In [39]:
# Rebuild the enzyme-constrained model from the round-1 calibrated kcat/MW table.
model_file = './data/iML1515.xml'
reaction_kcat_mw_file="./analysis/reaction_autopacmen_change.csv"
json_output_file="./model/iML1515_irr_enz_constraint_autopacmen_adj_round1.json"
# The enzyme mass fraction.
f = 0.406
# The total protein fraction in cell.
ptot = 0.56
# The approximated average saturation of enzyme.
sigma = 0.5
# Use sigma = 1 instead for kapp data (saturation is 1).
# Lower bound of the enzyme concentration constraint.
lowerbound = 0
# Upper bound of the enzyme concentration constraint: ptot * f * sigma, rounded to 3 decimals.
upperbound = round(ptot * f * sigma, 3)

trans_model2enz_json_model(model_file, reaction_kcat_mw_file, f, ptot, sigma, lowerbound, upperbound, json_output_file)

In [40]:
# pFBA on the round-1 calibrated enzyme-constrained model; the flux table is
# kept as a DataFrame under `enz_model_pfba_solution` for the cells below.
json_model_path = "./model/iML1515_irr_enz_constraint_autopacmen_adj_round1.json"
enz_model = get_enzyme_constraint_model(json_model_path)
enz_model_pfba_solution = cobra.flux_analysis.pfba(enz_model).to_frame()
enz_model_pfba_solution.to_csv('./analysis/ECMpy_autopacmen_adj_round1_solution_df_pfba.csv')
print(enz_model_pfba_solution['fluxes']['BIOMASS_Ec_iML1515_core_75p37M'])

0.5915085370429086


Compare with C13 data

In [41]:
# Reaction ids of the C13 data set (index column of the CSV).
c13reaction_file = './data/C13reaction.csv'
c13reaction = pd.read_csv(c13reaction_file, index_col=0)
c13reaction = list(c13reaction.index)

# Reactions carrying strictly positive flux in the current pFBA solution.
enz_model_pfba_solution_select = enz_model_pfba_solution[enz_model_pfba_solution['fluxes']>0]
enz_model_pfba_solution_select_id = list(enz_model_pfba_solution_select.index)
# C13 reactions that are NOT active in the model solution (set order is arbitrary).
c13reaction_2_enz_model_diff=list(set(c13reaction).difference(set(enz_model_pfba_solution_select_id)))
print (c13reaction_2_enz_model_diff)

['MDH', 'ICL', 'PGI', 'SUCDi', 'PFK', 'PFL', 'TPI', 'AKGDH', 'PTAr', 'ACKr_reverse', 'PDH', 'SUCOAS_reverse', 'FBA']


Calibrate kcat according to the C13 reaction list

In [42]:
# Input is the round-1 calibrated table; output is the C13-calibrated table.
reaction_kcat_mw_file = "./analysis/reaction_autopacmen_change.csv"
# Column of the kcat collection used for the adjustment.
# Other column names that appeared here: 'smoment_no_adj_kcat', 'kcat_GO', 'smoment_adj_kcat'.
adj_kcat_title = 'kapp'
change_kapp_file = "./analysis/reaction_autopacmen_change_round_2_cb.csv"
reaction_kapp_change_file = "./analysis/reaction_autopacmen_change_c13.csv"

# Hand-picked reaction list to calibrate in this round.
TCA_reactions=['CS','ACONTa','ACONTb','ICDHyr','MALS', 'MDH', 'ICL', 'SUCOAS_reverse', 'SUCDi', 'AKGDH']
# NOTE(review): `calibration_kcat` is not defined in this notebook (only
# `calibration_kcat_kapp` is); presumably it comes from the star import of
# cobrapy_ec_model_function — confirm. `json_model_path` is whatever the
# previous pFBA cell set (the round-1 model).
calibration_kcat(TCA_reactions, reaction_kcat_mw_file, json_model_path, adj_kcat_title, change_kapp_file, reaction_kapp_change_file)


In [43]:
# Rebuild the enzyme-constrained model from the C13-calibrated kcat/MW table.
model_file = './data/iML1515.xml'
reaction_kcat_mw_file="./analysis/reaction_autopacmen_change_c13.csv"
json_output_file="./model/iML1515_irr_enz_constraint_autopacmen_adj_round2.json"
# The enzyme mass fraction.
f = 0.406
# The total protein fraction in cell.
ptot = 0.56
# The approximated average saturation of enzyme.
sigma = 0.5
# Use sigma = 1 instead for kapp data (saturation is 1).
# Lower bound of the enzyme concentration constraint.
lowerbound = 0
# Upper bound of the enzyme concentration constraint: ptot * f * sigma, rounded to 3 decimals.
upperbound = round(ptot * f * sigma, 3)

trans_model2enz_json_model(model_file, reaction_kcat_mw_file, f, ptot, sigma, lowerbound, upperbound, json_output_file)

In [44]:
# pFBA on the round-2 calibrated enzyme-constrained model; the flux table is
# kept as a DataFrame under `enz_model_pfba_solution` for the cells below.
json_model_path = "./model/iML1515_irr_enz_constraint_autopacmen_adj_round2.json"
enz_model = get_enzyme_constraint_model(json_model_path)
enz_model_pfba_solution = cobra.flux_analysis.pfba(enz_model).to_frame()
enz_model_pfba_solution.to_csv('./analysis/ECMpy_autopacmen_adj_round2_solution_df_pfba.csv')
print(enz_model_pfba_solution['fluxes']['BIOMASS_Ec_iML1515_core_75p37M'])

0.5924444376126025


In [45]:
# Reaction ids of the C13 data set (index column of the CSV).
c13reaction_file = './data/C13reaction.csv'
c13reaction = pd.read_csv(c13reaction_file, index_col=0)
c13reaction = list(c13reaction.index)

# Reactions carrying strictly positive flux in the round-2 pFBA solution.
enz_model_pfba_solution_select = enz_model_pfba_solution[enz_model_pfba_solution['fluxes']>0]
enz_model_pfba_solution_select_id = list(enz_model_pfba_solution_select.index)
# C13 reactions that are NOT active in the model solution (set order is arbitrary).
c13reaction_2_enz_model_diff=list(set(c13reaction).difference(set(enz_model_pfba_solution_select_id)))
print (c13reaction_2_enz_model_diff)

['MDH', 'ICL', 'PGI', 'PFL', 'PFK', 'TPI', 'AKGDH', 'PTAr', 'ACKr_reverse', 'PDH', 'SUCOAS_reverse', 'FBA']


Calibrate kcat according to enzyme usage

In [46]:
# The enzyme mass fraction.
f = 0.406
# The total protein fraction in cell.
ptot = 0.56
# The approximated average saturation of enzyme.
sigma = 0.5
# Use sigma = 1 instead when the kcat values are kapp data.
# Total enzyme amount, same formula as `upperbound` in the model-building cells.
enz_total = round(ptot * f * sigma, 3)
# Inputs: round-2 pFBA fluxes and the C13-calibrated kcat/MW table.
reaction_flux_file = './analysis/ECMpy_autopacmen_adj_round2_solution_df_pfba.csv'
reaction_kcat_mw_file = "./analysis/reaction_autopacmen_change_c13.csv"
reaction_enz_usage_file = './analysis/ECMpy_autopacmen_adj_round2_reaction_enz_usage_df.csv'

reaction_enz_usage_df = get_enzyme_usage(enz_total,reaction_flux_file,reaction_kcat_mw_file,reaction_enz_usage_file)
# Show the first ten rows as the cell output.
reaction_enz_usage_df.head(10)

Unnamed: 0,kcat_mw,flux,enz useage,enz ratio
KARA1_reverse,38.284397,0.518014,0.013531,0.11869
GND,2633.457211,28.453199,0.010805,0.094776
TKT2,1156.921394,9.141693,0.007902,0.069314
PGL,5091.247502,28.453199,0.005589,0.049023
ATPS4rpp,12744.657385,59.553465,0.004673,0.04099
PAPSR2,33.787531,0.146619,0.004339,0.038065
KARA2,42.209426,0.172122,0.004078,0.03577
ENO,2525.688868,8.038974,0.003183,0.02792
CYTBO3_4pp,20262.035207,62.559977,0.003088,0.027084
GAPD,2950.918009,9.090239,0.00308,0.027022


In [47]:
def change_reaction_kcat_by_kapp(select_reaction, reaction_kcat_mw_file, reaction_kapp_change_file,
                                 kcat_data_colect_file="./data/kcat_data_colect.csv"):
    """Raise kcat of the selected reactions to ``kapp * 3600`` where that is larger.

    Parameters
    ----------
    select_reaction : iterable of str
        Reaction ids to check. Each id must be present in the index of both
        CSV tables, otherwise the ``.loc`` lookup raises ``KeyError``.
    reaction_kcat_mw_file : str
        CSV with reaction ids as index and 'kcat', 'MW', 'kcat_MW' columns.
    reaction_kapp_change_file : str
        Output CSV; receives the whole table, changed or not.
    kcat_data_colect_file : str, optional
        CSV with reaction ids as index and a 'kapp' column. Defaults to the
        path that was previously hard-coded.

    Returns
    -------
    list of str
        Ids of the reactions whose kcat was replaced, in input order.
    """
    reaction_kcat_mw = pd.read_csv(reaction_kcat_mw_file, index_col=0)
    kcat_data_colect = pd.read_csv(kcat_data_colect_file, index_col=0)

    reaction_change_accord_fold = []
    for eachreaction in select_reaction:
        kcat_current = reaction_kcat_mw.loc[eachreaction, 'kcat']
        # presumably converts a per-second kapp to per-hour — TODO confirm units
        kcat_from_kapp = kcat_data_colect.loc[eachreaction, 'kapp'] * 3600
        # Only ever raise kcat; smaller kapp-derived values leave the row untouched.
        if kcat_current < kcat_from_kapp:
            # Progress output: old kcat followed by its replacement.
            print(kcat_current, kcat_from_kapp)
            reaction_kcat_mw.loc[eachreaction, 'kcat'] = kcat_from_kapp
            reaction_kcat_mw.loc[eachreaction, 'kcat_MW'] = kcat_from_kapp / reaction_kcat_mw.loc[eachreaction, 'MW']
            reaction_change_accord_fold.append(eachreaction)

    reaction_kcat_mw.to_csv(reaction_kapp_change_file)
    return reaction_change_accord_fold

In [48]:
# Reactions whose share of total enzyme usage ('enz ratio') exceeds 0.01, i.e. more than 1%.
select_reaction = list(reaction_enz_usage_df[reaction_enz_usage_df['enz ratio']>0.01].index)
print(select_reaction)
# Input is the C13-calibrated table; output adds the enzyme-usage calibration.
reaction_kcat_mw_file = "./analysis/reaction_autopacmen_change_c13.csv"
reaction_kapp_change_file = "./analysis/reaction_autopacmen_change_c13_enzuse.csv"

# Returns the subset of select_reaction whose kcat was actually raised.
change_reaction_list=change_reaction_kcat_by_kapp(select_reaction,reaction_kcat_mw_file,reaction_kapp_change_file)
print(change_reaction_list)

['KARA1_reverse', 'GND', 'TKT2', 'PGL', 'ATPS4rpp', 'PAPSR2', 'KARA2', 'ENO', 'CYTBO3_4pp', 'GAPD', 'PSERT', 'ACGS', 'PPC', 'SADT2', 'ADSS', 'IPPMIa_reverse', 'IPPMIb_reverse', 'ACCOAC', 'ASNS2', 'ACOTA_reverse', 'IPPS', 'CTPS2', 'GLNS']
84504.66232 324065.525472
184850.9778 662400.0
2199.2 25460.248013999997
18918.68531 820800.0
20816.8666 102441.80628000002
7565.532908 11880.0
9598.568465 106566.407064
9598.568465 123242.323212
7282.8 68400.0
5653.495904 32625.378558
11161.0094 36000.0
16891.99541 39600.0
['TKT2', 'PGL', 'PAPSR2', 'ACGS', 'SADT2', 'ADSS', 'IPPMIa_reverse', 'IPPMIb_reverse', 'ASNS2', 'ACOTA_reverse', 'IPPS', 'CTPS2']


In [49]:
# Rebuild the enzyme-constrained model from the enzyme-usage calibrated kcat/MW table.
model_file = './data/iML1515.xml'
reaction_kcat_mw_file="./analysis/reaction_autopacmen_change_c13_enzuse.csv"
json_output_file="./model/iML1515_irr_enz_constraint_autopacmen_adj_round3.json"
# The enzyme mass fraction.
f = 0.406
# The total protein fraction in cell.
ptot = 0.56
# NOTE(review): the original inline note read "kcapp data, saturation is 1",
# yet 0.5 is the value used here — confirm which is intended.
sigma = 0.5
# Lower bound of the enzyme concentration constraint.
lowerbound = 0
# Upper bound of the enzyme concentration constraint: ptot * f * sigma, rounded to 3 decimals.
upperbound = round(ptot * f * sigma, 3)

trans_model2enz_json_model(model_file, reaction_kcat_mw_file, f, ptot, sigma, lowerbound, upperbound, json_output_file)

In [50]:
# pFBA on the round-3 calibrated enzyme-constrained model; the flux table is
# kept as a DataFrame under `enz_model_pfba_solution` for the cells below.
json_model_path = "./model/iML1515_irr_enz_constraint_autopacmen_adj_round3.json"
enz_model = get_enzyme_constraint_model(json_model_path)
enz_model_pfba_solution = cobra.flux_analysis.pfba(enz_model).to_frame()
enz_model_pfba_solution.to_csv('./analysis/ECMpy_autopacmen_adj_round3_solution_df_pfba.csv')
print(enz_model_pfba_solution['fluxes']['BIOMASS_Ec_iML1515_core_75p37M'])

0.714282760292871


In [51]:
# Reaction ids of the C13 data set (index column of the CSV).
c13reaction_file = './data/C13reaction.csv'
c13reaction = pd.read_csv(c13reaction_file, index_col=0)
c13reaction = list(c13reaction.index)

# Reactions carrying strictly positive flux in the round-3 pFBA solution.
enz_model_pfba_solution_select = enz_model_pfba_solution[enz_model_pfba_solution['fluxes']>0]
enz_model_pfba_solution_select_id = list(enz_model_pfba_solution_select.index)
# C13 reactions that are NOT active in the model solution (set order is arbitrary).
c13reaction_2_enz_model_diff=list(set(c13reaction).difference(set(enz_model_pfba_solution_select_id)))
# Original note: IDs in the enzyme model are ['PTAr_reverse', 'ACKr'].
print (c13reaction_2_enz_model_diff)

['MDH', 'ICL', 'PGI', 'PFL', 'PFK', 'TPI', 'AKGDH', 'PYK', 'PTAr', 'ACKr_reverse', 'PDH', 'SUCOAS_reverse', 'FBA']


# Step5: Solving the enzyme-constrained model with COBRApy.

In [24]:
# Run the enzyme-constrained metabolic model.
# NOTE(review): this path has no "autopacmen" in it, so it is NOT the round-3
# file written earlier in this notebook
# ("./model/iML1515_irr_enz_constraint_autopacmen_adj_round3.json"). The cell's
# execution count is also out of sequence. Confirm which model is intended.
json_model_path="./model/iML1515_irr_enz_constraint_adj_round3.json"
enz_model=get_enzyme_constraint_model(json_model_path)
pfba_solution = cobra.flux_analysis.pfba(enz_model)
pfba_solution_df = pfba_solution.to_frame()
# Saving the flux table is disabled here:
#pfba_solution_df.to_csv('./analysis/ECMpy_solution_df_pfba.csv')
# Bare expression: show the biomass flux as the cell output.
pfba_solution.fluxes['BIOMASS_Ec_iML1515_core_75p37M']

0.7894976064941117

In [25]:
# Run the genome-scale metabolic model: the same JSON file is loaded directly
# with cobra instead of through get_enzyme_constraint_model.
# Relies on `json_model_path` from the previous cell; `pfba_solution` and
# `pfba_solution_df` from that cell are overwritten here.
norm_model=cobra.io.json.load_json_model(json_model_path)
pfba_solution = cobra.flux_analysis.pfba(norm_model)
pfba_solution_df = pfba_solution.to_frame()
# Saving the flux table is disabled here:
#pfba_solution_df.to_csv('./analysis/Orimodel_solution_df_pfba.csv')
# Bare expression: show the biomass flux as the cell output.
pfba_solution.fluxes['BIOMASS_Ec_iML1515_core_75p37M']


0.8769972144269698