# Import related packages

In [1]:
import cobra
import sys
sys.path.append(r'./code/')
from cobrapy_ec_model_function import *

# Input files

In [11]:
# ---------------------------------------------------------------------------
# Input model
# ---------------------------------------------------------------------------
# Genome-scale metabolic model (SBML) used to build the enzyme-constrained model.
# Alternative input kept for reference: './data/iML1515.xml'
model_name = "./model/iML1515_new.xml"

# ---------------------------------------------------------------------------
# Kinetic and proteomic inputs
# ---------------------------------------------------------------------------
# Reaction -> kcat mapping (kcat in s^-1).
# Example entry: AADDGT,"forward": 1.30037482842091
# Median-based alternative kept for reference:
#   "./data/autopacmen/iml1515_new_median_reactions_kcat_mapping_combined.json"
reaction_kcat_file = "./data/autopacmen/iml1515_new_mean_reactions_kcat_mapping_combined.json"

# Gene -> abundance table.
# Example row: b0789,1.1
gene_abundance_file = "./data/gene_abundance.csv"

# Gene -> molecular weight table.
# Example row: b3500,48771.94  (values are in Da; divide by 1000 to obtain kDa)
gene_molecular_weight_file = "./data/gene_molecular_weight.csv"

# Protein id -> mass mapping.
# Median-based alternative kept for reference:
#   "./data/autopacmen/iml1515_new_median_protein_id_mass_mapping.json"
protein_molecular_weight_file = "./data/autopacmen/iml1515_new_mean_protein_id_mass_mapping.json"

# Reaction / gene / subunit annotation table read in Step 2.
reaction_gene_subunit_file = "./data/reaction_gene_subunit_20201201.csv"

# ---------------------------------------------------------------------------
# Intermediate files under ./analysis
# ---------------------------------------------------------------------------
gene_outfile = "./analysis/genes.csv"
gpr_outfile = "./analysis/all_reaction_GPR.csv"
reaction_gene_subunit_MW_file = "./analysis/reaction_gene_subunit_MW.csv"
reaction_MW_file = "./analysis/reaction_MW.csv"
reaction_kcat_MW_file = "./analysis/reaction_kcat_MW.csv"

# Step1: Preprocessing of model

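Each reversible reaction in the GEM is divided into two irreversible reactions, and reactions catalyzed by isoenzymes are then split into one reaction per isoenzyme. The input is iML1515 with 2712 reactions. The output is a model with 3375 irreversible reactions; the model summary below reports 5883 reactions once the isoenzymes have also been split.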

In [12]:
# Read the SBML file named by `model_name` into a cobra model.
model = cobra.io.read_sbml_model(model_name)
# Called for its effect on `model`; its return value is not used here.
convert_to_irreversible(model)
# Split isoenzymes; the model returned by the helper replaces `model`.
model = isoenzyme_split(model)
# Bare expression so the notebook renders the model summary as the cell output.
model

0,1
Name,iML1515
Memory address,0x01f954b7deb8
Number of metabolites,1877
Number of reactions,5883
Number of groups,38
Objective expression,1.0*BIOMASS_Ec_iML1515_core_75p37M - 1.0*BIOMASS_Ec_iML1515_core_75p37M_reverse_35685
Compartments,"cytosol, extracellular space, periplasm"


# Step2: Retrieving enzyme kinetics and proteomics data

The input is the GEM model. The outputs are the 'genes' and 'gpr_relationship' data of iML1515.

In [13]:
# Extract the gene list and the GPR relationships from the preprocessed model.
# The two CSV paths come from the configuration cell.
genes, gpr_relationship = get_genes_and_gpr(model, gene_outfile, gpr_outfile)

Get the molecular weight (MW) of each enzyme according to all_reaction_GPR.csv, which was obtained in the previous step (gpr_relationship, ./analysis/all_reaction_GPR.csv). The gene_reaction_rule errors of a small number of reactions in iML1515 need to be corrected manually (see Supplementary Table S1 for details), and the subunit number of each protein also needs to be obtained manually from EcoCyc.

In [14]:
# Build the reaction / gene / subunit / MW table from the curated subunit file
# and the protein mass mapping. The third argument is the ./analysis CSV path
# that the next step reads back (presumably written by this helper).
reaction_gene_subunit_MW = get_reaction_gene_subunit_MW(
    reaction_gene_subunit_file,
    protein_molecular_weight_file,
    reaction_gene_subunit_MW_file,
)
# Preview the first five rows.
reaction_gene_subunit_MW.head(5)

Unnamed: 0,name,gene_reaction_rule,subunit_num,subunit_mw
ALATA_D2_num1,D-alanine transaminase,b2551,2,45.3161
ALATA_D2_num2,D-alanine transaminase,b0870,4,36.4943
SHCHD2,Sirohydrochlorin dehydrogenase (NAD),b3368,2,49.9508
CPPPGO,Coproporphyrinogen oxidase (O2 required),b2436,2,34.3222
GTHOr,Glutathione oxidoreductase,b3500,2,48.7719


In [15]:
# Input: the reaction-gene-subunit-MW file produced in the previous cell.
# Example row:
#   ALATA_D2,D-alanine transaminase,b2551 or b0870,45.31659 or 36.49471 ,2 or 4
reaction_MW = calculate_reaction_mw_not_consider_subunit(
    reaction_gene_subunit_MW_file,
    reaction_MW_file,
)
# Preview the first five rows.
reaction_MW.head(5)

Unnamed: 0,MW
ALATA_D2_num1,45.3161
ALATA_D2_num2,36.4943
SHCHD2,49.9508
CPPPGO,34.3222
GTHOr,48.7719


Calculate kcat/MW. The inputs are 'reaction_kcat' and 'reaction_MW' data for calculating the kcat/MW (When the reaction is catalyzed by several isozymes, the maximum is retained).

In [16]:
# Combine the reaction kcat mapping with the reaction MW table; the third
# argument is the kcat/MW CSV path from the configuration cell.
reaction_kcat_mw = calculate_reaction_kcat_mw(
    reaction_kcat_file,
    reaction_MW_file,
    reaction_kcat_MW_file,
)
# Preview the first five rows.
reaction_kcat_mw.head(5)

Unnamed: 0,MW,kcat,kcat_MW
ALATA_D2_num1,45.3161,4681.349,103.304331
ALATA_D2_num2,36.4943,4681.349,128.27618
CPPPGO,34.3222,2750.765,80.145348
GTHOr,48.7719,1168425.0,23956.924528
DHORD5,36.7741,159865.4,4347.22755


Calculate f. The inputs are the 'genes' data, 'gene_abundance.csv' and 'gene_molecular_weight.csv'.

In [17]:
# Enzyme mass fraction f, computed from the model genes and the gene
# abundance / gene molecular weight CSV files named in the configuration cell.
f = calculate_f(genes, gene_abundance_file, gene_molecular_weight_file)
# Bare expression so the notebook displays the computed value.
f

0.4059986079578236

# Step3: Save enzyme concentration constraint model as json file.

In [18]:
# Notes carried over from the original cell (marked "*2"); their exact meaning
# is not documented here -- NOTE(review): presumably manual kcat adjustments,
# confirm against the kcat mapping:
#   PDH,1459824,608.26,2400
#   SUCOAS_reverse,1447200,142.3403,10167.1839

# Reuse the kcat/MW CSV path from the configuration cell (the same path that
# was passed to calculate_reaction_kcat_mw) instead of repeating the literal,
# so the two cannot drift apart.
reaction_kcat_mw_file = reaction_kcat_MW_file
json_output_file = "./model/iML1515_irr_enz_constraint_autopacmen.json"

# Enzyme mass fraction: derived from the value computed by calculate_f()
# rather than a hand-copied constant. Rounded to three decimals, which gives
# 0.406 for the value displayed above (0.4059986...). Re-running this cell is
# safe because rounding an already rounded value is a no-op.
f = round(float(f), 3)

# Total protein fraction in the cell.
ptot = 0.56

# Approximated average saturation of enzyme. 1 is used for kapp data; 0.5 was
# the alternative noted in the original cell.
sigma = 1

# Lower and upper bound of the enzyme concentration constraint.
lowerbound = 0
upperbound = round(ptot * f * sigma, 3)

# Write the enzyme-constrained model to json_output_file. The helper receives
# the SBML path (model_name), not the in-memory `model`.
trans_model2enz_json_model_split_isoenzyme(
    model_name,
    reaction_kcat_mw_file,
    f,
    ptot,
    sigma,
    lowerbound,
    upperbound,
    json_output_file,
)

In [19]:
# Paths used by this cell.
json_model_path = "./model/iML1515_irr_enz_constraint_autopacmen.json"
reaction_kcat_mw_file = "./analysis/reaction_kcat_MW.csv"
fluxes_outfile = "./analysis/ECMpy_ori_solution_df_pfba.csv"

# --- Enzyme-constrained model ---------------------------------------------
# Load the model through the project helper, collect the detailed fluxes
# (written to fluxes_outfile), and report the biomass flux.
enz_model = get_enzyme_constraint_model(json_model_path)
enz_model_pfba_solution = get_fluxes_detail_in_model(
    enz_model,
    fluxes_outfile,
    reaction_kcat_mw_file,
)
print(enz_model_pfba_solution.fluxes["BIOMASS_Ec_iML1515_core_75p37M"])

# --- Same JSON file loaded with plain cobrapy -----------------------------
# Run pFBA, save the solution table, and report the biomass flux so the two
# numbers can be compared.
norm_model = cobra.io.json.load_json_model(json_model_path)
norm_model_pfba_solution = cobra.flux_analysis.pfba(norm_model)
norm_model_pfba_solution_df = norm_model_pfba_solution.to_frame()
norm_model_pfba_solution_df.to_csv("./analysis/Orimodel_solution_df_pfba.csv")
print(norm_model_pfba_solution_df["fluxes"]["BIOMASS_Ec_iML1515_core_75p37M"])

0.8697726420320155
0.869772642032012
