# 3.3 iDK1463 comparison

In [1]:
#import package needed
import cobra
import pandas as pd
import numpy as np

from cobra.io import load_json_model
from cobra import Model, Reaction, Metabolite
from cobra.manipulation.modify import rename_genes

In [2]:
# Genome accession of the EcN strain; it is interpolated into the model file name
EcN_ID = 'CP022686.1'

# Draft EcN model (file version 3.1). The curated alternative that was tried
# here earlier lives at '../data/models/%s_cur_4.7.json'.
EcN_draft_path = '../data/models/%s_draft_3.1.json' % EcN_ID
EcN_model = load_json_model(EcN_draft_path)

# Published EcN reconstruction iDK1463 from Kim et al. (2021)
iDK1463_path = '../data/models_ecn/iDK1463.json'
EcN2_model = load_json_model(iDK1463_path)

In [3]:
# Display the flux summary (uptake and secretion tables) of the draft EcN model
EcN_model.summary()

Metabolite,Reaction,Flux,C-Number,C-Flux
ca2_e,EX_ca2_e,0.1307,0,0.00%
cl_e,EX_cl_e,0.1307,0,0.00%
cobalt2_e,EX_cobalt2_e,0.0006276,0,0.00%
cu2_e,EX_cu2_e,0.0178,0,0.00%
fe2_e,EX_fe2_e,0.4032,0,0.00%
glc__D_e,EX_glc__D_e,10.0,6,100.00%
h2o_e,EX_h2o_e,443.4,0,0.00%
k_e,EX_k_e,4.9,0,0.00%
mg2_e,EX_mg2_e,0.2178,0,0.00%
mn2_e,EX_mn2_e,0.01735,0,0.00%

Metabolite,Reaction,Flux,C-Number,C-Flux
4crsol_c,DM_4crsol_c,-0.005598,7,0.00%
5drib_c,DM_5drib_c,-0.005648,5,0.00%
amob_c,DM_amob_c,-5.02e-05,15,0.00%
mththf_c,DM_mththf_c,-0.01125,5,0.00%
ac_e,EX_ac_e,-567.9,2,55.96%
co2_e,EX_co2_e,-893.8,1,44.03%
meoh_e,EX_meoh_e,-5.02e-05,1,0.00%
pi_e,EX_pi_e,-975.8,0,0.00%


In [4]:
# Display the flux summary (uptake and secretion tables) of iDK1463 for comparison
EcN2_model.summary()

Metabolite,Reaction,Flux,C-Number,C-Flux
ca2_e,EX_ca2_e,0.004582,0,0.00%
cl_e,EX_cl_e,0.004582,0,0.00%
cobalt2_e,EX_cobalt2_e,2.201e-05,0,0.00%
cu2_e,EX_cu2_e,0.0006242,0,0.00%
fe2_e,EX_fe2_e,0.007265,0,0.00%
fe3_e,EX_fe3_e,0.006874,0,0.00%
glc__D_e,EX_glc__D_e,10.0,6,100.00%
k_e,EX_k_e,0.1718,0,0.00%
mg2_e,EX_mg2_e,0.007637,0,0.00%
mn2_e,EX_mn2_e,0.0006083,0,0.00%

Metabolite,Reaction,Flux,C-Number,C-Flux
4crsol_c,DM_4crsol_c,-0.0001963,7,0.01%
5drib_c,DM_5drib_c,-0.0001981,5,0.00%
amob_c,DM_amob_c,-1.761e-06,15,0.00%
mththf_c,DM_mththf_c,-0.0003944,5,0.01%
ac_e,EX_ac_e,-1.743,2,14.60%
co2_e,EX_co2_e,-20.38,1,85.38%
h2o_e,EX_h2o_e,-43.62,0,0.00%
h_e,EX_h_e,-9.838,0,0.00%
meoh_e,EX_meoh_e,-1.761e-06,1,0.00%


# 1. Compare genes
- Get the old and new locus tag of the CP007799.1 genome
- Compare this with the locus tags from CP022686.1
- Create a dictionary

In [5]:
#import packages needed
from glob import glob
from Bio import Entrez, SeqIO

### 1.1 Create common gene ID table

In [6]:
# Get both locus tags from the CP007799.1 genbank file

in_file = '../data/models_ecn/CP007799.1.gb'

# GenBank CDS qualifier -> column of the annotation table
qualifier_to_column = {
    'old_locus_tag': 'old_locus',
    'gene': 'ncbigene',
    'protein_id': 'ncbiprotein',
}

# One dict of annotations per locus tag, kept in order of first appearance
annotations_by_locus = {}

# The context manager guarantees the GenBank file is closed after parsing
with open(in_file) as handle:
    for record in SeqIO.parse(handle, "genbank"):
        for f in record.features:
            if f.type != 'CDS':
                continue

            # A CDS without a locus tag cannot be keyed; skip it instead of
            # attributing its annotations to the previously seen locus
            if 'locus_tag' not in f.qualifiers:
                continue
            locus = f.qualifiers['locus_tag'][0]

            # Keep the first value of every qualifier that is present
            found = {
                column: f.qualifiers[qualifier][0]
                for qualifier, column in qualifier_to_column.items()
                if qualifier in f.qualifiers
            }

            # A locus only gets a row when at least one annotation exists;
            # a repeated locus tag updates its existing row
            if found:
                annotations_by_locus.setdefault(locus, {}).update(found)

# Build the table in one go; qualifiers that were absent become NaN
gene_annot_df = pd.DataFrame(
    list(annotations_by_locus.values()),
    index=pd.Index(list(annotations_by_locus.keys()), name='locus'),
    columns=['old_locus', 'ncbigene', 'ncbiprotein'],
)

gene_annot_df.head()

Unnamed: 0_level_0,old_locus,ncbigene,ncbiprotein
locus,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ECOLIN_RS00005,,thrL,WP_001386572.1
ECOLIN_RS00010,ECOLIN_00010,thrA,WP_001264710.1
ECOLIN_RS00015,ECOLIN_00015,thrB,WP_000241663.1
ECOLIN_RS00020,ECOLIN_00020,thrC,WP_001704269.1
ECOLIN_RS00030,ECOLIN_00025,,WP_000738734.1


In [7]:
# Conversion matrix: the unnamed first CSV column holds the old locus tags
# and becomes the index
conv_df = (
    pd.read_csv('../tables/new_old_EcN_geneIDs_matrix.csv')
    .rename(columns={'Unnamed: 0': 'old_locus'})
    .set_index('old_locus')
)

# Attach the CP022686.1 locus tag to every annotated CP007799.1 gene
# (default inner join on the old locus tag)
convRS_df = gene_annot_df.merge(conv_df, left_on="old_locus", right_index=True)

# Lookup: CP007799.1 locus tag (index of convRS_df) -> CP022686.1 locus tag
# NOTE(review): genes without a CP022686.1 counterpart keep the string 'None'
# as target here; confirm that is intended wherever this dictionary is used
EcN_dict = convRS_df['CP022686.1'].to_dict()

# Number of genes not present in CP022686.1 (marked with the string 'None')
is_missing = convRS_df['CP022686.1'] == 'None'
print("missing genes of CP0226861. =", len(convRS_df[is_missing]))

# Inspect dataframe
convRS_df.head()

missing genes of CP0226861. = 22


Unnamed: 0_level_0,old_locus,ncbigene,ncbiprotein,CP022686.1
locus,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ECOLIN_RS00010,ECOLIN_00010,thrA,WP_001264710.1,CIW80_17685
ECOLIN_RS00015,ECOLIN_00015,thrB,WP_000241663.1,CIW80_17690
ECOLIN_RS00020,ECOLIN_00020,thrC,WP_001704269.1,CIW80_17695
ECOLIN_RS00030,ECOLIN_00025,,WP_000738734.1,CIW80_17700
ECOLIN_RS00035,ECOLIN_00030,yaaA,WP_000906193.1,CIW80_17705


### 1.2 Differences between models

In [8]:
# One single-column dataframe of gene IDs per model, built in one step
# instead of enlarging an empty frame row by row with a manual counter
EcN_gen_df = pd.DataFrame({'EcN_gen': [gen.id for gen in EcN_model.genes]})
EcN2_gen_df = pd.DataFrame({'EcN2_gen': [gen.id for gen in EcN2_model.genes]})

In [9]:
# Translate the iDK1463 gene IDs to CP022686.1 locus tags (default inner join
# against the index of convRS_df)
# NOTE(review): re-running this cell merges the already merged frame again
EcN2_gen_df = EcN2_gen_df.merge(convRS_df, left_on="EcN2_gen", right_index=True)

# Gene ID sets of both models
EcN_gen = set(EcN_gen_df['EcN_gen'].to_list())
EcN2_gen = set(EcN2_gen_df['CP022686.1'].to_list())

# Genes found in only one of the two models
# NOTE(review): the placeholder string 'None' may be an element of EcN2_gen
# and would then be counted as one EcN2-specific gene - confirm
EcN_g_only = EcN_gen.difference(EcN2_gen)
EcN2_g_only = EcN2_gen.difference(EcN_gen)

print('Number of genes specific to EcN: ', len(EcN_g_only),'Number of genes specific to EcN2: ', len(EcN2_g_only))

Number of genes specific to EcN:  60 Number of genes specific to EcN2:  37


# 2. Compare reactions
- Compare the reactions
- Add missing reactions to the updated EcN_model

In [10]:
# Reload the non-curated draft model: duplicate ("copy") reactions were not
# removed from the iDK1463 model, so the comparison uses the draft that
# still contains them
EcN_ID = 'CP022686.1'
EcN_draft_path = '../data/models/%s_draft_3.1.json' % EcN_ID
EcN_model = load_json_model(EcN_draft_path)

### 2.1 Compare reactions

In [11]:
# Reaction IDs of both models
EcN_rxn = [rxn.id for rxn in EcN_model.reactions]
EcN2_rxn = [rxn.id for rxn in EcN2_model.reactions]

# Reaction IDs that occur in only one of the two models
EcN_only = set(EcN_rxn).difference(EcN2_rxn)
EcN2_only = set(EcN2_rxn).difference(EcN_rxn)

print(len(EcN_only), len(EcN2_only))

# Show the reactions that only iDK1463 contains
EcN2_only

102 219


{'23DOGULNt4pp',
 '3SLAR',
 '5DGLCNR',
 '6D6SFK',
 '6D6SPA',
 'AATA',
 'AIRStpp',
 'AKGDH2',
 'ALAASPabcpp',
 'ALAASPtex',
 'ALAASPtpp',
 'ALAGLNabcpp',
 'ALAGLNtex',
 'ALAGLNtpp',
 'ALAGLUtex',
 'ALAGLUtpp',
 'ALAGLYabcpp',
 'ALAGLYtex',
 'ALAGLYtpp',
 'ALAHIStpp',
 'ALALEUtpp',
 'ALATHRtpp',
 'ALAt2pp_copy2',
 'AMPEP1',
 'AMPEP10',
 'AMPEP11',
 'AMPEP13',
 'AMPEP14',
 'AMPEP22',
 'AMPEP23',
 'AMPEP24',
 'AMPEP26',
 'AMPEP27',
 'AMPEP29',
 'AMPEP32',
 'AMPEP5',
 'AMPEP8',
 'AOXSr',
 'ASNabcpp',
 'BDH',
 'BIOMASS_Ec_iDK1463_core_59p80M',
 'CELLB6PH',
 'CYNTt2pp',
 'CYSIabcpp',
 'D5KGPA',
 'DM_lipidA_core_e_p',
 'DXYLUDtpp',
 'ETHAtpp',
 'EX_13ppd_e',
 'EX_23dhbzs3_e',
 'EX_2hb_e',
 'EX_2obut_e',
 'EX_2pg_e',
 'EX_3hoxpac_e',
 'EX_3ntym_e',
 'EX_3pg_e',
 'EX_4abz_e',
 'EX_4hoxpac_e',
 'EX_4hphac_e',
 'EX_4hthr_e',
 'EX_5aptn_e',
 'EX_6apa_e',
 'EX_AEP_e',
 'EX_C02356_e',
 'EX_CE0074_e',
 'EX_L_alagly_e',
 'EX_abt__D_e',
 'EX_abt__L_e',
 'EX_acglu_e',
 'EX_acolipa_e',
 'EX_acon_C_e',
 'E

In [12]:
# Start from every reaction that only iDK1463 has ...
rxn_to_add = list(EcN2_only)

# ... and take out the ones that must not be copied over in the loop below
excluded_rxns = (
    'ALAt2pp_copy2',                   # original already present
    'MALDDH',                          # to be added in notebook 4.3
    'BIOMASS_Ec_iDK1463_core_59p80M',  # added at the end of this notebook
)
for excluded_id in excluded_rxns:
    # list.remove raises ValueError when the ID is unexpectedly absent
    rxn_to_add.remove(excluded_id)

In [13]:
# Provenance table of the model reactions, indexed by reaction ID
rxn_origin_df = pd.read_csv('../tables/rxn_origin.csv').set_index('reaction')
rxn_origin_df.head()

Unnamed: 0_level_0,origin,added,notebook,notes
reaction,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CYTDK2,iML1515,automatic,3.1,none
XPPT,iML1515,automatic,3.1,none
HXPRT,iML1515,automatic,3.1,none
NDPK5,iML1515,automatic,3.1,none
SHK3Dr,iML1515,automatic,3.1,none


### 2.2 Add reactions

In [14]:
# Provenance recorded for every reaction taken over from iDK1463
iDK1463_provenance = {
    'origin': 'iDK1463',
    'added': 'automatic',
    'notebook': '3.3',
    'notes': 'none',
}

# add reaction
for reaction in rxn_to_add:
    # Add a copy: handing the iDK1463 reaction object itself to EcN_model
    # would make both models share that reaction together with its
    # metabolite and gene objects, so later edits (e.g. renaming genes)
    # would leak into EcN2_model
    rxn_copy = EcN2_model.reactions.get_by_id(reaction).copy()
    EcN_model.add_reactions([rxn_copy])

    # Add reaction to overview table
    for column, value in iDK1463_provenance.items():
        rxn_origin_df.loc[reaction, column] = value

### 2.3 Update Gene ID

In [15]:
# Give every gene that still carries an ECOLIN locus tag the gene name from
# the GenBank annotation overview, where one is available
for gene in EcN_model.genes:
    if 'ECOLIN' not in gene.id:
        continue

    # Explicit membership test instead of a bare except, so that unrelated
    # errors are no longer reported as "Gene not in overview"
    if gene.id not in gene_annot_df.index:
        print('Gene not in overview:', gene.id)
        continue

    annotated_name = gene_annot_df.loc[gene.id, 'ncbigene']
    if pd.isnull(annotated_name):
        # No gene name in the annotation overview: keep the current name
        print(gene.id)
    else:
        gene.name = annotated_name
        print(gene.id, annotated_name)

ECOLIN_RS01800 pepD
ECOLIN_RS03795 dtpD
ECOLIN_RS23900 dtpC
ECOLIN_RS09120 dtpA
ECOLIN_RS19975 dtpB
ECOLIN_RS22340
ECOLIN_RS01525
ECOLIN_RS23455
ECOLIN_RS13875 ptsH
ECOLIN_RS23205
ECOLIN_RS23210
ECOLIN_RS13880 ptsI
ECOLIN_RS23195
ECOLIN_RS23200
ECOLIN_RS01820 phoE
ECOLIN_RS08090 ompN
ECOLIN_RS04920 ompF
ECOLIN_RS12935 ompC
ECOLIN_RS07520 oppC
ECOLIN_RS20270 dppB
ECOLIN_RS20265 dppC
ECOLIN_RS07525 oppD
ECOLIN_RS20255 dppF
ECOLIN_RS20260 dppD
ECOLIN_RS07510 oppA
ECOLIN_RS07515 oppB
ECOLIN_RS20280 dppA
ECOLIN_RS07530 oppF
ECOLIN_RS02490 tsx
ECOLIN_RS04135 bioF
ECOLIN_RS22350
ECOLIN_RS20830
ECOLIN_RS24935 yjiR
ECOLIN_RS08295 ydcR
ECOLIN_RS17895 yhaK
ECOLIN_RS19680 yhhW
ECOLIN_RS23190 sorE
ECOLIN_RS27005
ECOLIN_RS22345 yihU
ECOLIN_RS26995
ECOLIN_RS25965 pepQ
ECOLIN_RS22185 pepQ
ECOLIN_RS09895 ydjJ
ECOLIN_RS14410 yphC
ECOLIN_RS03815 pxpA
ECOLIN_RS03810 pxpC
ECOLIN_RS03805 pxpB
ECOLIN_RS08745
ECOLIN_RS12075 cpsB
ECOLIN_RS12000 cpsB
ECOLIN_RS01735 yafV
ECOLIN_RS23440 sucC
ECOLIN_RS23445 sucD
E

### 2.5 Rename genes

In [16]:
# Map ECOLIN_RS13880 to CIW80_05965 in the dictionary
# This gene was not matched automatically; it was added manually based on
# its gene name (ptsI)
EcN_dict['ECOLIN_RS13880'] = 'CIW80_05965'

# Switch the gene IDs of the model to the CP022686.1 locus tags
rename_genes(EcN_model, EcN_dict)

# Biocyc gene overview, indexed by its second accession column
EcN_biocyc = pd.read_csv(
    '../tables/EcN_genes_Biocyc.txt',
    sep='\t',
    header=0,
    names = ['Gene', 'Description', 'Accession1', 'Accession2'],
).set_index('Accession2')

# Genes whose *name* is still an old ECOLIN_RS locus tag get a name from Biocyc
for gene in EcN_model.genes:
    if 'ECOLIN_RS' not in gene.name:
        continue

    biocyc_gene = EcN_biocyc.loc[gene.id, 'Gene']
    biocyc_description = EcN_biocyc.loc[gene.id, 'Description']

    if 'CIW80_RS' in biocyc_gene:
        # Biocyc only lists a locus tag as gene name: use the description alone
        gene.name = biocyc_description
    else:
        gene.name = biocyc_gene + ', ' + biocyc_description
    print(gene.id, gene.name)

CIW80_15475 Ldh family oxidoreductase
CIW80_15215 PTS system mannose/fructose/N-acetylgalactosamine-transporter subunit IIB
CIW80_15220 PTS sorbose IIA component
CIW80_15205 PTS system mannose/fructose/sorbose family transporter subunit IID
CIW80_15210 PTS mannose/fructose/sorbose transporter subunit IIC
CIW80_12995 ketose-bisphosphate aldolase
CIW80_17025 S-methyl-5-thioribose kinase
CIW80_17035 L-fuculose-phosphate aldolase
CIW80_15445 sucA, subunit of E1(0) component of 2-oxoglutarate dehydrogenase


In [17]:
# List the genes whose ID is still an old ECOLIN_RS locus tag
old_id_genes = [gene.id for gene in EcN_model.genes if 'ECOLIN_RS' in gene.id]
for old_id in old_id_genes:
    print(old_id)

### 2.4 Update gene annotations

In [18]:
# Reload the model from Kim et al. (2021) so the annotations are read from an
# unmodified copy (renaming the genes of EcN_model was observed to affect the
# EcN2 model as well)
EcN2_model = cobra.io.load_json_model('../data/models_ecn/iDK1463.json')

# Empty annotation table, one row per gene and one column per annotation key
annotation_columns = ['gene', 'asap', 'ecogene', 'ncbigene', 'ncbigi', 'refseq_locus_tag', 'refseq_name', 'refseq_synonym', 'sbo', 'uniprot']
gene_annot_EcN2 = pd.DataFrame(columns=annotation_columns).set_index('gene')

# Copy every annotation of every iDK1463 gene into the table; keys that are
# not among the predefined columns are appended as new columns
for gene in EcN2_model.genes:
    for annot, value in gene.annotation.items():
        gene_annot_EcN2.loc[gene.id, annot] = value

gene_annot_EcN2.head()

Unnamed: 0_level_0,asap,ecogene,ncbigene,ncbigi,refseq_locus_tag,refseq_name,refseq_synonym,sbo,uniprot,refseq_old_locus_tag
gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
ECOLIN_RS12160,ABE-0006840,EG11701,946597,90111379,b2066,udk,"[ECK2060, JW2051]",SBO:0000243,P0A8F4,
ECOLIN_RS01805,ABE-0000814,EG10414,944817,16128224,b0238,gpt,"[gpp, ECK0239, gxu, JW0228]",SBO:0000243,P0A9M5,
ECOLIN_RS00695,ABE-0000440,EG20098,946624,90111088,b0125,hpt,"[ECK0124, JW5009]",SBO:0000243,P0A9M2,
ECOLIN_RS14305,ABE-0008291,EG10650,945611,16130443,b2518,ndk,"[JW2502, ECK2514]",SBO:0000243,P0A763,
ECOLIN_RS02815,ABE-0001645,EG10032,945097,16128458,b0474,adk,"[plsA, JW0463, ECK0468, dnaW]",SBO:0000243,P69441,


In [19]:
# Look up the CP022686.1 locus tag of every iDK1463 gene (left join, so genes
# without a match are kept) and use that tag as the new index
# NOTE(review): this overwrites gene_annot_EcN2, so the cell cannot be re-run
# without re-running the previous one
gene_annot_EcN2 = (
    gene_annot_EcN2
    .merge(convRS_df['CP022686.1'], left_index=True, right_index=True, how='left')
    .set_index('CP022686.1')
)

In [20]:
# Fill every empty annotation field of the EcN_model genes with the value
# stored for that gene in the iDK1463 annotation table
for gene in EcN_model.genes:
    for annot, current_value in gene.annotation.items():
        if current_value == '':
            gene.annotation[annot] = gene_annot_EcN2.loc[gene.id, annot]
            print(gene, annot, gene.annotation[annot])

No annotations of the iDK1463 model were missing

### 2.5 Update reaction subsystems

In [21]:
# Ordered subsystem rules: (attribute to inspect, substring, subsystem).
# Every matching rule is applied in this order, so a later match overrides
# an earlier one.
# NOTE(review): 'EX' is matched anywhere in the ID, not only as an 'EX_'
# prefix - confirm that no non-exchange reaction ID contains 'EX'.
subsystem_rules = [
    # Based on ID
    ('id', 'EX', 'Extracellular exchange'),
    ('id', 'abcpp', 'Transport, Inner Membrane'),
    ('id', 'tpp', 'Transport, Inner Membrane'),
    ('id', 't2pp', 'Transport, Inner Membrane'),
    ('id', 't2rpp', 'Transport, Inner Membrane'),
    ('id', 't3pp', 'Transport, Inner Membrane'),
    ('id', 't4pp', 'Transport, Inner Membrane'),
    ('id', 'tex', 'Transport, Outer Membrane'),
    ('id', 't2ex', 'Transport, Outer Membrane'),
    # Based on name
    ('name', 'PEP:Pyr', 'Transport, Inner Membrane'),
    # Based on ID again: aminopeptidase and acyl-CoA dehydrogenase families
    ('id', 'AMPEP', 'Amino Acid Metabolism'),
    ('id', 'ACOAD', 'Membrane Lipid Metabolism'),
]

for rxn in rxn_to_add:
    added_rxn = EcN_model.reactions.get_by_id(rxn)
    for attribute, token, subsystem in subsystem_rules:
        inspected_text = added_rxn.id if attribute == 'id' else added_rxn.name
        if token in inspected_text:
            added_rxn.subsystem = subsystem
        

In [22]:
# Overview of all reactions (name, subsystem, formula), indexed by reaction
# ID. The table is built in one step instead of being enlarged cell by cell.
model_rxns = list(EcN_model.reactions)
df_reactions = pd.DataFrame(
    [
        {'Name': rxn.name, 'Subsystem': rxn.subsystem, 'Reaction Formula': rxn.reaction}
        for rxn in model_rxns
    ],
    index=pd.Index([rxn.id for rxn in model_rxns], name='ID'),
    columns=['Name', 'Subsystem', 'Reaction Formula'],
)

# Identify reactions still missing subsystem (stored as an empty string)
sub_missing_df = df_reactions[df_reactions['Subsystem'] == '']
sub_missing_df.head()

Unnamed: 0_level_0,Name,Subsystem,Reaction Formula
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
CELLBpts_1,Cellobiose transport via PEP:Pyr PTS,,cellb_e + pep_c --> 6pgg_c + pyr_p
DMALRED,(S)-Malate:(acceptor) oxidoreductase,,fad_c + mal__L_c --> fadh2_c + oaa_c
DHACOAH,"2,3-dehydroadipyl-CoA hydratase",,23dhacoa_c + h2o_c --> 3hadpcoa_c
IMACTD,Imidazole acetaldeyde dehydrogenase,,h2o_c + im4act_c + nad_c --> 2.0 h_c + im4ac_c...
ALDD2x,"Aldehyde dehydrogenase (acetaldehyde, NAD)",,acald_c + h2o_c + nad_c --> ac_c + 2.0 h_c + n...


In [23]:
# Add all subsystems present in panecoli model
panecoli = cobra.io.load_json_model('../data/models/panecoli_universal.json')

for rxn in sub_missing_df.index:
    # Only a reaction missing from panecoli is an expected failure; any other
    # error should surface instead of being reported as "not in panecoli"
    try:
        panecoli_rxn = panecoli.reactions.get_by_id(rxn)
    except KeyError:
        print(rxn, 'not in panecoli')
        continue
    EcN_model.reactions.get_by_id(rxn).subsystem = panecoli_rxn.subsystem

L_LACD4 not in panecoli
AOXSr not in panecoli
D5KGPA not in panecoli
AATA not in panecoli
Q23DO not in panecoli
SRB1PR not in panecoli
MTRK not in panecoli
MDRPD not in panecoli
SBTD_D2 not in panecoli
OPAH not in panecoli
CELLB6PH not in panecoli
MAN1PT not in panecoli
r0085 not in panecoli
XYLTD_D not in panecoli
SUCOAS2 not in panecoli
BDH not in panecoli
PYDXOR not in panecoli
MTRI not in panecoli
AKGDH2 not in panecoli


In [24]:
# Manually assign subsystems (looked up in the BiGG database) to the
# reactions that were not found in the panecoli model
# NOTE(review): r0085 was also reported as missing from panecoli but is not
# covered here - confirm whether it should get a subsystem as well
manual_subsystems = {
    'D5KGPA': 'Inositol Metabolism',
    'MDRPD': 'Arginine and Proline Metabolism',
    'OPAH': 'Glutathione Metabolism',
    'MTRK': 'Methionine Salvage',
    'Q23DO': 'Alternate Carbon Metabolism',
    'AATA': 'Threonine and Lysine Metabolism',
    'MAN1PT': 'Alternate Carbon Metabolism',
    'L_LACD4': 'Pyruvate Metabolism',
    'MTRI': 'Methionine Salvage',
    'SRB1PR': 'Alternate Carbon Metabolism',
    'BDH': 'Alternate Carbon Metabolism',
    'SUCOAS2': 'Citric Acid Cycle',
    'AOXSr': 'Cofactor and Prosthetic Group Biosynthesis',
    'AKGDH2': 'Citric Acid Cycle',
    'PYDXOR': 'Cofactor and Prosthetic Group Biosynthesis',
    'CELLB6PH': 'Alternate Carbon Metabolism',
    'SBTD_D2': 'Alternate Carbon Metabolism',
    'XYLTD_D': 'Pentose Phosphate Pathway',
}

for rxn_id, subsystem in manual_subsystems.items():
    EcN_model.reactions.get_by_id(rxn_id).subsystem = subsystem

### 2.6 Update biomass

In [25]:
# Update Biomass
# Add a copy of the iDK1463 biomass reaction: adding the original object and
# renaming it afterwards would also rename the reaction inside EcN2_model
iDK1463_biomass = EcN2_model.reactions.BIOMASS_Ec_iDK1463_core_59p80M.copy()

# Add and change reaction
EcN_model.add_reactions([iDK1463_biomass])
biomass_rxn = EcN_model.reactions.get_by_id('BIOMASS_Ec_iDK1463_core_59p80M')
biomass_rxn.subsystem = 'Biomass and maintenance functions'
biomass_rxn.name = 'E. coli biomass objective function (iHM1533) - core - with 59.80 GAM estimate'
biomass_rxn.id = 'BIOMASS_EcN_iHM1533_core_59p80M'

# Make model objective
EcN_model.objective = EcN_model.reactions.BIOMASS_EcN_iHM1533_core_59p80M

### 2.7 Model adjustments of iDK1463

In [26]:
# Reactions that are removed from the merged model again
rxns_to_delete = ['ACACtex', 'ABUTtex', 'BUTt2rpp',  'ETHAtex', 'URIC', 'XANtex']
EcN_model.remove_reactions(rxns_to_delete)

# 3. Save model

In [27]:
# Display the flux summary of the updated model (new biomass reaction as objective)
EcN_model.summary()

Metabolite,Reaction,Flux,C-Number,C-Flux
ca2_e,EX_ca2_e,0.1498,0,0.00%
cl_e,EX_cl_e,0.1498,0,0.00%
cobalt2_e,EX_cobalt2_e,0.0007194,0,0.00%
cu2_e,EX_cu2_e,0.0204,0,0.00%
fe2_e,EX_fe2_e,0.4622,0,0.00%
glc__D_e,EX_glc__D_e,10.0,6,100.00%
h2o_e,EX_h2o_e,364.8,0,0.00%
k_e,EX_k_e,5.617,0,0.00%
mg2_e,EX_mg2_e,0.2496,0,0.00%
mn2_e,EX_mn2_e,0.01988,0,0.00%

Metabolite,Reaction,Flux,C-Number,C-Flux
4crsol_c,DM_4crsol_c,-0.006417,7,0.00%
5drib_c,DM_5drib_c,-0.006475,5,0.00%
amob_c,DM_amob_c,-5.755e-05,15,0.00%
mththf_c,DM_mththf_c,-0.01289,5,0.00%
ac_e,EX_ac_e,-501.8,2,53.41%
co2_e,EX_co2_e,-875.3,1,46.58%
meoh_e,EX_meoh_e,-5.755e-05,1,0.00%
pi_e,EX_pi_e,-972.3,0,0.00%


In [28]:
# Save the reaction-origin table, now including the iDK1463 reactions.
# This writes to the same CSV path the table was loaded from.
rxn_origin_df.to_csv('../tables/rxn_origin.csv')

In [29]:
# Write the updated model as draft 3.3 for the same genome accession
draft_3_3_path = '../data/models/%s_draft_3.3.json' % EcN_ID
cobra.io.save_json_model(EcN_model, draft_3_3_path, pretty=False)