# 4.7 Update of annotations
Annotations are missing in the model. Annotations are added for:
- Reactions
- Metabolites
- Genes

Additionally metabolites and genes which are not associated to reactions are removed.

In [1]:
#import package needed
import cobra
import pandas as pd
import numpy as np

from cobra.io import load_json_model
from cobra import Model, Reaction, Metabolite

In [2]:
# Load EcN model
EcN_ID = 'CP022686.1'
EcN_model = cobra.io.load_json_model('../data/models/%s_cur_4.6.json'%EcN_ID)

# 1. Update annotations

In [3]:
# Load metabolite overview
mtb_info = pd.read_csv('../tables/metabolites_info.csv')
mtb_info['ID'] = mtb_info['ID'].str.strip() #Remove white spaces
mtb_info.set_index('ID', inplace=True)
mtb_info.rename(columns={'Reactome Compound': 'reactome','KEGG Compound':'kegg.compound', 'CHEBI': 'chebi',
                         'InChI Key': 'inchikey', 'Human Metabolome Database':'Human Metabolome Database', 'BioCyc': 'biocyc',
                         'MetaNetX (MNX) Chemical': 'metanetx.chemical', 'SEED Compound': 'seed.compound',
                         'LipidMaps': 'lipidmaps', 'KEGG Drug': 'kegg.drug', 'KEGG Glycan':'kegg.glycan'}, inplace = True)

# Load reaction overview
rxn_info = pd.read_csv('../tables/reactions_info.csv')
rxn_info['ID'] = rxn_info['ID'].str.strip() #Remove white spaces
rxn_info.set_index('ID', inplace=True)
rxn_info.rename(columns={'RHEA': 'rhea', 'BioCyc':'biocyc', 'MetaNetX (MNX) Equation': 'metanetx.reaction',
                         'KEGG Reaction': 'kegg.reaction','SEED Reaction': 'seed.reaction',
                         'EC Number': 'ec-code', 'Reactome Reaction': 'reactome'}, inplace = True)
rxn_info.head()

Unnamed: 0_level_0,Name,Reaction Formula,Gene Rules,Subsystem,Essentiality,LB,UB,FVA_min,FVA_max,rhea,biocyc,metanetx.reaction,kegg.reaction,seed.reaction,ec-code,reactome
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
ALATA_D2,D-alanine transaminase,ala__D_c + pydx5p_c --> pyam5p_c + pyr_c,CIW80_06560 or CIW80_22360,Cofactor and Prosthetic Group Biosynthesis,Not essential,0.0,1000.0,0.0,-1.698893e-12,28565,META:RXN0-5240,MNXR95697,R01147,rxn00848,,
SHCHD2,Sirohydrochlorin dehydrogenase (NAD),dscl_c + nad_c --> h_c + nadh_c + scl_c,CIW80_11380,Cofactor and Prosthetic Group Biosynthesis,Essential,0.0,1000.0,0.00025,0.0002500454,15616,META:DIMETHUROPORDEHYDROG-RXN,MNXR104373,R03947,rxn02774,1.3.1.76,
CPPPGO,Coproporphyrinogen oxidase (O2 required),cpppg3_c + 2.0 h_c + o2_c --> 2.0 co2_c + 2.0 ...,CIW80_06055,Cofactor and Prosthetic Group Biosynthesis,Not essential,0.0,1000.0,0.00025,0.0002500454,18260,META:RXN0-1461,MNXR96880,R03220,rxn02303,1.3.3.3,
GTHOr,Glutathione oxidoreductase,gthox_c + h_c + nadph_c <=> 2.0 gthrd_c + nadp_c,CIW80_12105,Cofactor and Prosthetic Group Biosynthesis,Not essential,-1000.0,1000.0,0.0,0.2774966,11743,META:GLUTATHIONE-REDUCT-NADPH-RXN,MNXR100098,R00115,rxn00086,1.8.1.7,R-XTR-71682
DHORD5,Dihydroorotic acid (menaquinone-8),dhor__S_c + mqn8_c --> mql8_c + orot_c,CIW80_22710,Purine and Pyrimidine Biosynthesis,Not essential,0.0,1000.0,0.0,0.3709138,29202,,MNXR97421,,rxn08336,,


In [4]:
# Remove the secondary biosynthesis reactions > replaced by detailed reactions in 4.6
for rxn in rxn_info.index.values:
    if 'MNXR' in rxn:
        rxn_info.drop([rxn], inplace=True)
        
rxn_info.rename(index={'2HH24DDH1_copy1': '2HH24DDH1'}, inplace=True)
        
rxn_info.drop(['ENTCS', 'SALASYN', 'YBTSYN', 'PRECLBTNSYN', 'ACGAL6PISO', 'ACGAL6PI', '2HH24DDH1_copy2'], inplace=True) # These reactions were replaced in 4.6 and removed in 4.1

In [5]:
# Check
rxn_info.loc['GLTPD', 'ec-code']
# rxn_info.loc['NO3R2pp', 'rhea']
# rxn_info.loc['SHCHD2', 'ec-code']

'1.1.1.M6'

### 1.1 Update reactions

In [6]:
# add annotations reactions
for rxn in rxn_info.index.values:
    reaction = EcN_model.reactions.get_by_id(rxn)

    # Add database annotations when present
    for annot in ['rhea', 'biocyc', 'metanetx.reaction','kegg.reaction',
                  'seed.reaction', 'ec-code', 'reactome']:
        if pd.isnull(rxn_info.loc[rxn, annot]):
            pass
        else:
            if annot == 'rhea':
                rxn_info.loc[rxn, annot] = rxn_info.loc[rxn, annot].split('#')[0] # Remove the #1 which is present in some rhea annotations
            reaction.annotation[annot] = [rxn_info.loc[rxn, annot]]

In [7]:
# Check a reaction
EcN_model.reactions.ALATA_D2.annotation

{'rhea': ['28565'],
 'biocyc': ['META:RXN0-5240'],
 'metanetx.reaction': ['MNXR95697'],
 'kegg.reaction': ['R01147'],
 'seed.reaction': ['rxn00848']}

In [8]:
# Update subsystem annotation of reactions
for rxn in EcN_model.reactions:
    if 't2pp' in rxn.id: # Correct the subsystem of these reactions to Inner Membrane
        if rxn.subsystem == 'Transport':
            rxn.subsystem = 'Transport, Inner Membrane'
    
    if 'PEP:Pyr' in rxn.name: # Correct the subsystem of these reactions to Inner Membrane
        rxn.subsystem = 'Transport, Inner Membrane'
        
    if 'tpp' in rxn.id: # Correct the subsystem of these reactions to Inner Membrane
        if rxn.subsystem == 'Transport' or rxn.subsystem == 'Exchange':
            rxn.subsystem = 'Transport, Inner Membrane'

    if 'tex' in rxn.id: # Correct the subsystem of these reactions to Outer Membrane
        if rxn.subsystem == 'Transport':
            rxn.subsystem = 'Transport, Outer Membrane'
        
    if 't4rpp' in rxn.id: # Correct the subsystem of these reactions to Outer Membrane
        rxn.subsystem = 'Transport, Outer Membrane'
        
        
### Align all names
    if rxn.subsystem == 'Transport Outer Membrane Porin':
        rxn.subsystem = 'Transport, Outer Membrane'
        
    if rxn.subsystem == 'Transport, Outer Membrane Porin':
        rxn.subsystem = 'Transport, Outer Membrane'
        
    if rxn.subsystem == 'S_Transport_Outer_Membrane_Porin':
        rxn.subsystem = 'Transport, Outer Membrane'
        
    if rxn.subsystem == 'Transport Outer Membrane':
        rxn.subsystem = 'Transport, Outer Membrane'
        
    if rxn.subsystem == 'Transport Inner Membrane':
        rxn.subsystem = 'Transport, Inner Membrane'
        
    if rxn.subsystem == 'S_Transport_Inner_Membrane':
        rxn.subsystem = 'Transport, Inner Membrane'
        
    if rxn.subsystem == 'S_Aromatic_Acid_Breakdown':
        rxn.subsystem = 'Aromatic Acid Breakdown'
        
    if rxn.subsystem == 'S_Alternate_Carbon_Metabolism':
        rxn.subsystem = 'Alternate Carbon Metabolism'
        
    if rxn.subsystem == 'S_Alternate_Carbon_source':
        rxn.subsystem = 'Alternate Carbon Metabolism'
        
    if rxn.subsystem == 'S_Lipopolysaccharide_Biosynthesis___Recycling':
        rxn.subsystem = 'Lipopolysaccharide Biosynthesis / Recycling'
        
    if rxn.subsystem == 'S_Lipopolysaccharide_Biosynthesis_Recycling':
        rxn.subsystem = 'Lipopolysaccharide Biosynthesis / Recycling'    
        
    if rxn.subsystem == 'iron metabolism':
        rxn.subsystem = 'Iron Metabolism'
        
    if rxn.subsystem == 'S_penicillin_breakdown':
        rxn.subsystem = 'Penicillin Breakdown'
        
    if rxn.subsystem == 'purine metabolism':
        rxn.subsystem = 'Purine Metabolism'
        
    if rxn.subsystem == 'siderophore biosynthesis':
        rxn.subsystem = 'Secondary metabolite biosynthesis'
        
    if rxn.subsystem == 'Benzoate degradation':
        rxn.subsystem = 'Benzoate Degradation'   

### 1.2 Update metabolites

In [9]:
# Remove metabolites
mtb_info.drop(['asn__L', 'preclbtn_c', 'preclbtn_p'], inplace=True)

In [10]:
# add annotations metabolites
for mtb in mtb_info.index.values:
    metabolite = EcN_model.metabolites.get_by_id(mtb)

    # Add database annotations when present
    for annot in ['reactome', 'kegg.compound', 'chebi', 'inchikey', 'Human Metabolome Database', 'biocyc',
                  'metanetx.chemical', 'seed.compound', 'lipidmaps', 'kegg.drug', 'kegg.glycan']:
        if pd.isnull(mtb_info.loc[mtb, annot]):
            pass
        else:
            metabolite.annotation[annot] = [mtb_info.loc[mtb, annot]]

In [11]:
# Check a metabolite
dict_1 = EcN_model.metabolites.dms_e.annotation
EcN_model.metabolites.dms_e.annotation

{'kegg.compound': ['C00580'],
 'chebi': ['CHEBI:4611'],
 'inchikey': ['QMMFVYPAHWMCMS-UHFFFAOYSA-N'],
 'Human Metabolome Database': ['HMDB02303'],
 'biocyc': ['META:CPD-7670'],
 'metanetx.chemical': ['MNXM444'],
 'seed.compound': ['cpd00450']}

In [12]:
for value in dict_1.values():
    print(value, type(value))

['C00580'] <class 'list'>
['CHEBI:4611'] <class 'list'>
['QMMFVYPAHWMCMS-UHFFFAOYSA-N'] <class 'list'>
['HMDB02303'] <class 'list'>
['META:CPD-7670'] <class 'list'>
['MNXM444'] <class 'list'>
['cpd00450'] <class 'list'>


In [13]:
dict_1['seed.compound']

['cpd00450']

### 1.3 Update genes

In [14]:
#import packages needed
from glob import glob
from Bio import Entrez, SeqIO

In [15]:
in_file = '../data/genomes_gb/CP022686.1.gb'

gene_annot_df = pd.DataFrame(columns=('locus', 'ncbigene', 'ncbiprotein'))
gene_annot_df.set_index('locus', inplace=True)

handle = open(in_file)

records = SeqIO.parse(handle, "genbank")
for record in records:
    for f in record.features:
        if f.type=='CDS':

            # Get locus tag
            if 'locus_tag' in f.qualifiers.keys():
                locus = f.qualifiers['locus_tag'][0]
            
#             # get gene id
            if 'gene' in f.qualifiers.keys():
                gene_id = f.qualifiers['gene'][0]
                gene_annot_df.loc[locus, 'ncbigene'] = gene_id
            
            # get protein_id
            if 'protein_id' in f.qualifiers.keys():
                prot_id = f.qualifiers['protein_id'][0]
                gene_annot_df.loc[locus, 'ncbiprotein'] = prot_id

gene_annot_df.head()

Unnamed: 0_level_0,ncbigene,ncbiprotein
locus,Unnamed: 1_level_1,Unnamed: 2_level_1
CIW80_00005,,AXY44376.1
CIW80_00010,,AXY44377.1
CIW80_00015,,AXY44378.1
CIW80_00020,gap,AXY44379.1
CIW80_00025,,AXY48986.1


In [16]:
# add annotations genes
for gene in gene_annot_df.index.values:
    try:
        EcN_gene = EcN_model.genes.get_by_id(gene)

        # Add database annotations when present
        for annot in ['ncbigene', 'ncbiprotein']:
            if pd.isnull(gene_annot_df.loc[gene, annot]):
                pass
            else:
                EcN_gene.annotation[annot] = [gene_annot_df.loc[gene, annot]]
    except:
        pass

In [17]:
EcN_model.genes.CIW80_00140.annotation

{'ncbiprotein': ['AXY44398.1']}

In [18]:
all_g = []
for gene in EcN_model.genes:
    all_g.append(gene.id)
incorrect_g = ["CIW80_06560","CIW80_22360","CIW80_06055","CIW80_12105","CIW80_22710","CIW80_09250","CIW80_09255","CIW80_09260","CIW80_25240","CIW80_11665","CIW80_05155","CIW80_05150","CIW80_15440","CIW80_13715","CIW80_10920","CIW80_01330","CIW80_00900","CIW80_17795","CIW80_22225","CIW80_20355","CIW80_13940","CIW80_01175","CIW80_20220","CIW80_13885","CIW80_15050","CIW80_18385","CIW80_06355","CIW80_05875","CIW80_15030","CIW80_04120","CIW80_19465","CIW80_18380","CIW80_20510","CIW80_06420","CIW80_23920","CIW80_25370","CIW80_24300","CIW80_25140","CIW80_06310","CIW80_16495","CIW80_07420","CIW80_00380","CIW80_05780","CIW80_05385","CIW80_12540","CIW80_13625","CIW80_22655","CIW80_16520","CIW80_13345","CIW80_01245","CIW80_07210","CIW80_10455","CIW80_18090","CIW80_18095","CIW80_13230","CIW80_25255","CIW80_25260","CIW80_05575","CIW80_25250","CIW80_07580","CIW80_08350","CIW80_04270","CIW80_01385","CIW80_15260","CIW80_02170","CIW80_21825","CIW80_11440","CIW80_21365","CIW80_00375","CIW80_16675","CIW80_20625","CIW80_12645","CIW80_09415","CIW80_09450","CIW80_15045","CIW80_17605","CIW80_21215","CIW80_14115","CIW80_09435","CIW80_12785","CIW80_24290","CIW80_12910","CIW80_20190","CIW80_13720","CIW80_05400","CIW80_14405","CIW80_06720","CIW80_14790","CIW80_14785","CIW80_16090","CIW80_16095","CIW80_16100","CIW80_16085","CIW80_22365","CIW80_05420","CIW80_10065","CIW80_02165","CIW80_05415","CIW80_12865","CIW80_18635","CIW80_18675","CIW80_22590","CIW80_07590","CIW80_18205","CIW80_18565","CIW80_24275","CIW80_05915","CIW80_05125","CIW80_06740","CIW80_05120","CIW80_07940","CIW80_25130","CIW80_01335","CIW80_22980","CIW80_13435","CIW80_24155","CIW80_17915","CIW80_10630","CIW80_00895","CIW80_15910","CIW80_22550","CIW80_24945","CIW80_11725","CIW80_11720","CIW80_11625","CIW80_20255","CIW80_18355","CIW80_15885","CIW80_10330","CIW80_14820","CIW80_18360","CIW80_22605","CIW80_09960","CIW80_09965","CIW80_10350","CIW80_14890","CIW80_13790","CIW80_05545","CIW80_05110","CIW80_20975","CIW80_18475","CIW80_12870","CIW80_20090","CIW80_18630","CIW80_14195","CIW80_20525","CIW80_06960","CIW80_00575","CIW80_07665","CIW80_10075","CIW80_01965","CIW80_10050","CIW80_12795","CIW80_12655","CIW80_13735","CIW80_05510","CIW80_18690","CIW80_06905","CIW80_05275","CIW80_05255","CIW80_20990","CIW80_20985","CIW80_17520","CIW80_21350","CIW80_05390","CIW80_01105","CIW80_21820","CIW80_14010","CIW80_21770","CIW80_20215","CIW80_24935","CIW80_07430","CIW80_12300","CIW80_04030","CIW80_15155","CIW80_08235","CIW80_14810","CIW80_22115","CIW80_04050","CIW80_01030","CIW80_07460","CIW80_07455","CIW80_07675","CIW80_14515","CIW80_07195","CIW80_04235","CIW80_10175","CIW80_04260","CIW80_10140","CIW80_04255","CIW80_17255","CIW80_00545","CIW80_11435","CIW80_09785","CIW80_09790","CIW80_10105","CIW80_24085","CIW80_23880","CIW80_05775","CIW80_18260","CIW80_24080","CIW80_07985","CIW80_06210","CIW80_00765","CIW80_22500","CIW80_00755","CIW80_00750","CIW80_22510","CIW80_12485","CIW80_21210","CIW80_11505","CIW80_20480","CIW80_10405","CIW80_10555","CIW80_01535","CIW80_13835","CIW80_13330","CIW80_08230","CIW80_13470","CIW80_11620","CIW80_13780","CIW80_13825","CIW80_09720","CIW80_02685","CIW80_22210","CIW80_25355","CIW80_09760","CIW80_12945","CIW80_05060","CIW80_05065","CIW80_01340","CIW80_13800","CIW80_20685","s0001","CIW80_17820","CIW80_18780","CIW80_17875","CIW80_17865","CIW80_14200","CIW80_07595","CIW80_06030","CIW80_23065","CIW80_20515","CIW80_14925","CIW80_18230","CIW80_04305","CIW80_24970","CIW80_10460","CIW80_01615","CIW80_09340","CIW80_24100","CIW80_22465","CIW80_21465","CIW80_07120","CIW80_04665","CIW80_07105","CIW80_12635","CIW80_16310","CIW80_12595","CIW80_19745","CIW80_20130","CIW80_18815","CIW80_09455","CIW80_14855","CIW80_22700","CIW80_07485","CIW80_11965","CIW80_06350","CIW80_11340","CIW80_24090","CIW80_15335","CIW80_24895","CIW80_05960","CIW80_05965","CIW80_24885","CIW80_18620","CIW80_13325","CIW80_06285","CIW80_18145","CIW80_12805","CIW80_23760","CIW80_14815","CIW80_00950","CIW80_13190","CIW80_21360","CIW80_01740","CIW80_22145","CIW80_13585","CIW80_16580","CIW80_16585","CIW80_17815","CIW80_13925","CIW80_17830","CIW80_17690","CIW80_01620","CIW80_11335","CIW80_17695","CIW80_15145","CIW80_06835","CIW80_21750","CIW80_01400","CIW80_07170","CIW80_01745","CIW80_00735","CIW80_13810","CIW80_15150","CIW80_02690","CIW80_09620","CIW80_11495","CIW80_17995","CIW80_12690","CIW80_12750","CIW80_15900","CIW80_07695","CIW80_07380","CIW80_18135","CIW80_18165","CIW80_18150","CIW80_20655","CIW80_21610","CIW80_18140","CIW80_18670","CIW80_03895","CIW80_03910","CIW80_24930","CIW80_03930","CIW80_12745","CIW80_16445","CIW80_07480","CIW80_15245","CIW80_17685","CIW80_07995","CIW80_16285","CIW80_03525","CIW80_03520","CIW80_20200","CIW80_10095","CIW80_20915","CIW80_20710","CIW80_20730","CIW80_10560","CIW80_21240","CIW80_20735","CIW80_01800","CIW80_05825","CIW80_03945","CIW80_19965","CIW80_06150","CIW80_14830","CIW80_17840","CIW80_19485","CIW80_18075","CIW80_24065","CIW80_14545","CIW80_01135","CIW80_18560","CIW80_24140","CIW80_01680","CIW80_05200","CIW80_22540","CIW80_10685","CIW80_20040","CIW80_06435","CIW80_06990","CIW80_11660","CIW80_04515","CIW80_04115","CIW80_06245","CIW80_19870","CIW80_20580","CIW80_00570","CIW80_05490","CIW80_11365","CIW80_06300","CIW80_11830","CIW80_19845","CIW80_06640","CIW80_19970","CIW80_15095","CIW80_06590","CIW80_06075","CIW80_06080","CIW80_04035","CIW80_13900","CIW80_06500","CIW80_19850","CIW80_16125","CIW80_12660","CIW80_04700","CIW80_17400","CIW80_16190","CIW80_03940","CIW80_07615","CIW80_13815","CIW80_13795","CIW80_00015","CIW80_00425","CIW80_12250","CIW80_12475","CIW80_00945","CIW80_14705","CIW80_06955","CIW80_19480","CIW80_22660","CIW80_25725","CIW80_05020","CIW80_20170","CIW80_22770","CIW80_25560","CIW80_01380","CIW80_01920","CIW80_01080","CIW80_04970","CIW80_04960","CIW80_04965","CIW80_04950","CIW80_22895","CIW80_20025","CIW80_00320","CIW80_25030","CIW80_00330","CIW80_00840","CIW80_22950","CIW80_02580","CIW80_02575","CIW80_22945","CIW80_12315","CIW80_17725","CIW80_16010","CIW80_15915","CIW80_21115","CIW80_05970","CIW80_06020","CIW80_05990","CIW80_06005","CIW80_21805","CIW80_14590","CIW80_06000","CIW80_09795","CIW80_08300","CIW80_19960","CIW80_19950","CIW80_05845","CIW80_08585","CIW80_14225","CIW80_10960","CIW80_25450","CIW80_25190","CIW80_06765","CIW80_05910","CIW80_11890","CIW80_11855","CIW80_11870","CIW80_16560","CIW80_10270","CIW80_18275","CIW80_13415","CIW80_02750","CIW80_12555","CIW80_12560","CIW80_12615","CIW80_12620","CIW80_09950","CIW80_04570","CIW80_18530","CIW80_18525","CIW80_18520","CIW80_01435","CIW80_01425","CIW80_18555","CIW80_05070","CIW80_24115","CIW80_21370","CIW80_01995","CIW80_01990","CIW80_02000","CIW80_10565","CIW80_17560","CIW80_20960","CIW80_20950","CIW80_20970","CIW80_20955","CIW80_08010","CIW80_05470","CIW80_05480","CIW80_22320","CIW80_22325","CIW80_22330","CIW80_22310","CIW80_05460","CIW80_05465","CIW80_15875","CIW80_16290","CIW80_16295","CIW80_16300","CIW80_00220","CIW80_21250","CIW80_21260","CIW80_21255","CIW80_15645","CIW80_07620","CIW80_10110","CIW80_08475","CIW80_06965","CIW80_02800","CIW80_02815","CIW80_02805","CIW80_07190","CIW80_21645","CIW80_21635","CIW80_22880","CIW80_13560","CIW80_13545","CIW80_13555","CIW80_13530","CIW80_13550","CIW80_13540","CIW80_12975","CIW80_13440","CIW80_13185","CIW80_15730","CIW80_15725","CIW80_15720","CIW80_20680","CIW80_02705","CIW80_22190","CIW80_18375","CIW80_15555","CIW80_08140","CIW80_09385","CIW80_22855","CIW80_09375","CIW80_09370","CIW80_22850","CIW80_09380","CIW80_16760","CIW80_25200","CIW80_09425","CIW80_09420","CIW80_24120","CIW80_05505","CIW80_21070","CIW80_17885","CIW80_04440","CIW80_04445","CIW80_04430","CIW80_04435","CIW80_04670","CIW80_04660","CIW80_20920","CIW80_19865","CIW80_16780","CIW80_24210","CIW80_14025","CIW80_15170","CIW80_22455","CIW80_22460","CIW80_06235","CIW80_20675","CIW80_10100","CIW80_06695","CIW80_10355","CIW80_20780","CIW80_21725","CIW80_19675","CIW80_19680","CIW80_19685","CIW80_22535","CIW80_12855","CIW80_22585","CIW80_03055","CIW80_22680","CIW80_22695","CIW80_03935","CIW80_20420","CIW80_20925","CIW80_25595","CIW80_25590","CIW80_20965","CIW80_09625","CIW80_21470","CIW80_03860","CIW80_25505","CIW80_04925","CIW80_04945","CIW80_04935","CIW80_13785","CIW80_13830","CIW80_06565","CIW80_05230","CIW80_07225","CIW80_07220","CIW80_15850","CIW80_15610","CIW80_13275","CIW80_18265","CIW80_00305","CIW80_16465","CIW80_02080","CIW80_22675","CIW80_06450","CIW80_16655","CIW80_19460","CIW80_04450","CIW80_05140","CIW80_16170","CIW80_06050","CIW80_22345","CIW80_12760","CIW80_07035","CIW80_22250","CIW80_11200","CIW80_18370","CIW80_24820","CIW80_04735","CIW80_04730","CIW80_25150","CIW80_25155","CIW80_25170","CIW80_04720","CIW80_04725","CIW80_25615","CIW80_25160","CIW80_20280","CIW80_17480","CIW80_20860","CIW80_20855","CIW80_20865","CIW80_20850","CIW80_14580","CIW80_21735","CIW80_02210","CIW80_02215","CIW80_02205","CIW80_03325","CIW80_05840","CIW80_20385","CIW80_02855","CIW80_12575","CIW80_12955","CIW80_23905","CIW80_01515","CIW80_12625","CIW80_04045","CIW80_10835","CIW80_10830","CIW80_14540","CIW80_11975","CIW80_11980","CIW80_15765","CIW80_21495","CIW80_21505","CIW80_11875","CIW80_09235","CIW80_12725","CIW80_15305","CIW80_15315","CIW80_15295","CIW80_15300","CIW80_13195","CIW80_18770","CIW80_08415","CIW80_12685","CIW80_20380","CIW80_21730","CIW80_25025","CIW80_11375","CIW80_00335","CIW80_13490","CIW80_10790","CIW80_07060","CIW80_07050","CIW80_07055","CIW80_20125","CIW80_23050","CIW80_24230","CIW80_22290","CIW80_24245","CIW80_22280","CIW80_00135","CIW80_22295","CIW80_00145","CIW80_24235","CIW80_24240","CIW80_00130","CIW80_00140","CIW80_08545","CIW80_18515","CIW80_02250","CIW80_08530","CIW80_22970","CIW80_08450","CIW80_15710","CIW80_13605","CIW80_13620","CIW80_13615","CIW80_16510","CIW80_16500","CIW80_16515","CIW80_13610","CIW80_16505","CIW80_22045","CIW80_22050","CIW80_14645","CIW80_04680","CIW80_00590","CIW80_05450","CIW80_09560","CIW80_11740","CIW80_17240","CIW80_13820","CIW80_04710","CIW80_19915","CIW80_22390","CIW80_05475","CIW80_19820","CIW80_04240","CIW80_04245","CIW80_04250","CIW80_09970","CIW80_11615","CIW80_11765","CIW80_00940","CIW80_03905","CIW80_00530","CIW80_11770","CIW80_13890","CIW80_14525","CIW80_07295","CIW80_07300","CIW80_07275","CIW80_15680","CIW80_07280","CIW80_07435","CIW80_10545","CIW80_01690","CIW80_10540","CIW80_18695","CIW80_15965","CIW80_24805","CIW80_08160","CIW80_08130","CIW80_11850","CIW80_11840","CIW80_11845","CIW80_06515","CIW80_18840","CIW80_13750","CIW80_08060","CIW80_08055","CIW80_08050","CIW80_17165","CIW80_08360","CIW80_12900","CIW80_20240","CIW80_01715","CIW80_04550","CIW80_04545","CIW80_04540","CIW80_01050","CIW80_17925","CIW80_00865","CIW80_11460","CIW80_20050","CIW80_05220","CIW80_21865","CIW80_00860","CIW80_07895","CIW80_00405","CIW80_09610","CIW80_20015","CIW80_01565","CIW80_01555","CIW80_01560","CIW80_05075","CIW80_24985","CIW80_22215","CIW80_11280","CIW80_17920","CIW80_21615","CIW80_21620","CIW80_21765","CIW80_18430","CIW80_04645","CIW80_11455","CIW80_01495","CIW80_21755","CIW80_12775","CIW80_14600","CIW80_08305","CIW80_05530","CIW80_24145","CIW80_00685","CIW80_13995","CIW80_22070","CIW80_08495","CIW80_05210","CIW80_05215","CIW80_05225","CIW80_05240","CIW80_05235","CIW80_12815","CIW80_01810","CIW80_09405","CIW80_12810","CIW80_14230","CIW80_12850","CIW80_07730","CIW80_01265","CIW80_14000","CIW80_04455","CIW80_05250","CIW80_02810","CIW80_06715","CIW80_00645","CIW80_17630","CIW80_06295","CIW80_14005","CIW80_07980","CIW80_09565","CIW80_08170","CIW80_13985","CIW80_04530","CIW80_04535","CIW80_13320","CIW80_25745","CIW80_08590","CIW80_22755","CIW80_25415","CIW80_18640","CIW80_14595","CIW80_02775","CIW80_01170","CIW80_06280","CIW80_12410","CIW80_12420","CIW80_12430","CIW80_12415","CIW80_22165","CIW80_22160","CIW80_22150","CIW80_22155","CIW80_21940","CIW80_21275","CIW80_01250","CIW80_20205","CIW80_20670","CIW80_22020","CIW80_04590","CIW80_20870","CIW80_19985","CIW80_21180","CIW80_22200","CIW80_03850","CIW80_10380","CIW80_14155","CIW80_20695","CIW80_18155","CIW80_11490","CIW80_06425","CIW80_10510","CIW80_21195","CIW80_18130","CIW80_22640","CIW80_01255","CIW80_08190","CIW80_21035","CIW80_14660","CIW80_15035","CIW80_01430","CIW80_06255","CIW80_20260","CIW80_20275","CIW80_20270","CIW80_20265","CIW80_15345","CIW80_05950","CIW80_09515","CIW80_15350","CIW80_21595","CIW80_20630","CIW80_11820","CIW80_03510","CIW80_24180","CIW80_22105","CIW80_07745","CIW80_18835","CIW80_24095","CIW80_24830","CIW80_07175","CIW80_17620","CIW80_08580","CIW80_06600","CIW80_12445","CIW80_01130","CIW80_05570","CIW80_04460","CIW80_02200","CIW80_24785","CIW80_14275","CIW80_21045","CIW80_21040","CIW80_05355","CIW80_05360","CIW80_05370","CIW80_05345","CIW80_05325","CIW80_05320","CIW80_05310","CIW80_05350","CIW80_05340","CIW80_05335","CIW80_05315","CIW80_05365","CIW80_05330","CIW80_16415","CIW80_17710","CIW80_22380","CIW80_08340","CIW80_17870","CIW80_01545","CIW80_06180","CIW80_20135","CIW80_19760","CIW80_13960","CIW80_09665","CIW80_05750","CIW80_10450","CIW80_13315","CIW80_08020","CIW80_04600","CIW80_15960","CIW80_17425","CIW80_17435","CIW80_11735","CIW80_18540","CIW80_21445","CIW80_08200","CIW80_21880","CIW80_21225","CIW80_10470","CIW80_16660","CIW80_16665","CIW80_17965","CIW80_23100","CIW80_00660","CIW80_01875","CIW80_25675","CIW80_06315","CIW80_19770","CIW80_25395","CIW80_12290","CIW80_17720","CIW80_06485","CIW80_06195","CIW80_20440","CIW80_20435","CIW80_24060","CIW80_09770","CIW80_06480","CIW80_01280","CIW80_01285","CIW80_01275","CIW80_01270","CIW80_22140","CIW80_14255","CIW80_01020","CIW80_12085","CIW80_21530","CIW80_06475","CIW80_01260","CIW80_21870","CIW80_20345","CIW80_07650","CIW80_23005","CIW80_05265","CIW80_22830","CIW80_05270","CIW80_05260","CIW80_01295","CIW80_15015","CIW80_20230","CIW80_15010","CIW80_15020","CIW80_21920","CIW80_14475","CIW80_00360","CIW80_11600","CIW80_06405","CIW80_16550","CIW80_16545","CIW80_14630","CIW80_16420","CIW80_08285","CIW80_04705","CIW80_21900","CIW80_21170","CIW80_08325","CIW80_12580","CIW80_13475","CIW80_00340","CIW80_00825","CIW80_00820","CIW80_06950","CIW80_16160","CIW80_07000","CIW80_09840","CIW80_17055","CIW80_17060","CIW80_13445","CIW80_20705","CIW80_20700","CIW80_00920","CIW80_01345","CIW80_01760","CIW80_08255","CIW80_02910","CIW80_21135","CIW80_05740","CIW80_09345","CIW80_05445","CIW80_00930","CIW80_17340","CIW80_14750","CIW80_20880","CIW80_10720","CIW80_23080","CIW80_01540","CIW80_12065","CIW80_09335","CIW80_20120","CIW80_16025","CIW80_01905","CIW80_13170","CIW80_00775","CIW80_15185","CIW80_03365","CIW80_10650","CIW80_10655","CIW80_05170","CIW80_00180","CIW80_13025","CIW80_15785","CIW80_01960","CIW80_04650","CIW80_24125","CIW80_15795","CIW80_15790","CIW80_15770","CIW80_15755","CIW80_04025","b4104","CIW80_08250","CIW80_07075","CIW80_07070","CIW80_02015","CIW80_04640","CIW80_00915","CIW80_20875","CIW80_20455","CIW80_16115","CIW80_08345","CIW80_25620","CIW80_22035","CIW80_05180","CIW80_20995","CIW80_02565","CIW80_10070","CIW80_07660","CIW80_15760","CIW80_24925","CIW80_00615","CIW80_00620","CIW80_00625","CIW80_00630","CIW80_00635","CIW80_01035","CIW80_01445","CIW80_01450","CIW80_01475","CIW80_02600","CIW80_03245","CIW80_03255","CIW80_03260","CIW80_05175","CIW80_05655","CIW80_05660","CIW80_05670","CIW80_06400","CIW80_06805","CIW80_07155","CIW80_08025","CIW80_10150","CIW80_10160","CIW80_10145","CIW80_10155","CIW80_10185","CIW80_10180","CIW80_10190","CIW80_10195","CIW80_10950","CIW80_12510","CIW80_12610","CIW80_12665","CIW80_12825","CIW80_12830","CIW80_12835","CIW80_12840","CIW80_12860","CIW80_14375","CIW80_14725","CIW80_14735","CIW80_14720","CIW80_15200","CIW80_16380","CIW80_17800","CIW80_21420","CIW80_22495","CIW80_22665","CIW80_14050","CIW80_16385","CIW80_23530","CIW80_23545","CIW80_23550","CIW80_24585","CIW80_25715","CIW80_24595","CIW80_24590","CIW80_12845","CIW80_12030","CIW80_03825","CIW80_04275","CIW80_04280","CIW80_04285","CIW80_04290","CIW80_04330","CIW80_04380","CIW80_04410","CIW80_04405","CIW80_04420","CIW80_04400","CIW80_04415","CIW80_21840","CIW80_17680","CIW80_RS08680","CIW80_RS08670","CIW80_RS08665","CIW80_RS08675","CIW80_RS23320","CIW80_RS23305","CIW80_RS23310","CIW80_RS03275","CIW80_RS03255","CIW80_RS03260","CIW80_RS03270","CIW80_RS03265","CIW80_RS03280","CIW80_RS03240","CIW80_RS03245","CIW80_RS03235","CIW80_RS03400","CIW80_RS03430","CIW80_RS03445","CIW80_RS03405","CIW80_RS03420","CIW80_RS03440","CIW80_RS03465","CIW80_RS03450","CIW80_RS03385","CIW80_RS03435","CIW80_RS03425","CIW80_RS03390","CIW80_RS03410","CIW80_RS03415","CIW80_RS03380"]

diff = set(all_g) - set(incorrect_g)
diff

in_g_2 =["CIW80_11380","CIW80_14585","CIW80_25245","CIW80_05160","CIW80_19490","CIW80_20185","CIW80_09695","CIW80_17565","CIW80_15100","CIW80_14210","CIW80_05635","CIW80_21570","CIW80_15590","CIW80_11530","CIW80_24810","CIW80_15425","CIW80_18000","CIW80_14640","CIW80_17570","CIW80_14635","CIW80_11730","CIW80_02190","CIW80_18060","CIW80_18065","CIW80_13235","CIW80_22555","CIW80_06825","CIW80_06830","CIW80_22805","CIW80_08355","CIW80_06160","CIW80_17715","CIW80_06165","CIW80_08430","CIW80_14520","CIW80_05940","CIW80_04300","CIW80_05975","CIW80_17575","CIW80_09865","CIW80_13410","CIW80_10060","CIW80_22525","CIW80_22530","CIW80_06120","CIW80_20755","CIW80_18070","CIW80_20740","CIW80_13765","CIW80_12895","CIW80_25100","CIW80_02175","CIW80_16050","CIW80_00890","CIW80_01040","CIW80_08440","CIW80_18535","CIW80_17250","CIW80_13515","CIW80_10415","CIW80_16015","CIW80_10735","CIW80_10740","CIW80_12640","CIW80_17990","CIW80_16315","CIW80_12545","CIW80_08240","CIW80_18330","CIW80_08245","CIW80_21775","CIW80_19885","CIW80_16555","CIW80_21090","CIW80_21095","CIW80_21080","CIW80_21085","CIW80_25350","CIW80_20665","CIW80_02195","CIW80_00760","CIW80_22505","CIW80_10570","CIW80_14745","CIW80_20935","CIW80_17940","CIW80_12260","CIW80_22565","CIW80_13805","CIW80_14915","CIW80_12800","CIW80_19410","CIW80_15340","CIW80_01600","CIW80_08135","CIW80_25550","CIW80_16305","CIW80_12630","CIW80_07490","CIW80_06635","CIW80_03330","CIW80_01955","CIW80_24890","CIW80_07585","CIW80_18160","CIW80_14765","CIW80_22130","CIW80_16210","CIW80_21345","CIW80_24070","CIW80_23035","CIW80_09240","CIW80_07690","CIW80_07680","CIW80_19860","CIW80_25120","CIW80_18660","CIW80_21605","CIW80_14760","CIW80_18410","CIW80_18190","CIW80_03900","CIW80_03920","CIW80_05985","CIW80_05955","CIW80_14710","CIW80_11745","CIW80_08470","CIW80_06015","CIW80_04630","CIW80_17845","CIW80_04055","CIW80_21885","CIW80_13510","CIW80_07110","CIW80_18235","CIW80_01580","CIW80_11370","CIW80_16625","CIW80_04995","CIW80_01610","CIW80_17955","CIW80_07320","CIW80_02180","CIW80_18435","CIW80_07610","CIW80_07450","CIW80_04955","CIW80_00315","CIW80_25040","CIW80_25045","CIW80_00325","CIW80_25035","CIW80_00845","CIW80_05640","CIW80_14205","CIW80_17595","CIW80_21810","CIW80_05995","CIW80_21815","CIW80_19955","CIW80_13600","CIW80_18045","CIW80_18040","CIW80_18035","CIW80_11865","CIW80_11860","CIW80_12550","CIW80_22690","CIW80_19740","CIW80_22315","CIW80_21245","CIW80_15490","CIW80_20545","CIW80_07185","CIW80_07180","CIW80_24880","CIW80_21640","CIW80_22885","CIW80_13520","CIW80_13525","CIW80_13535","CIW80_15580","CIW80_02700","CIW80_02695","CIW80_22860","CIW80_16770","CIW80_16765","CIW80_16775","CIW80_11580","CIW80_07685","CIW80_14555","CIW80_15320","CIW80_06305","CIW80_14740","CIW80_15970","CIW80_12735","CIW80_22685","CIW80_04930","CIW80_04940","CIW80_21125","CIW80_21760","CIW80_20575","CIW80_11935","CIW80_15620","CIW80_15615","CIW80_15605","CIW80_01795","CIW80_08075","CIW80_07890","CIW80_01125","CIW80_07040","CIW80_23930","CIW80_25290","CIW80_25165","CIW80_16440","CIW80_16570","CIW80_11985","CIW80_11970","CIW80_11990","CIW80_21500","CIW80_19880","CIW80_18765","CIW80_18775","CIW80_08420","CIW80_24790","CIW80_17775","CIW80_13495","CIW80_13505","CIW80_13500","CIW80_22285","CIW80_15745","CIW80_15715","CIW80_22040","CIW80_00420","CIW80_12970","CIW80_14825","CIW80_12120","CIW80_05145","CIW80_18590","CIW80_07285","CIW80_07290","CIW80_11835","CIW80_03515","CIW80_00150","CIW80_03915","CIW80_03925","CIW80_13725","CIW80_22670","CIW80_01605","CIW80_18320","CIW80_18325","CIW80_10115","CIW80_19855","CIW80_18170","CIW80_05715","CIW80_14335","CIW80_14315","CIW80_21475","CIW80_06755","CIW80_24075","CIW80_22995","CIW80_12405","CIW80_25180","CIW80_21075","CIW80_20980","CIW80_23955","CIW80_18510","CIW80_19910","CIW80_04585","CIW80_18665","CIW80_16120","CIW80_21590","CIW80_21600","CIW80_21585","CIW80_25345","CIW80_18345","CIW80_16000","CIW80_19405","CIW80_16525","CIW80_25730","CIW80_10475","CIW80_10465","CIW80_21915","CIW80_14260","CIW80_09765","CIW80_23010","CIW80_07495","CIW80_23020","CIW80_23015","CIW80_15025","CIW80_08105","CIW80_05755","CIW80_23000","CIW80_23025","CIW80_14480","CIW80_00350","CIW80_14485","CIW80_00355","CIW80_21875","CIW80_22135","CIW80_21910","CIW80_21150","CIW80_17585","CIW80_15955","CIW80_21425","CIW80_00835","CIW80_08085","CIW80_00090","CIW80_23085","CIW80_08015","CIW80_14885","CIW80_15800","CIW80_15775","CIW80_15820","CIW80_15815","CIW80_21300","CIW80_10965","CIW80_02255","CIW80_04360","CIW80_08175","CIW80_10165","CIW80_11430","CIW80_12515","CIW80_14165","CIW80_15455","CIW80_16615","CIW80_16645","CIW80_18750","CIW80_20765","CIW80_21230","CIW80_19220","CIW80_19225","CIW80_03965","CIW80_16605"]

diff_2 = set(diff) - set(in_g_2)
diff_2

{'CIW80_03270',
 'CIW80_03275',
 'CIW80_03280',
 'CIW80_03290',
 'CIW80_03390',
 'CIW80_03395',
 'CIW80_03400',
 'CIW80_03405',
 'CIW80_03410',
 'CIW80_03415',
 'CIW80_03420',
 'CIW80_03425',
 'CIW80_03430',
 'CIW80_03435',
 'CIW80_03440',
 'CIW80_03445',
 'CIW80_03455',
 'CIW80_03460',
 'CIW80_03465',
 'CIW80_03485',
 'CIW80_08730',
 'CIW80_08735',
 'CIW80_08740',
 'CIW80_08745',
 'CIW80_10170',
 'CIW80_17025',
 'CIW80_17030',
 'CIW80_23535',
 'CIW80_23540'}

In [19]:
EcN_model.genes.CIW80_00090.annotation

{'ncbigene': ['tehB'], 'ncbiprotein': ['AXY44389.1']}

## 2. Remove metabolites without reaction

In [20]:
# find all metabolites without a reaction
for mtb in EcN_model.metabolites:
    if EcN_model.metabolites.get_by_id(mtb.id).reactions == frozenset():
        print(mtb)
        EcN_model.remove_metabolites(mtb)
        
# Some empty metabolites were not removed > run second time
print('\nSecond run')
for mtb in EcN_model.metabolites:
    if EcN_model.metabolites.get_by_id(mtb.id).reactions == frozenset():
        print(mtb)
        EcN_model.remove_metabolites(mtb)

gg4abut_c
sla_c
frulysp_c
dhpppn_c
cynt_c
sf_c
hkntd_c
dhcinnm_c
dhps_c
dhps_p
ggptrc_c
sq_c
cinnm_c
o16a4und_p
2hptcoa_c
man6pglyc_c
frulys_c
sqg_c
dhptdp_c
cechddd_c
o16aund_p
dhptdd_c
34dphacoa_c
ragund_c
thcur_c
udpgalfur_c
3hcinnm_c
garagund_c
psclys_c
kphphhlipa_c
cenchddd_c
2oxpaccoa_c
3oxdhscoa_c
cur_c
op4en_c
dtdp4d6dm_c
hkndd_c
dtdprmn_c
rephaccoa_c
23doguln_p
sq_p
3hpppn_c
ggbutal_c
aragund_c
mmet_c
4h2opntn_c
dhcur_c
4abzglu_c

Second run
2hptcl_c
sfp_c
o16a3und_p
o16a2und_p


## 3. Remove genes without reaction

In [21]:
# Load gene_origin dataframe
gene_origin_df = pd.read_csv('../tables/gene_origin.csv')
gene_origin_df.set_index('EcN_gene', inplace=True)

In [22]:
print('The original number of genes is:', len(EcN_model.genes), '\n')

# Create a list of genes not associated to reactions
gene_del_list = []

for gene in EcN_model.genes:
    if EcN_model.genes.get_by_id(gene.id).reactions == frozenset():
        print(gene)
        gene_del_list.append(gene.id)
        
        gene_origin_df.loc[gene.id, 'added'] = 'removed'
        gene_origin_df.loc[gene.id, 'notes'] = 'Empty gene. Removed in 4.7'
        
# Remove genes
cobra.manipulation.remove_genes(EcN_model, gene_del_list)
        
print('\nThe new number of genes is:', len(EcN_model.genes))

The original number of genes is: 1508 

CIW80_05275
CIW80_03935
CIW80_06805
CIW80_10195

The new number of genes is: 1504


# 3. Save model

In [23]:
# Save the model
cobra.io.json.save_json_model(EcN_model, str('../data/models/%s_cur_4.7.json'%EcN_ID))

In [24]:
# Save as a table
gene_origin_df.to_csv('../tables/gene_origin.csv')