# Comparing database annotation with *i*ML1515

In [53]:
import cobra
import pandas as pd

## Load the two models

In [54]:
# iCH360: the model whose annotations are examined (and rewritten at the end of the notebook).
ich360 = cobra.io.read_sbml_model(
    './../../Model/iCH360/Escherichia_coli_iCH360.xml'
)
# iML1515: the model whose annotations serve as the baseline for comparison.
iml1515 = cobra.io.read_sbml_model(
    '../../Manuscript_Figures/data/metabolic_models/iML1515/iML1515.xml'
)

'' is not a valid SBML 'SId'.


In [55]:
def rxn_filter(model,rxn):
    '''Tell whether a reaction should be kept as an enzymatic reaction.

    A reaction is rejected when it is a member of ``model.boundary`` or when
    its ``id`` is exactly ``'Biomass'``; every other reaction is accepted.

    Parameters
    ----------
    model : object exposing a ``boundary`` collection of reactions
    rxn : object exposing an ``id`` attribute

    Returns
    -------
    bool
        ``True`` if the reaction is kept, ``False`` otherwise.
    '''
    # The membership test runs first, exactly as in the if/elif formulation.
    return not (rxn in model.boundary or rxn.id == 'Biomass')
    
# Keep only the iCH360 reactions accepted by rxn_filter.
metabolic_reactions=list(
    filter(lambda rxn: rxn_filter(ich360,rxn), ich360.reactions)
)

Loop across databases and metabolic reactions in *i*CH360. For each reaction, count whether an annotation to that database is available in *i*ML1515 and in *i*CH360. Further, note down whether the metanetX annotation was updated in *i*CH360 from a deprecated ID in *i*ML1515 (should the ID in *i*CH360 not be the updated one, also proceed to update it).

In [56]:
# Table of deprecated MetaNetX reaction IDs, indexed by the deprecated ID.
# The file is tab-separated; anything following a '#' is treated as a comment.
metanetX_deprecated_data=(
    pd.read_csv(
        '../../external_database_data/metanetX_deprecated_reactions.tsv',
        sep='\t',
        comment='#',
    )
    .set_index('deprecated_ID')
)
metanetX_deprecated_data.head()

Unnamed: 0_level_0,ID,version
deprecated_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
MNXR1,MNXR94668,3.*
MNXR10,MNXR94678,3.*
MNXR100,MNXR94811,3.*
MNXR1000,MNXR109728,3.*
MNXR100002,MNXR144918,4.0


In [57]:
# Compare annotation coverage between iML1515 ("original") and iCH360 ("current")
# for every reaction in `metabolic_reactions`, one row per database in `dbs`.
# Side effect: iCH360 reactions that still carry a deprecated MetaNetX ID are
# updated in place (the old ID is kept under 'deprecated_metanetx.reaction').
dbs=['biocyc','metanetx.reaction','kegg.reaction']
stats={'database':[],'original':[],'deprecated_in_original':[],'current':[],'total':[]}

# Build the deprecated -> replacement lookup once, instead of converting the
# index to a list and scanning it for every reaction. If a deprecated ID occurs
# on several rows, the first row wins (same as `.loc[[id]].iloc[0]`).
first_occurrence=~metanetX_deprecated_data.index.duplicated(keep='first')
deprecated_to_current=metanetX_deprecated_data.loc[first_occurrence,'ID'].to_dict()

for db in dbs:
    iml1515_count=0
    ich360_count=0
    deprecated_count=0
    for r in metabolic_reactions:
        r_id=r.id
        # Raises KeyError if the reaction ID is absent from either model.
        r_iml1515=iml1515.reactions.get_by_id(r_id)
        r_ich360=ich360.reactions.get_by_id(r_id)

        if db in r_iml1515.annotation:
            iml1515_count+=1
        if db in r_ich360.annotation:
            ich360_count+=1

        # The deprecation check only applies to the MetaNetX pass.
        if db!='metanetx.reaction':
            continue

        deprecated_id=r_iml1515.annotation.get('metanetx.reaction')
        # Only plain string IDs can match the table. The isinstance guard also
        # keeps unhashable (e.g. list-valued) annotations out of the dict
        # lookup; such values never matched the list-based check either.
        if not isinstance(deprecated_id,str) or deprecated_id not in deprecated_to_current:
            continue
        deprecated_count+=1

        new_id=deprecated_to_current[deprecated_id]
        # `.get` avoids a KeyError when the iCH360 reaction has no MetaNetX
        # annotation at all; in that case there is nothing to update.
        if r_ich360.annotation.get('metanetx.reaction')==deprecated_id:
            r_ich360.annotation['metanetx.reaction']=new_id
            r_ich360.annotation['deprecated_metanetx.reaction']=deprecated_id
            print(f"Changed metanetX annotation of {r_id} from {deprecated_id} to {new_id}")

    stats['database'].append(db)
    stats['original'].append(iml1515_count)
    stats['current'].append(ich360_count)
    stats['deprecated_in_original'].append(deprecated_count)
    stats['total'].append(len(metabolic_reactions))

stats_df=pd.DataFrame.from_dict(stats)
stats_df.head()

Unnamed: 0,database,original,deprecated_in_original,current,total
0,biocyc,205,0,317,322
1,metanetx.reaction,320,143,320,322
2,kegg.reaction,193,0,206,322


In [58]:
# Persist the iCH360 model in both SBML and JSON form.
cobra.io.write_sbml_model(
    ich360,
    '../../Model/iCH360/Escherichia_coli_iCH360.xml',
)
cobra.io.save_json_model(
    ich360,
    '../../Model/iCH360/Escherichia_coli_iCH360.json',
)
# Export the per-database annotation statistics without the row index.
stats_df.to_csv('out/annotation_stats.csv', index=False)