In [1]:
# Update species_annotation so that the result returns sorted CHEBI candidates together with their match scores

In [2]:
import editdistance
import libsbml
import numpy as np
import operator
import os
import pickle
import pandas as pd
import sys
import matplotlib.pyplot as plt
%matplotlib inline  

# ---------------------------------------------------------------------------
# File names and directories used by the rest of the notebook.
# ---------------------------------------------------------------------------
BIOMD_12 = 'BIOMD0000000012.xml'

# Root of the local data checkout. The default is the original author's
# location; set the AMAS_BASE_DIR environment variable to point elsewhere
# without editing this cell.
BASE_DIR = os.environ.get('AMAS_BASE_DIR',
                          '/Users/woosubs/Desktop/AutomateAnnotation/')
DATA_DIR = os.path.join(BASE_DIR, "DATA")
ALGO_DIR = os.path.join(DATA_DIR, "algo")
CHEBI_DIR = os.path.join(DATA_DIR, "chebi")
RHEA_DIR = os.path.join(DATA_DIR, "rhea")
BIOMODEL_DIR = os.path.join(DATA_DIR, "biomodels/curated_biomodels_31mar2021")
# Derived from DATA_DIR (rather than a second absolute literal) so that it
# always follows BASE_DIR.
BIGG_DIR = os.path.join(DATA_DIR, "bigg")
ecoli_fpath = os.path.join(BIGG_DIR, "e_coli_core.xml")

# The AMAS package is expected one level above the current working directory.
PROJ_DIR = os.path.join(os.getcwd(), os.pardir)
AMAS_DIR = os.path.join(PROJ_DIR, "AMAS")
# Guarded so that re-running the cell does not add duplicate entries.
if PROJ_DIR not in sys.path:
    sys.path.append(PROJ_DIR)

from AMAS import species_annotation as sa
from AMAS import reaction_annotation as ra
from AMAS import constants as cn
from AMAS import iterator as it
from AMAS import tools

In [3]:
# --- Reference data -------------------------------------------------------
# NOTE: pickle.load executes arbitrary code from the file; only load pickles
# that were produced locally / come from a trusted source.
prev_notebook_dir = '/Users/woosubs/Desktop/AutomateAnnotation/AnnotationRecommender/annotation_recommender/notebook'
chebi_models_fpath = os.path.join(prev_notebook_dir, 'chebi_models.pickle')
with open(chebi_models_fpath, 'rb') as pickle_file:
    chebi_models = pickle.load(pickle_file)

shortened_formula_fpath = os.path.join(cn.REF_DIR, 'chebi_shortened_formula_30apr2022.pickle')
with open(shortened_formula_fpath, 'rb') as pickle_file:
    ref_shortened_chebi_to_formula = pickle.load(pickle_file)

low_synonyms_fpath = os.path.join(cn.REF_DIR, 'chebi_low_synonyms.pickle')
with open(low_synonyms_fpath, 'rb') as pickle_file:
    chebi_low_synonyms = pickle.load(pickle_file)

# --- Predict annotations for one BioModels file ---------------------------
one_biomd = 'BIOMD0000000634.xml'
one_biomd_fpath = os.path.join(BIOMODEL_DIR, one_biomd)
species_an = sa.SpeciesAnnotation(libsbml_fpath=one_biomd_fpath)
# The keys of this model's entry in chebi_models are the species to predict.
model_itm = chebi_models[one_biomd]
species_to_predict = list(model_itm.keys())
pred_species = species_an.predictAnnotationByName(inp_spec_list=species_to_predict)

# --- Raw libsbml handle on the same file ----------------------------------
# NOTE(review): reader/document are kept as named variables; presumably the
# model object depends on the document staying alive - confirm before inlining.
reader = libsbml.SBMLReader()
document = reader.readSBML(one_biomd_fpath)
model = document.getModel()

In [4]:
# Display species_an.match_score; in the saved output each species key maps to
# a list of (CHEBI id, score) pairs.
species_an.match_score

{'ATP': [('CHEBI:15422', 1.0), ('CHEBI:30616', 1.0)],
 'ADP': [('CHEBI:16761', 1.0), ('CHEBI:456216', 1.0), ('CHEBI:73342', 1.0)],
 'AMP': [('CHEBI:16027', 1.0), ('CHEBI:28971', 1.0), ('CHEBI:456215', 1.0)]}

In [5]:
# Display the list stored under 'chebi' for the 'ADP' entry of the prediction.
pred_species['ADP']['chebi']

['CHEBI:16761', 'CHEBI:456216', 'CHEBI:73342']

In [6]:
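# Disabled helper that prints ChEBI lookup URLs for each predicted species.
# NOTE: the ids in pred_species already start with "CHEBI:", so the base URL
# must not end with "CHEBI%3A" (the saved output of this cell shows the
# doubled "CHEBI%3ACHEBI:" prefix produced by the earlier version).
# url_default = 'https://www.ebi.ac.uk/chebi/searchId.do?chebiId='
# for one_k in pred_species.keys():
#   urls = [url_default+val for val in pred_species[one_k]['chebi']]
#   print(urls)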

['https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI%3ACHEBI:15422', 'https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI%3ACHEBI:30616']
['https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI%3ACHEBI:16761', 'https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI%3ACHEBI:456216', 'https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI%3ACHEBI:73342']
['https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI%3ACHEBI:16027', 'https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI%3ACHEBI:28971', 'https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI%3ACHEBI:456215']


In [13]:
# Fixtures for checking the tests
# SBML model files
BIOMD_248_PATH = os.path.join(BIOMODEL_DIR, 'BIOMD0000000248.xml')
E_COLI_PATH = os.path.join(BIGG_DIR, 'e_coli_core.xml')
# Species IDs
M_FDP_C, M_ATP_C = 'M_fdp_c', 'M_atp_c'
ONESET_SPECIES_IDS = [M_FDP_C, M_ATP_C]

In [14]:
# Annotation object built from the E. coli core model file.
spec_cl = sa.SpeciesAnnotation(libsbml_fpath=E_COLI_PATH)

In [20]:
# Look the species up by id, then predict from its lower-cased name.
fdp_species = spec_cl.model.getSpecies(M_FDP_C)
one_spec_name = fdp_species.name.lower()
one_pred_spec = spec_cl.predictAnnotationByEditDistance(inp_str=one_spec_name)

In [21]:
# Display the edit-distance prediction; the saved output has the keys
# 'chebi', 'match_score' and 'formula'.
one_pred_spec

{'chebi': ['CHEBI:16905', 'CHEBI:49299'],
 'match_score': [('CHEBI:16905', 1.0), ('CHEBI:49299', 1.0)],
 'formula': ['C6O12P2']}

In [25]:
# Predict for the whole id set at once. This rebinds one_pred_spec: in the
# saved output the result is keyed by species id, one prediction dict per id.
one_pred_spec = spec_cl.predictAnnotationByName(
    inp_spec_list=ONESET_SPECIES_IDS,
)
one_pred_spec

{'M_fdp_c': {'chebi': ['CHEBI:16905', 'CHEBI:49299'],
  'match_score': [('CHEBI:16905', 1.0), ('CHEBI:49299', 1.0)],
  'formula': ['C6O12P2']},
 'M_atp_c': {'chebi': ['CHEBI:182955', 'CHEBI:135736'],
  'match_score': [('CHEBI:182955', 0.23), ('CHEBI:135736', 0.06)],
  'formula': ['C18ClN2O6S2', 'C20O4']}}

In [26]:
# Evaluate the predictions for the same species ids; the saved output holds
# one float per id (presumably a confidence/quality score - TODO confirm).
spec_cl.evaluatePredictedSpeciesAnnotation(ONESET_SPECIES_IDS)

{'M_fdp_c': 1.0, 'M_atp_c': 0.038285714285714284}