In [1]:
# Update species_annotation so that the result is sorted and returns CHEBI candidates and match scores

In [1]:
import editdistance
import libsbml
import numpy as np
import operator
import os
import compress_pickle
import pickle
import pandas as pd
import sys
import matplotlib.pyplot as plt
%matplotlib inline  

# --- Data locations ---------------------------------------------------------
# NOTE(review): BASE_DIR is an absolute path on the original author's machine;
# edit it here (and only here) to run the notebook elsewhere.
BIOMD_12 = 'BIOMD0000000012.xml'
BASE_DIR = '/Users/woosubs/Desktop/AutomateAnnotation/'
DATA_DIR = os.path.join(BASE_DIR, "DATA")
ALGO_DIR = os.path.join(DATA_DIR, "algo")
CHEBI_DIR = os.path.join(DATA_DIR, "chebi")
RHEA_DIR = os.path.join(DATA_DIR, "rhea")
BIOMODEL_DIR = os.path.join(DATA_DIR, "biomodels/curated_biomodels_31mar2021")
# Derived from DATA_DIR like the sibling directories; resolves to the same
# location as the previously hard-coded literal.
BIGG_DIR = os.path.join(DATA_DIR, "bigg")
ecoli_fpath = os.path.join(BIGG_DIR, "e_coli_core.xml")

# --- Project import path ----------------------------------------------------
# The project root is taken to be the parent of the current working directory.
PROJ_DIR = os.path.join(os.getcwd(), os.pardir)
AMAS_DIR = os.path.join(PROJ_DIR, "AMAS")
# Guarded so that re-running this cell does not keep appending duplicates.
if PROJ_DIR not in sys.path:
    sys.path.append(PROJ_DIR)

from AMAS import species_annotation as sa
from AMAS import reaction_annotation as ra
from AMAS import constants as cn
from AMAS import tools

In [2]:
# Display the kernel's working directory; PROJ_DIR in the setup cell is built from os.getcwd().
os.getcwd()

'/Users/woosubs/Desktop/AutomateAnnotation/AMAS/notebooks'

In [3]:
# Load pickled reference data.
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
prev_notebook_dir = '/Users/woosubs/Desktop/AutomateAnnotation/AnnotationRecommender/annotation_recommender/notebook'
chebi_models_path = os.path.join(prev_notebook_dir, 'chebi_models.pickle')
with open(chebi_models_path, 'rb') as models_file:
    chebi_models = pickle.load(models_file)

# Compressed pickles stored under the project's reference directory (cn.REF_DIR).
shortened_formula_path = os.path.join(cn.REF_DIR, 'chebi_shortened_formula_comp.lzma')
with open(shortened_formula_path, 'rb') as formula_file:
    ref_shortened_chebi_to_formula = compress_pickle.load(formula_file)

low_synonyms_path = os.path.join(cn.REF_DIR, 'chebi_low_synonyms_comp.lzma')
with open(low_synonyms_path, 'rb') as synonyms_file:
    chebi_low_synonyms = compress_pickle.load(synonyms_file)

In [4]:
# Species identifiers used by the cells below
M_FDP_C = 'M_fdp_c'
M_ATP_C = 'M_atp_c'
M_AMP_C = 'M_amp_c'
ONESET_SPECIES_IDS = [M_FDP_C, M_ATP_C]

# A hand-built recommendation: id, score, (CHEBI, score) candidates, and their URLs
ONE_CHEBI = 'CHEBI:15414'
DUMMY_ID = 'SAM'
DUMMY_RECOMMENDATION = cn.Recommendation(
    DUMMY_ID,
    1.0,
    [(ONE_CHEBI, 1.0), ('CHEBI:59789', 1.0)],
    [
        'https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI%3A15414',
        'https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI%3A59789',
    ],
)

# Dummy data for calculating accuracy, recall & precision
DUMMY_REF = {
    'a': ['ABC', 'BCD'],
    'b': ['DEF'],
}
DUMMY_PRED = {
    'a': ['ABC'],
    'b': ['AAA'],
}

In [6]:
# Build the species annotator from the file at ecoli_fpath.
spec_cl = sa.SpeciesAnnotation(libsbml_fpath=ecoli_fpath)

In [13]:
# Predict from the name stored for M_amp_c, then compute the mean recall
# of the predicted formula for that single species.
amp_name = spec_cl.names[M_AMP_C]
one_pred_spec = spec_cl.predictAnnotationByEditDistance(inp_str=amp_name)
one_res_formula = {M_AMP_C: one_pred_spec[cn.FORMULA]}
recall2 = spec_cl.getRecall(pred_annotation=one_res_formula, mean=True)

In [14]:
# Display the recall value stored in `recall2`.
recall2

1.0

In [4]:
# Load one model from BIOMODEL_DIR and predict an annotation for the string 'ATP'.
one_biomd = "BIOMD0000000634.xml"
one_biomd_fpath = os.path.join(BIOMODEL_DIR, one_biomd)
species_an = sa.SpeciesAnnotation(libsbml_fpath=one_biomd_fpath)
# Entry for this model in the chebi_models pickle; only the disabled code below reads it.
model_itm = chebi_models[one_biomd]
pred_one_spec = species_an.predictAnnotationByEditDistance('ATP')
print(pred_one_spec)

# Disabled exploration, kept for reference:
# pred_species = species_an.predictAnnotationByName(inp_spec_list=list(model_itm.keys()))
# reader = libsbml.SBMLReader()
# document = reader.readSBML(one_biomd_fpath)
# model = document.getModel()

{'name_used': 'ATP', 'chebi': ['CHEBI:15422', 'CHEBI:30616'], 'match_score': [('CHEBI:15422', 1.0), ('CHEBI:30616', 1.0)], 'formula': ['C10N5O13P3']}


In [4]:
# Evaluate the prediction held in `pred_one_spec` and display the resulting score.
species_an.evaluatePredictedSpeciesAnnotation(pred_one_spec)

1.0

In [10]:
M_FDP_C = 'M_fdp_c'
M_ATP_C = 'M_atp_c'
species_an = sa.SpeciesAnnotation(libsbml_fpath=ecoli_fpath)

# Predict and evaluate each raw species ID in turn, printing a separator
# between the two reports. After the loop, `pred_spec` and `pred_score`
# hold the values for the last ID (M_FDP_C).
for position, species_id in enumerate((M_ATP_C, M_FDP_C)):
    if position > 0:
        print("************************************")
    pred_spec = species_an.predictAnnotationByEditDistance(species_id)
    print(pred_spec)
    pred_score = species_an.evaluatePredictedSpeciesAnnotation(pred_result=pred_spec)
    print(pred_score)

{'name_used': 'M_atp_c', 'chebi': ['CHEBI:147284'], 'match_score': [('CHEBI:147284', 0.4)], 'formula': ['C20N2OS.ClH']}
0.07957664046479836
************************************
{'name_used': 'M_fdp_c', 'chebi': ['CHEBI:132167', 'CHEBI:9437', 'CHEBI:137272', 'CHEBI:147284', 'CHEBI:183049', 'CHEBI:53005', 'CHEBI:75949', 'CHEBI:87194', 'CHEBI:59414', 'CHEBI:73629', 'CHEBI:38848', 'CHEBI:40265', 'CHEBI:53074', 'CHEBI:75062', 'CHEBI:37917', 'CHEBI:34756', 'CHEBI:34467'], 'match_score': [('CHEBI:132167', 0.33), ('CHEBI:9437', 0.33), ('CHEBI:137272', 0.29), ('CHEBI:147284', 0.2), ('CHEBI:183049', 0.2), ('CHEBI:53005', 0.2), ('CHEBI:75949', 0.2), ('CHEBI:87194', 0.2), ('CHEBI:59414', 0.18), ('CHEBI:73629', 0.18), ('CHEBI:38848', 0.12), ('CHEBI:40265', 0.12), ('CHEBI:53074', 0.12), ('CHEBI:75062', 0.11), ('CHEBI:37917', 0.1), ('CHEBI:34756', 0.09), ('CHEBI:34467', 0.07)], 'formula': ['C40NO8P', 'C16N3O6S', 'C20N2OS.ClH', 'C10MgN5O10P2', 'C109N16O22S', 'C44N4O4', 'C8ClN7O', 'C19N4O11', 'C10F18',

In [7]:
# Count the CHEBI candidates in the current `pred_spec`.
# NOTE(review): the recorded output (1) appears to come from an earlier execution
# (this cell is In [7], the cell above is In [10]) — re-run in order to confirm.
len(pred_spec['chebi'])

1

In [13]:
# Check tests
# Model file paths used when checking the tests
E_COLI_PATH = os.path.join(BIGG_DIR, "e_coli_core.xml")
BIOMD_248_PATH = os.path.join(BIOMODEL_DIR, "BIOMD0000000248.xml")

# Species IDs, individually and as one set
M_FDP_C, M_ATP_C = 'M_fdp_c', 'M_atp_c'
ONESET_SPECIES_IDS = [M_FDP_C, M_ATP_C]

In [14]:
# Build the species annotator from the file at E_COLI_PATH.
spec_cl = sa.SpeciesAnnotation(libsbml_fpath=E_COLI_PATH)

In [20]:
# Look up the M_fdp_c species in the model, lower-case its name,
# and predict an annotation from that string.
fdp_species = spec_cl.model.getSpecies(M_FDP_C)
one_spec_name = fdp_species.name.lower()
one_pred_spec = spec_cl.predictAnnotationByEditDistance(inp_str=one_spec_name)

In [21]:
# Display the prediction currently stored in `one_pred_spec`.
one_pred_spec

{'chebi': ['CHEBI:16905', 'CHEBI:49299'],
 'match_score': [('CHEBI:16905', 1.0), ('CHEBI:49299', 1.0)],
 'formula': ['C6O12P2']}

In [25]:
# Predict annotations for every species ID in ONESET_SPECIES_IDS in one call.
# NOTE: this rebinds `one_pred_spec`, which earlier cells use for a single-input prediction.
one_pred_spec = spec_cl.predictAnnotationByName(inp_spec_list=ONESET_SPECIES_IDS)
one_pred_spec

{'M_fdp_c': {'chebi': ['CHEBI:16905', 'CHEBI:49299'],
  'match_score': [('CHEBI:16905', 1.0), ('CHEBI:49299', 1.0)],
  'formula': ['C6O12P2']},
 'M_atp_c': {'chebi': ['CHEBI:182955', 'CHEBI:135736'],
  'match_score': [('CHEBI:182955', 0.23), ('CHEBI:135736', 0.06)],
  'formula': ['C18ClN2O6S2', 'C20O4']}}

In [26]:
# Evaluate the species listed in ONESET_SPECIES_IDS and display the result.
# NOTE(review): earlier cells pass a prediction dict to this method, while this call
# passes a list of species IDs — confirm which argument the current AMAS code expects.
spec_cl.evaluatePredictedSpeciesAnnotation(ONESET_SPECIES_IDS)

{'M_fdp_c': 1.0, 'M_atp_c': 0.038285714285714284}