In [1]:
# collect data and get models to predict credibility score
import collections
import copy
import itertools
import libsbml
import numpy as np
import os
import pickle
import pandas as pd
import sys
import matplotlib.pyplot as plt
%matplotlib inline  

BIOMD_12 = 'BIOMD0000000012.xml'
BASE_DIR = '/Users/woosubs/Desktop/AutomateAnnotation/'
DATA_DIR = os.path.join(BASE_DIR, "DATA")
ALGO_DIR = os.path.join(DATA_DIR, "algo")
CHEBI_DIR = os.path.join(DATA_DIR, "chebi")
RHEA_DIR = os.path.join(DATA_DIR, "rhea")
BIOMODEL_DIR = os.path.join(DATA_DIR, "biomodels/curated_biomodels_31mar2021")
BIGG_DIR = '/Users/woosubs/Desktop/AutomateAnnotation/DATA/bigg'
ecoli_fpath = os.path.join(BIGG_DIR, "e_coli_core.xml")

PROJ_DIR = os.path.join(os.getcwd(), os.pardir)
AMAS_DIR = os.path.join(PROJ_DIR, "AMAS")
sys.path.append(PROJ_DIR)

from AMAS import species_annotation as sa
from AMAS import reaction_annotation as ra
from AMAS import recommender
from AMAS import constants as cn
from AMAS import iterator as it
from AMAS import tools

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [2]:
biomds = [val for val in os.listdir(BIOMODEL_DIR) if val[-4:]=='.xml']
one_biomd = biomds[0]
print(one_biomd)

BIOMD0000000634.xml


In [3]:
one_fpath = os.path.join(BIOMODEL_DIR, one_biomd)
recom = recommender.Recommender(libsbml_fpath=one_fpath)

In [4]:
recom.species.exist_annotation

{'ROS': ['CHEBI:26523'],
 'damDNA': ['CHEBI:16991'],
 'ATP': ['CHEBI:15422'],
 'ADP': ['CHEBI:16761'],
 'AMP': ['CHEBI:16027']}

In [5]:
model_names = []
len_names = []
num_cands = []
num_formulas = []
match_scores = []
#
for idx, one_biomd in enumerate(biomds):
  if idx % 100 == 0:
    print("We are at", idx)
  one_fpath = os.path.join(BIOMODEL_DIR, one_biomd)
  recom = recommender.Recommender(libsbml_fpath=one_fpath)
  specs2evaluate = list(recom.species.exist_annotation_formula.keys())
  # run algorithm if there is at least one species to evaluate
  if specs2evaluate:
    recoms = recom.getSpeciesListAnnotation(pred_ids=recom.species.exist_annotation_formula.keys())
    recalls = recom.getSpeciesStatistics(model_mean=False)[cn.RECALL]
    for one_recom in recoms:
      model_names.append(one_biomd)
      len_names.append(len(recom.species.getNameToUse(one_recom.id)))
      num_cands.append(len(one_recom.candidates))
      num_formulas.append(len(recom.species.formula[one_recom.id]))
      match_scores.append(one_recom.candidates[0][1])

We are at 0


ZeroDivisionError: division by zero

In [10]:
refs = {val:recom.species.exist_annotation_formula[val] \
        for val in recom.species.exist_annotation_formula.keys() \
        if recom.species.exist_annotation_formula[val]}
specs2eval = list(refs.keys())
preds_comb = recom.species.predictAnnotationByCosineSimilarity(inp_ids=specs2eval)
preds = {val:preds_comb[val][cn.FORMULA] for val in preds_comb.keys()}

In [13]:
# re-run so that chebi candidates can be converted into terms providing formula

cn.REF_CHEBI2FORMULA['CHEBI:53715']

KeyError: 'CHEBI:53715'

In [12]:
recom.species.candidates

{'SBP': [('CHEBI:53715', 0.963)],
 'S7P': [('CHEBI:18303', 0.816),
  ('CHEBI:32412', 0.816),
  ('CHEBI:53276', 0.816),
  ('CHEBI:73648', 0.816),
  ('CHEBI:157881', 0.816),
  ('CHEBI:46710', 0.816),
  ('CHEBI:185793', 0.816),
  ('CHEBI:30224', 0.816),
  ('CHEBI:74820', 0.816)],
 'ATP': [('CHEBI:30616', 1.0),
  ('CHEBI:61432', 1.0),
  ('CHEBI:15422', 1.0),
  ('CHEBI:74926', 1.0),
  ('CHEBI:37537', 1.0),
  ('CHEBI:53394', 1.0),
  ('CHEBI:15702', 1.0)],
 'ADP': [('CHEBI:135948', 1.0),
  ('CHEBI:18383', 1.0),
  ('CHEBI:42085', 1.0),
  ('CHEBI:456216', 1.0),
  ('CHEBI:53488', 1.0),
  ('CHEBI:2247', 1.0),
  ('CHEBI:51676', 1.0),
  ('CHEBI:39867', 1.0),
  ('CHEBI:34936', 1.0),
  ('CHEBI:131833', 1.0),
  ('CHEBI:4640', 1.0),
  ('CHEBI:73342', 1.0),
  ('CHEBI:16761', 1.0)],
 'NADPH': [('CHEBI:13392', 1.0), ('CHEBI:16474', 1.0), ('CHEBI:57783', 1.0)],
 'Pi': [('CHEBI:28874', 1.0),
  ('CHEBI:74076', 1.0),
  ('CHEBI:74790', 1.0),
  ('CHEBI:35780', 1.0),
  ('CHEBI:36349', 1.0),
  ('CHEBI:36350', 1.0

In [11]:
preds

{'SBP': [],
 'S7P': ['C8N2O4', 'C26NO12P', 'PS', 'C8NO10P', 'C8N2O3S', '(C8)n'],
 'ATP': ['C8O4', 'C7O4', 'C36O8', 'O40PW12', 'C10N5O13P3'],
 'ADP': ['C12N3O6',
  'C9Cl2NO',
  'C8O2',
  'C26',
  'C5N3',
  'C3N2O2',
  'C12N',
  'C9N2O2',
  'C10N5O10P2',
  'C22O2',
  'C20N10O11P2'],
 'NADPH': ['C21N7O17P3'],
 'Pi': ['C11N2O3', 'C7NO', 'C10N5'],
 'CO2': ['CO', 'CO2', 'Co'],
 'GLYc': ['C4N2O2'],
 'SERc': ['C9N3O6S'],
 'SucPc': ['C32CuN8'],
 'NADH': ['C21N7O14P2'],
 'Pic': ['(C10N4O7P)n(C9H12N3O7P)nC38H50N14O30P4', 'C7NO', 'C50NO13P'],
 'ATPc': ['C6NO2', 'C5NS5'],
 'ADPc': ['C48NO8P', '(C8ClN)n', 'C9Cl2NO']}

In [49]:
recom.species.exist_annotation_formula

{'cca1_mrna': [],
 'toc1luc_mrna': [],
 'cca1luc_mrna': [],
 'luc_mrna': [],
 'luc': []}

In [27]:
recom.getSpeciesListAnnotation(pred_ids=recom.species.exist_annotation.keys())

[Recommendation(id='ROS', credibility=0.93, candidates=[('CHEBI:26523', 1.0)], urls=['https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI%3A26523']),
 Recommendation(id='damDNA', credibility=0.99, candidates=[('CHEBI:31882', 0.95), ('CHEBI:6121', 0.95), ('CHEBI:59990', 0.95)], urls=['https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI%3A31882', 'https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI%3A6121', 'https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI%3A59990']),
 Recommendation(id='ATP', credibility=0.97, candidates=[('CHEBI:15422', 1.0), ('CHEBI:74926', 1.0), ('CHEBI:15702', 1.0), ('CHEBI:53394', 1.0), ('CHEBI:61432', 1.0), ('CHEBI:37537', 1.0), ('CHEBI:30616', 1.0)], urls=['https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI%3A15422', 'https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI%3A74926', 'https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI%3A15702', 'https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI%3A53394', 'https://www.ebi.ac.uk/chebi/searchId.do?

In [18]:
one_res = recom.getReactionStatistics(model_mean=False)
one_res

In [9]:
one_biomd = 'BIOMD0000000190.xml'