In [1]:
# Update the iterator algorithm and make recommendations.
import collections
import copy
import itertools
import libsbml
import numpy as np
import os
import pickle
import pandas as pd
import sys
import matplotlib.pyplot as plt
%matplotlib inline  

# File name of a BioModels entry (BIOMD0000000012).
BIOMD_12 = 'BIOMD0000000012.xml'
# NOTE: machine-specific absolute path; every data directory below is derived
# from it, so this is the only line to edit when running elsewhere.
BASE_DIR = '/Users/woosubshin/Desktop/AutomateAnnotation/'
DATA_DIR = os.path.join(BASE_DIR, "DATA")
ALGO_DIR = os.path.join(DATA_DIR, "algo")
CHEBI_DIR = os.path.join(DATA_DIR, "chebi")
RHEA_DIR = os.path.join(DATA_DIR, "rhea")
BIOMODEL_DIR = os.path.join(DATA_DIR, "biomodels/curated_biomodels_31mar2021")
# Derived from DATA_DIR like its siblings (previously a second hard-coded copy
# of the absolute path); the resulting value is unchanged.
BIGG_DIR = os.path.join(DATA_DIR, "bigg")
ecoli_fpath = os.path.join(BIGG_DIR, "e_coli_core.xml")

# Parent of the current working directory; it contains the AMAS directory.
PROJ_DIR = os.path.join(os.getcwd(), os.pardir)
AMAS_DIR = os.path.join(PROJ_DIR, "AMAS")
# Guarded so that re-running this cell does not keep appending duplicates.
if PROJ_DIR not in sys.path:
  sys.path.append(PROJ_DIR)

# Supplementary-data directory, a sibling of the project directory.
SUPPL_DIR = os.path.join(PROJ_DIR, os.pardir, "AMAS_suppl")
ACCURACY_DIR = os.path.join(SUPPL_DIR, "data_for_credibility")

from AMAS import species_annotation as sa
from AMAS import reaction_annotation as ra
from AMAS import recommender
from AMAS import constants as cn
from AMAS import iterator as it
from AMAS import tools

# TODO: save and load the predicted species and reaction annotations.

In [2]:
# Load one curated BioModels file (BIOMD0000000190) two ways:
# directly through libsbml, and through the AMAS Recommender.
one_fpath = os.path.join(BIOMODEL_DIR, 'BIOMD0000000190.xml')
reader = libsbml.SBMLReader()
document = reader.readSBML(one_fpath)
# NOTE(review): `model` is not read by any cell visible here — confirm it is still needed.
model = document.getModel()
# The Recommender is constructed from the file path, not from `document`/`model`.
recom = recommender.Recommender(libsbml_fpath=one_fpath)

In [3]:
# Candidate-selection settings. Candidates scoring below `cutoff` are dropped;
# mssc='top' then keeps only the highest-scoring candidate(s), while
# mssc='above' keeps everything that passed the cutoff (see applyMSSC below).
# Both names are reassigned in the recommendation-building cell further down.
mssc = 'top'
cutoff = 0.0
# NOTE(review): model_mean is not read by any cell visible here — confirm its purpose.
model_mean = True

In [9]:
# Species recommendation for the literal string 'hydrogen'.
# Note: this passes mssc='above' explicitly rather than the `mssc` variable
# set in the previous cell.
res = recom.getSpeciesRecommendation(pred_str='hydrogen',
                                     mssc='above',
                                     cutoff=0.0)

In [10]:
# Identifiers used for the spot checks below — presumably species IDs in
# e_coli_core.xml; TODO confirm.
M_FDP_C = 'M_fdp_c'
M_ATP_C = 'M_atp_c'
# Second recommender, built from the E. coli core model file.
recom2 = recommender.Recommender(libsbml_fpath=ecoli_fpath)

In [17]:
# Spot check for M_FDP_C: take the first 17 entries of its E-score result
# (the result is indexed by the input string) and evaluate them.
# NOTE(review): 17 is a magic number — document why this many candidates are kept.
# NOTE(review): the string scored is the raw ID, while name_used comes from
# getNameToUse(ID) — confirm this mismatch is intended.
fdp_pred_spec = recom2.species.getEScores(inp_strs=[M_FDP_C])[M_FDP_C][:17]
fdp_score = recom2.species.evaluatePredictedSpeciesAnnotation(pred=fdp_pred_spec,
                                                            name_used=recom2.species.getNameToUse(M_FDP_C))

In [22]:
# Same spot check for M_ATP_C, keeping only the first 2 entries of its
# E-score result.
# NOTE(review): 2 is a magic number, and this cell duplicates the M_FDP_C cell
# with only the ID and slice length changed — a small helper taking
# (species_id, n_candidates) would remove the copy-paste.
atp_pred_spec = recom2.species.getEScores(inp_strs=[M_ATP_C])[M_ATP_C][:2]
atp_score = recom2.species.evaluatePredictedSpeciesAnnotation(pred=atp_pred_spec,
                                                            name_used=recom2.species.getNameToUse(M_ATP_C))

In [23]:
atp_score

0.97236743276001

In [3]:
# Two options:
# 1. MSSC best with cutoff=0.0 (default option)
# 2. MSSC above (cutoff=0.0?)

# Basically, species should return a list of ranked candidates with scores.

In [4]:
# Edit-distance prediction for the string 'hydrogen'; the trailing expression
# displays the returned dict.
spec_res = recom.species.predictAnnotationByEditDistance('hydrogen')
spec_res

{'name_used': 'hydrogen',
 'chebi': ['CHEBI:18276', 'CHEBI:49637'],
 'match_score': [('CHEBI:18276', 1.0), ('CHEBI:49637', 1.0)],
 'formula': ['H']}

In [5]:
import editdistance
import operator
from AMAS.species_annotation import CHEBI_LOW_SYNONYMS, CHARCOUNT_DF, CHEBI_DF

In [6]:
# C-score candidates for 'hydrogen'; the result is indexed by input string
# (read as pred['hydrogen'] in the next cell).
pred = recom.species.getCScores(['hydrogen'])

In [7]:
# Evaluate the 'hydrogen' candidates with the fitted model passed explicitly
# (sa.SPECIES_RF); the other evaluation calls in this notebook omit fitted_model.
recom.species.evaluatePredictedSpeciesAnnotation(
                                         pred=pred['hydrogen'],
                                         name_used='hydrogen',
                                         fitted_model=sa.SPECIES_RF)

0.9021236234028662

In [45]:
# Scratch defaults — presumably mirroring the keyword arguments of the
# recommendation method being prototyped; TODO confirm.
pred_strs=None
pred_ids=None
# NOTE(review): `update` and `get_df` are not read by any cell visible here.
update=True
# `method` selects the scorer in the recommendation-building cell below:
# 'edist' -> getEScores, 'cdist' -> getCScores.
method='cdist'
get_df=False



In [22]:
recom.species.names

{'SAM': 'S-adenosyl-L-methionine',
 'A': 'S-adenosylmethioninamine',
 'P': 'Putrescine',
 'S': 'Spermine',
 'D': 'Spermidine',
 'aS': 'N1-Acetylspermine',
 'aD': 'N1-Acetylspermidine',
 'Met': 'Methionine',
 'ORN': 'L-Ornithine',
 'AcCoA': 'Acetyl-CoA',
 'CoA': 'CoA'}

In [87]:
def applyMSSC(pred,
              mssc,
              cutoff):
  """
  Apply the MSSC selection rule to predicted candidates.

  Candidates scoring below `cutoff` are always dropped. Of the
  remaining candidates, 'top' keeps only those tied for the highest
  score, while 'above' keeps all of them.

  Parameters
  ----------
  pred: list-tuple
      [(CHEBI:XXXXX, 1.0), etc.]
  mssc: string
      Either 'top' or 'above'.
  cutoff: float
      Minimum score (inclusive) a candidate needs to be kept.

  Returns
  -------
  filt: list-tuple
      [(CHEBI:XXXXX, 1.0), etc.], in the same order as in `pred`.
      Empty list if no candidate reaches the cutoff.

  Raises
  ------
  ValueError
      If `mssc` is neither 'top' nor 'above'.
  """
  # Validate up front: previously an unknown mssc fell through both branches
  # and failed with UnboundLocalError on the return statement.
  if mssc not in ('top', 'above'):
    raise ValueError("mssc should be 'top' or 'above', got %r" % (mssc,))
  filt_pred = [val for val in pred if val[1] >= cutoff]
  # 'above' wants everything that passed; an empty list has no maximum.
  if mssc == 'above' or not filt_pred:
    return filt_pred
  max_val = max(val[1] for val in filt_pred)
  # Ties for the best score are all kept.
  return [val for val in filt_pred if val[1] == max_val]

In [95]:
# Prototype of the species recommendation: score the input names, apply MSSC,
# and wrap each species' surviving candidates in a cn.Recommendation.
pred_strs = ['S-adenosyl-L-methionine', 'S-adenosylmethioninamine']
# pred_strs = None
# pred_ids = ['SAM', 'A'] 
pred_ids = None
mssc = 'top'
cutoff = 0.0

# `method` ('edist' or 'cdist') is set in the scratch-defaults cell above.
scoring_methods = {'edist': recom.species.getEScores,
                   'cdist': recom.species.getCScores}
# ids_dict maps the reported identifier -> the string that is actually scored.
if pred_strs:
  ids_dict = {k: k for k in pred_strs}
  inp_strs = pred_strs
elif pred_ids:
  ids_dict = {k: recom.species.getNameToUse(inp_id=k)
              for k in pred_ids}
  inp_strs = list(ids_dict.values())
else:
  # Without this guard the cell either failed with a NameError or silently
  # reused ids_dict/inp_strs left over from an earlier run of the kernel.
  raise ValueError("Either pred_strs or pred_ids must be a non-empty list.")
pred_res = scoring_methods[method](inp_strs)
# convert {name_used:[]} to {id:[]} and apply mssc
conv_res = {k: applyMSSC(pred_res[name], mssc, cutoff)
            for k, name in ids_dict.items()}

result = []
for spec, candidates in conv_res.items():
  # NOTE(review): `candidates` is empty when nothing reaches the cutoff —
  # confirm evaluatePredictedSpeciesAnnotation accepts an empty list.
  pred_score = recom.species.evaluatePredictedSpeciesAnnotation(pred=candidates,
                                                                name_used=ids_dict[spec])
  # [6:] drops the first six characters of the candidate ID (the 'CHEBI:'
  # prefix in the IDs shown in this notebook) before appending it to the URL.
  urls = [cn.CHEBI_DEFAULT_URL + chebi_id[6:] for chebi_id, _ in candidates]
  labels = [cn.REF_CHEBI2LABEL[chebi_id] for chebi_id, _ in candidates]
  rounded = [(chebi_id, np.round(score, cn.ROUND_DIGITS))
             for chebi_id, score in candidates]
  one_recom = cn.Recommendation(spec,
                                np.round(pred_score, cn.ROUND_DIGITS),
                                rounded,
                                urls,
                                labels)
  result.append(one_recom)

In [96]:
result

[Recommendation(id='S-adenosyl-L-methionine', credibility=0.974, candidates=[('CHEBI:15414', 1.0), ('CHEBI:59789', 1.0)], urls=['https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI%3A15414', 'https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI%3A59789'], labels=['S-adenosyl-L-methionine', 'S-adenosyl-L-methionine zwitterion']),
 Recommendation(id='S-adenosylmethioninamine', credibility=0.958, candidates=[('CHEBI:15625', 1.0)], urls=['https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI%3A15625'], labels=['S-adenosylmethioninamine'])]

In [89]:
dummy_res = [('CHEBI:15414', 0.9),('CHEBI:59789', 0.5)]

In [93]:
applyMSSC(dummy_res, mssc='top', cutoff=0.8)

[('CHEBI:15414', 0.9)]

In [38]:
conv_res.keys()

dict_keys(['SAM', 'A'])

In [17]:
recom.species.getNameToUse(inp_id='SAM')

'S-adenosyl-L-methionine'

In [15]:
ids_dict

{None: None}

In [None]:
    if method == 'edist':
      if pred_str:
        name_to_use = pred_str
        given_id = pred_str
      elif pred_id:
        name_to_use = self.species.getNameToUse(inp_id=pred_id)
        given_id = pred_id
      pred_res = self.species.predictAnnotationByEditDistance(name_to_use)  
    elif method == 'cdist':
      if pred_str: 
        given_id = pred_str
      elif pred_id:
        given_id = pred_id
      pred_res = self.species.predictAnnotationByCosineSimilarity(inp_strs=[given_id])[given_id]
    #
    pred_score = self.species.evaluatePredictedSpeciesAnnotation(pred_result=pred_res)
    urls = [cn.CHEBI_DEFAULT_URL + val[6:] for val in pred_res[cn.CHEBI]]
    labels = [cn.REF_CHEBI2LABEL[val] for val in pred_res[cn.CHEBI]]
    result = cn.Recommendation(given_id,
                               np.round(pred_score, cn.ROUND_DIGITS),
                               pred_res[cn.MATCH_SCORE],
                               urls,
                               labels)