In [1]:
# Update the iterator algorithm and make recommendations.
import collections
import compress_pickle
import copy
import itertools
import libsbml
import numpy as np
import operator
import os
import pickle
import pandas as pd
import sys
import matplotlib.pyplot as plt
%matplotlib inline  

BIOMD_12 = 'BIOMD0000000012.xml'
# Root of the local data checkout. The default is the original author's
# machine; set the AMAS_BASE_DIR environment variable to run elsewhere.
BASE_DIR = os.environ.get('AMAS_BASE_DIR',
                          '/Users/woosubshin/Desktop/AutomateAnnotation/')
DATA_DIR = os.path.join(BASE_DIR, "DATA")
ALGO_DIR = os.path.join(DATA_DIR, "algo")
CHEBI_DIR = os.path.join(DATA_DIR, "chebi")
RHEA_DIR = os.path.join(DATA_DIR, "rhea")
BIOMODEL_DIR = os.path.join(DATA_DIR, "biomodels/curated_biomodels_31mar2021")
# Derived from DATA_DIR like its siblings (same value as the former literal
# when BASE_DIR is left at its default).
BIGG_DIR = os.path.join(DATA_DIR, "bigg")
ecoli_fpath = os.path.join(BIGG_DIR, "e_coli_core.xml")

# The project root is the parent of the directory the notebook runs from.
PROJ_DIR = os.path.join(os.getcwd(), os.pardir)
AMAS_DIR = os.path.join(PROJ_DIR, "AMAS")
# Make the AMAS package importable; guarded so that re-running this cell
# does not keep appending the same entry to sys.path.
if PROJ_DIR not in sys.path:
  sys.path.append(PROJ_DIR)

SUPPL_DIR = os.path.join(PROJ_DIR, os.pardir, "AMAS_suppl")
ACCURACY_DIR = os.path.join(SUPPL_DIR, "data_for_credibility")

from AMAS import annotation_maker as am
from AMAS import species_annotation as sa
from AMAS import reaction_annotation as ra
from AMAS import recommender
from AMAS import constants as cn
from AMAS import iterator as it
from AMAS import tools

# Per-model accuracy tables; the distinct values of their 'model' column are
# the BioModels to analyze for species and for reactions respectively.
spec_acc_fpath = os.path.join(ACCURACY_DIR, 'biomd_individual_species_accuracy.csv')
spec_ml_df = pd.read_csv(spec_acc_fpath)
biomds_spec = np.unique(spec_ml_df['model'])
num_biomds_spec = len(biomds_spec)
print("Number of BioModels to be analyzed for SPECIES: %d" % num_biomds_spec)

reac_acc_fpath = os.path.join(ACCURACY_DIR, 'biomd_individual_reactions_accuracy.csv')
reac_ml_df = pd.read_csv(reac_acc_fpath)
biomds_reac = np.unique(reac_ml_df['model'])
num_biomds_reac = len(biomds_reac)
print("Number of BioModels to be analyzed for REACTIONS: %d" % num_biomds_reac)

Number of BioModels to be analyzed for SPECIES: 306
Number of BioModels to be analyzed for REACTIONS: 131


In [2]:
# Load one curated BioModel (BIOMD0000000190) into a Recommender; the
# example cells that follow query this `recom` object.
one_fpath = os.path.join(BIOMODEL_DIR, 'BIOMD0000000190.xml')
recom = recommender.Recommender(libsbml_fpath=one_fpath)

In [3]:
# List the species IDs of the loaded model.
recom.getSpeciesIDs()

['SAM', 'A', 'P', 'S', 'D', 'aS', 'aD', 'Met', 'ORN', 'AcCoA', 'CoA']

In [4]:
# With min_len=4000 the call reports that no species is left after the
# element filter (see the message printed below).
recom.recommendSpecies(min_len=4000)

No species after the element filter.


In [5]:
# With min_len=40 the call reports that no reaction is left after the
# element filter (see the message printed below).
recom.recommendReactions(min_len=40)

No reaction after the element filter.


In [6]:
# Recommend annotations for two reactions selected by ID and display the
# resulting table.
rec = recom.recommendReactions(ids=['ODC', 'SAMdc'])
rec

Unnamed: 0,file,type,id,display name,meta id,annotation,annotation label,match score,existing,UPDATE ANNOTATION
0,BIOMD0000000190.xml,reaction,ODC,Ornithine decarboxylase,metaid_0000062,RHEA:22964,ornithine decarboxylase activity,1.0,1,keep
1,BIOMD0000000190.xml,reaction,ODC,Ornithine decarboxylase,metaid_0000062,RHEA:28827,L-ornithine(out) + putrescine(in) = L-ornithine(in) + putrescine(out),1.0,0,ignore
2,BIOMD0000000190.xml,reaction,ODC,Ornithine decarboxylase,metaid_0000062,RHEA:59048,D-ornithine + H(+) = CO2 + putrescine,1.0,0,ignore
3,BIOMD0000000190.xml,reaction,SAMdc,S-adenosylmethionine decarboxylase,metaid_0000063,RHEA:15981,adenosylmethionine decarboxylase activity,0.5,1,keep


In [7]:
# Recommendations for two species IDs with get_df=True; the result shown in
# the next cell is a list with one table per requested species.
spec_res = recom.getSpeciesListRecommendation(pred_ids=['SAM', 'ORN'],
                                              get_df=True)

In [8]:
# Display the per-species recommendation tables held in spec_res.
spec_res

[      annotation  match score                               label
 SAM                                                              
 1    CHEBI:15414          1.0             S-adenosyl-L-methionine
 2    CHEBI:59789          1.0  S-adenosyl-L-methionine zwitterion,
       annotation  match score              label
 ORN                                             
 1    CHEBI:15729          1.0        L-ornithine
 2    CHEBI:46911          1.0  L-ornithinium(1+)]

In [9]:
# Recommend annotations for two species selected by ID and display the
# resulting table.
spec_rec = recom.recommendSpecies(['SAM', 'ORN'])
spec_rec

Unnamed: 0,file,type,id,display name,meta id,annotation,annotation label,match score,existing,UPDATE ANNOTATION
0,BIOMD0000000190.xml,species,SAM,S-adenosyl-L-methionine,metaid_0000036,CHEBI:15414,S-adenosyl-L-methionine,1.0,1,keep
1,BIOMD0000000190.xml,species,SAM,S-adenosyl-L-methionine,metaid_0000036,CHEBI:59789,S-adenosyl-L-methionine zwitterion,1.0,0,ignore
2,BIOMD0000000190.xml,species,ORN,L-Ornithine,metaid_0000044,CHEBI:15729,L-ornithine,1.0,1,keep
3,BIOMD0000000190.xml,species,ORN,L-Ornithine,metaid_0000044,CHEBI:46911,L-ornithinium(1+),1.0,0,ignore


In [10]:
# Baseline species recommendations for every species in the model.
specs = recom.getSpeciesIDs()
print("...\nAnalyzing %d species...\n" % len(specs))
res_spec = recom.getSpeciesListRecommendation(pred_ids=specs)

# cur_form: species ID -> distinct reference formulas of its candidates.
# Candidates without an entry in cn.REF_CHEBI2FORMULA are left out; the
# order inside each list comes from a set and is therefore arbitrary.
cur_form = dict()
for one_rec in res_spec:
  cands = [val[0] for val in one_rec.candidates]
  known_formulas = {cn.REF_CHEBI2FORMULA[k] for k in cands
                    if k in cn.REF_CHEBI2FORMULA}
  one_form = list(known_formulas)
  cur_form[one_rec.id] = one_form

# Reaction recommendations are derived from the species recommendations.
reacts = recom.getReactionIDs()
print("...\nAnalyzing %d reaction(s)...\n" % len(reacts))
res_reac = recom.getReactionListRecommendation(
    pred_ids=reacts,
    spec_res=res_spec)

...
Analyzing 11 species...

...
Analyzing 13 reaction(s)...



In [11]:
# Display the species ID -> candidate formula lists mapping built above.
cur_form

{'SAM': ['C15N6O5S'],
 'A': ['C14N6O3S'],
 'P': ['C4N2'],
 'S': ['C10N4'],
 'D': ['C7N3'],
 'aS': ['C12N4O'],
 'aD': ['C9N3O'],
 'Met': ['C5NO2S'],
 'ORN': ['C5N2O2'],
 'AcCoA': ['C23N7O17P3S'],
 'CoA': ['C21N7O16P3S', 'C2O2']}

In [12]:
# Check the container type returned by recom.getReactionIDs().
type(reacts)

list

In [13]:
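# NOTE: superseded draft, kept for reference only; the cells below call
# recom.optimizePrediction(...) instead. As written, this draft reads
# res_reac, res_spec, recom and reacts from the notebook namespace rather
# than from its own parameters.
# def optimizePrediction(cur_spec_formula,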
#                        reaction_cl,
#                        reactions_to_update):
#   """
#   Optimize prediction using iteration.
  
#   Parameters
#   ----------
#   cur_spec_formula: dict
#       (predicted) formula of species

#   reaction_cl: AMAS.reaction_annotation.ReactionAnnotation
  
#   reactions_to_update: list
#       IDs of reactions
      
#   Returns
#   -------
#   fin_spec_recom: Recommendation (namedtuple)
  
#   fin_reac_recom: Recommendation (namedtuple)
#   """
#   anot_iter = it.Iterator(cur_spec_formula=cur_spec_formula,
#                           reaction_cl=reaction_cl,
#                           reactions_to_update=reactions_to_update)
#   res_iter = anot_iter.match()
#   recoms_tobe_added = []
#   for one_spec in res_iter.keys():
#     pred_reacs = [val.id for val in res_reac]
#     reacs_using_one_spec = [val for val in pred_reacs \
#                             if one_spec in reaction_cl.reaction_components[val]]
#     filt_res_reac = [val for val in res_reac if val.id in reacs_using_one_spec]
#     # match score of reactions using that species
#     adj_match_score = np.mean(list(itertools.chain(*[[cand[1] for cand in val.candidates] \
#                               for val in filt_res_reac])))
#     cands = res_iter[one_spec]
#     adj_formulas = list(set([cn.REF_CHEBI2FORMULA[k] \
#                              for k in cands if k in cn.REF_CHEBI2FORMULA.keys()]))
#     urls = [cn.CHEBI_DEFAULT_URL + val[6:] for val in cands]
#     labels = [cn.REF_CHEBI2LABEL[val] for val in cands]
#     adj_recom = cn.Recommendation(one_spec,
#                                   [(val, adj_match_score) for val in cands],
#                                   urls,
#                                   labels)
#     recoms_tobe_added.append(adj_recom)
#   fin_spec_recom = recoms_tobe_added + \
#                    [val for val in res_spec if val.id not in res_iter.keys()]
#   fin_reac_recom = recom.getReactionListRecommendation(pred_ids=reacts,
#                                                        spec_res=fin_spec_recom)
#   return fin_spec_recom, fin_reac_recom

In [5]:
# Search for the first model whose recommendations change when
# recom.optimizePrediction is applied to the baseline predictions.
for idx, one_biomd in enumerate(['BIOMD0000000017.xml']):
  # progress marker, printed for every 20th model
  if not idx % 20:
    print("we are at", idx)

  one_fpath = os.path.join(BIOMODEL_DIR, one_biomd)
  recom = recommender.Recommender(libsbml_fpath=one_fpath)

  # baseline species recommendations
  specs = recom.getSpeciesIDs()
  res_spec = recom.getSpeciesListRecommendation(pred_ids=specs)

  # baseline reaction recommendations, derived from the species ones
  reacts = recom.getReactionIDs()
  res_reac = recom.getReactionListRecommendation(
      pred_ids=reacts,
      spec_res=res_spec)

  opt_spec_recom, opt_reac_recom = recom.optimizePrediction(
      pred_spec=res_spec,
      pred_reac=res_reac,
      reactions_to_update=reacts)

  # stop as soon as either list differs from its baseline
  oner = opt_spec_recom != res_spec
  twor = opt_reac_recom != res_reac
  if oner or twor:
    print("Found!")
    break

we are at 0
Found!


In [9]:
# Baseline (pre-optimization) species recommendation for 'AcetoinIn'.
[val for val in res_spec if val.id == 'AcetoinIn']

[Recommendation(id='AcetoinIn', candidates=[('CHEBI:2430', 1.0)], urls=['https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI%3A2430'], labels=['aconitine'])]

In [10]:
# Species recommendation for 'AcetoinIn' as returned by optimizePrediction.
[val for val in opt_spec_recom if val.id == 'AcetoinIn']

[Recommendation(id='AcetoinIn', candidates=[('CHEBI:15378', 0.98792749244713)], urls=['https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI%3A15378'], labels=['hydron'])]

In [3]:
# Manual version of the prediction update: run the iterator on each model
# and stop at the first one whose recommendations change.
for idx, one_biomd in enumerate(biomds_spec[3:5]):
  # progress marker, printed for every 20th model
  if idx % 20 == 0:
    print("we are at", idx)

  one_fpath = os.path.join(BIOMODEL_DIR, one_biomd)
  recom = recommender.Recommender(libsbml_fpath=one_fpath)

  # Baseline species recommendations, plus the distinct reference formulas
  # of each species' candidates (this mapping is the iterator's input).
  specs = recom.getSpeciesIDs()
  res_spec = recom.getSpeciesListRecommendation(pred_ids=specs)
  cur_form = dict()
  for one_rec in res_spec:
    cur_cands = [val[0] for val in one_rec.candidates]
    one_form = list(set([cn.REF_CHEBI2FORMULA[k] \
                      for k in cur_cands if k in cn.REF_CHEBI2FORMULA]))
    cur_form[one_rec.id] = one_form

  reacts = recom.getReactionIDs()
  res_reac = recom.getReactionListRecommendation(pred_ids=reacts,
                                                 spec_res=res_spec)
  ##
  anot_iter = it.Iterator(cur_spec_formula=cur_form,
                          reaction_cl=recom.reactions,
                          reactions_to_update=reacts)
  # res_iter is used below as a mapping: species ID -> replacement candidates
  res_iter = anot_iter.match()
  recoms_tobe_added = []
  for one_spec in res_iter.keys():
    # Recommendations of the reactions whose components include this species.
    # Single pass over res_reac; the earlier form rebuilt a list of reaction
    # IDs per species and then searched it once per reaction.
    filt_res_reac = [val for val in res_reac \
                     if one_spec in recom.reactions.reaction_components[val.id]]
    # match score of reactions using that species: mean over every candidate
    # of those reactions.
    # NOTE(review): if no reaction (or no candidate) is found this is the
    # mean of an empty list, i.e. nan with a RuntimeWarning - confirm that
    # this cannot happen for species returned by the iterator.
    adj_match_score = np.mean([cand[1] \
                               for val in filt_res_reac \
                               for cand in val.candidates])
    cands = res_iter[one_spec]
    # val[6:] drops the first six characters of each candidate
    # (presumably the 'CHEBI:' prefix, as in 'CHEBI:15414')
    urls = [cn.CHEBI_DEFAULT_URL + val[6:] for val in cands]
    labels = [cn.REF_CHEBI2LABEL[val] for val in cands]
    adj_recom = cn.Recommendation(one_spec,
                                  [(val, adj_match_score) for val in cands],
                                  urls,
                                  labels)
    recoms_tobe_added.append(adj_recom)
  # Updated species first, then every untouched baseline recommendation.
  fin_spec_recom = recoms_tobe_added + \
                   [val for val in res_spec if val.id not in res_iter.keys()]
  fin_reac_recom = recom.getReactionListRecommendation(pred_ids=reacts,
                                                       spec_res=fin_spec_recom)
  # stop as soon as either list differs from its baseline
  oner = fin_spec_recom!=res_spec
  twor = fin_reac_recom!=res_reac
  if any([oner, twor]):
    print("Found!")
    break

we are at 0
Found!


In [57]:
# Loop index left in the kernel by the most recent loop run.
# NOTE(review): the displayed value 3 cannot come from the two-element
# biomds_spec[3:5] loop shown above (idx would be 0 or 1); it looks like
# output from an earlier run over a longer list - re-run to confirm.
idx

3

In [22]:
# Result of anot_iter.match(). When it is empty, the loop over its keys
# adds no updated species recommendation.
res_iter

{}

In [None]:
# combined

In [None]:
def recommendAnnotation(mssc='top',
                        cutoff=0.0,
                        outtype='table'):
  """
  Stub for an annotation recommender; not implemented yet.

  None of the arguments is used at the moment.

  Parameters
  ----------
  mssc: str
      Defaults to 'top'.
      NOTE(review): presumably a match-score selection criterion - confirm.

  cutoff: float
      Defaults to 0.0.
      NOTE(review): presumably a match-score threshold - confirm.

  outtype: str
      Defaults to 'table'.
      NOTE(review): presumably the output format - confirm.

  Returns
  -------
  None
  """
  return None

In [13]:
# Show the name that getNameToUse returns for species 'SAM'.
recom.species.getNameToUse('SAM')

'S-adenosyl-L-methionine'

In [19]:
# getRecomSBML
model = recom.sbml_document.getModel()
ELEMENT_FUNC = {'species': model.getSpecies,
                'reaction': model.getReaction}
#######################
one_type = 'species'
one_recomt = spec_recomt
#######################
maker = am.AnnotationMaker(one_type)

In [None]:
# Fragment of a method body (it reads `self`, ELEMENT_TYPES and `option`
# from its surroundings): write the selected annotations into the SBML
# model and record which elements were updated.
model = self.sbml_document.getModel()
# element type -> model accessor used to fetch one element by its ID
ELEMENT_FUNC = {'species': model.getSpecies,
                'reaction': model.getReaction}
# dictionary with empty lists; 
saved_elements = {k:[] for k in ELEMENT_TYPES}
for one_type in ELEMENT_TYPES:
  type_selection = self.selection[one_type]
  maker = am.AnnotationMaker(one_type)
  sel2save = type_selection

  for one_k in sel2save.keys():
    one_element = ELEMENT_FUNC[one_type](one_k)
    meta_id = one_element.meta_id
    df = sel2save[one_k]
    cands2save = list(df['annotation'])
    if not cands2save:
      # nothing selected for this element
      continue
    if option == 'augment':
      # add the selected candidates to the element's existing annotation
      orig_annotation = one_element.getAnnotationString()
      annotation_str = maker.addAnnotation(cands2save,
                                           orig_annotation,
                                           meta_id)
    elif option == 'replace':
      # build a new annotation from the selected candidates only
      annotation_str = maker.getAnnotationString(cands2save, meta_id)
    else:
      # Without this branch annotation_str would be unbound, or still hold
      # the previous element's annotation and be written to this element.
      raise ValueError("option must be 'augment' or 'replace', got %r" % (option,))
    one_element.setAnnotation(annotation_str)
    saved_elements[one_type].append(one_k)