In [1]:
# testing remaining recommender methods
import collections

import compress_pickle
import copy
import editdistance
import itertools
import libsbml
import numpy as np
import os
import pickle
import pandas as pd
import sys
import time
import matplotlib.pyplot as plt
%matplotlib inline  

BIOMD_12 = 'BIOMD0000000012.xml'
BASE_DIR = '/Users/woosubs/Desktop/AutomateAnnotation/'
DATA_DIR = os.path.join(BASE_DIR, "DATA")
ALGO_DIR = os.path.join(DATA_DIR, "algo")
CHEBI_DIR = os.path.join(DATA_DIR, "chebi")
FIGURE_DIR = '/Users/woosubs/Desktop/AutomateAnnotation/AMAS_suppl/figure_files'
RHEA_DIR = os.path.join(DATA_DIR, "rhea")
BIOMODEL_DIR = os.path.join(DATA_DIR, "biomodels/curated_biomodels_31mar2021")
BIGG_DIR = '/Users/woosubs/Desktop/AutomateAnnotation/DATA/bigg'
ecoli_fpath = os.path.join(BIGG_DIR, "e_coli_core.xml")

PROJ_DIR = os.path.join(os.getcwd(), os.pardir)
AMAS_DIR = os.path.join(PROJ_DIR, "AMAS")
sys.path.append(PROJ_DIR)

SUPPL_DIR = os.path.join(PROJ_DIR, os.pardir, "AMAS_suppl")
ACCURACY_DIR = os.path.join(SUPPL_DIR, "data_for_credibility")

from AMAS import species_annotation as sa
from AMAS import reaction_annotation as ra
from AMAS import recommender
from AMAS import constants as cn
from AMAS import iterator as it
from AMAS import tools
# import warnings

In [2]:
BIOMD_190_PATH = os.path.join(cn.TEST_DIR, 'BIOMD0000000190.xml')
min_score = 0.0
method = 'top'

In [3]:
reacs = None
min_score = 0.5
method = 'top'
# outfile = 'species_rec.csv'

recom = recommender.Recommender(libsbml_fpath=BIOMD_190_PATH)
recom.current_type = 'reaction'
# if nothing is given, predict all IDs
if reacs is None:
  reacs = recom.getReactionIDs()
print("...\nAnalyzing %d reactions...\n" % len(reacs))
res = recom.getReactionListRecommendation(pred_ids=reacs, get_df=True)
for idx, one_df in enumerate(res):
  filt_df = recom.autoSelectAnnotation(df=one_df,
                                       min_score=min_score,
                                       method=method)
  recom.updateSelection(reacs[idx], filt_df)
# # save file to csv
# recom.saveToCSV(outfile)
# print("Recommendations saved as:\n%s\n" % os.path.abspath(outfile))

...
Analyzing 13 reactions...



In [6]:
empties = []
for k in recom.selection['reaction']:
  if recom.selection['reaction'][k].shape[0] == 0:
    empties.append(k)
print(empties)

['PAO_for_aD', 'PAO_for_aS', 'aD_efflux']


In [20]:
for one_t in recom.selection.keys():
  select_type = recom.selection[one_t]
  filt_select_type = {k:select_type[k] for k in select_type.keys() \
                     if select_type[k].shape[0] > 0}
  recom.selection[one_t] = filt_select_type

In [24]:
len(recom.selection['reaction'])

10

In [9]:
model = recom.sbml_document.getModel()
TYPE_EXISTING_ATTR = {'species': recom.species.exist_annotation,
                      'reaction': recom.reactions.exist_annotation}
ELEMENT_FUNC = {'species': model.getSpecies,
                'reaction': model.getReaction}
pd.set_option('display.max_colwidth', 255)

In [13]:
one_type = 'reaction'
# edf: element_df
edfs = []    
# for one_type in recom.selection.keys():
type_selection = recom.selection[one_type]
for k in list(type_selection.keys()):   
  one_edf = type_selection[k]
  if one_edf.shape[0] == 0:
    print("Continuing: %s" % k)
    continue
  annotations = list(one_edf['annotation'])
  match_scores = list(one_edf[cn.DF_MATCH_SCORE_COL])
  labels = list(one_edf['label'])
  # if there is existing annotation among predicted candidates;
  if k in TYPE_EXISTING_ATTR[one_type].keys():
    existings = [1 if val in TYPE_EXISTING_ATTR[one_type][k] else 0 \
               for idx, val in enumerate(one_edf['annotation'])]
    upd_annotation = ['keep' if val in TYPE_EXISTING_ATTR[one_type][k] else 'ignore' \
                        for idx, val in enumerate(one_edf['annotation'])]
    annotation2add = [val for val in TYPE_EXISTING_ATTR[one_type][k] \
                        if val not in list(one_edf['annotation'])]
    # if there doesn't exist existing annotataion among predicted candidates;
  else:
    existings = [0] * len(annotations)
    upd_annotation = ['ignore'] * len(annotations)
    annotation2add = []
  # handling existing annotations that were not predicted
  for new_anot in annotation2add:
    annotations.append(new_anot)
    if one_type=='reaction':
      match_scores.append(recom.getMatchScoreOfRHEA(k, new_anot))
      labels.append(cn.REF_RHEA2LABEL[new_anot])
    elif one_type=='species':
      match_scores.append(recom.getMatchScoreOfCHEBI(k, new_anot))
      labels.append(cn.REF_CHEBI2LABEL[new_anot])
    existings.append(1)
    upd_annotation.append('keep')
  new_edf = pd.DataFrame({'type': [one_type]*len(annotations),
                            'id': [k]*len(annotations),
                            'display name': [ELEMENT_FUNC[one_type](k).name]*len(annotations),
                            'meta id': [ELEMENT_FUNC[one_type](k).meta_id]*len(annotations),
                            'annotation': annotations,
                            'annotation label': labels,
                            cn.DF_MATCH_SCORE_COL: match_scores,
                            'existing': existings,
                            cn.DF_UPDATE_ANNOTATION_COL: upd_annotation})
  edfs.append(new_edf)
res = pd.concat(edfs)

Continuing: PAO_for_aD
Continuing: PAO_for_aS
Continuing: aD_efflux


In [16]:
len(np.unique(res['id']))

10

In [23]:
print('Choose either "top" or "above". "top" recommends ' +\
                                       'the best annotations that are above the min_score, ' +\
                                       'and "above" recommends all annotations that are above ' +\
                                       'the min_score. Default is "top".')

Choose either "top" or "above". "top" recommends the best annotations that are above the min_score, and "above" recommends all annotations that are above the min_score. Default is "top".


In [18]:
recom.reactions.reaction_components

{'ODC': ['P', 'ORN'],
 'SAMdc': ['A', 'SAM'],
 'SSAT_for_S': ['S', 'AcCoA', 'CoA', 'aS'],
 'SSAT_for_D': ['AcCoA', 'CoA', 'aD', 'D'],
 'PAO_for_aD': ['P', 'aD'],
 'PAO_for_aS': ['aS', 'D'],
 'SpdS': ['A', 'P', 'D'],
 'SpmS': ['A', 'S', 'D'],
 'MAT': ['Met', 'SAM'],
 'VCoA': ['AcCoA', 'CoA'],
 'VacCoA': ['AcCoA', 'CoA'],
 'P_efflux': ['P'],
 'aD_efflux': ['aD']}

In [16]:
one_path = os.getcwd()
one_path

'/Users/woosubs/Desktop/AutomateAnnotation/AMAS/notebooks'

In [21]:
os.path.isfile('updated_model.xml')

True

In [2]:

BIOMD_634_PATH = os.path.join(cn.TEST_DIR, 'BIOMD0000000634.xml')
E_COLI_PATH = os.path.join(cn.TEST_DIR, 'e_coli_core.xml')
ONE_SPEC_CAND = ('CHEBI:15414', 1.0)
ONE_SPEC_URL = 'https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI%3A15414'
TWO_SPEC_CAND = ('CHEBI:15729', 1.0)
TWO_SPEC_URL = 'https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI%3A15729'

ONE_REAC_CAND = ('RHEA:28827', 1.0)
ONE_REAC_URL = 'https://www.rhea-db.org/rhea/28827'

SPECIES_SAM = 'SAM'
SPECIES_SAM_NAME = 'S-adenosyl-L-methionine'
SPECIES_ORN = 'ORN'
SPECIES_ATP = 'ATP'
REACTION_ODC = 'ODC'
REACTION_SAMDC = 'SAMdc'
REACTION_SPMS = 'SpmS'
R_PFK = 'R_PFK'
R_PFL = 'R_PFL'
ECOLI_REACTIONS = [R_PFK, R_PFL]
ECOLI_ATP = 'M_atp_c'
ECOLI_RHEA = 'RHEA:12420'

ONE_CHEBI = 'CHEBI:15414'
ATP_CHEBI = 'CHEBI:30616'
FORMULA_ATP = 'C10N5O13P3'

RESULT_RECOM = cn.Recommendation('R_PFK', 0.817,
                                 [('RHEA:12420', 0.6), ('RHEA:13377', 0.6)],
                                 ['https://www.rhea-db.org/rhea/12420', 'https://www.rhea-db.org/rhea/13377'],
                                 ['tagatose-6-phosphate kinase activity', 'phosphoglucokinase activity'])

In [3]:
recom = recommender.Recommender(libsbml_fpath=BIOMD_190_PATH)

In [11]:
recom.saveToCSV()

In [12]:
df = pd.read_csv('recommendation.csv')

In [13]:
df

Unnamed: 0,file,type,id,display name,meta id,annotation,annotation label,match score,existing,USE ANNOTATION
0,BIOMD0000000190.xml,species,SAM,S-adenosyl-L-methionine,metaid_0000036,CHEBI:15414,S-adenosyl-L-methionine,1.0,1,0


In [17]:
recom.saveToSBML('biomd190_upd.xml')

Annotation recommended for 1 species:
[SAM]



In [18]:
recom2 = recommender.Recommender(libsbml_fpath='biomd190_upd.xml')

In [20]:
recom2.species.exist_annotation['SAM']

['CHEBI:15414']

In [22]:
recom.printSummary(saved=['SAM', 'A'], element_type='species')

Annotation recommended for 2 species:
[SAM, A]



In [23]:
'Annotation recommended for 2 species:\n[SAM, A]'

'Annotation recommended for 2 species:\n + [SAM, A]'

In [24]:
recom.getReactionIDs()

['ODC',
 'SAMdc',
 'SSAT_for_S',
 'SSAT_for_D',
 'PAO_for_aD',
 'PAO_for_aS',
 'SpdS',
 'SpmS',
 'MAT',
 'VCoA',
 'VacCoA',
 'P_efflux',
 'aD_efflux']

In [26]:
recom.printSummary(saved=['ODC', 'SAMdc', 'SSAT_for_S'], element_type='reaction')

Annotation recommended for 3 reaction(s):
[ODC, SAMdc, SSAT_for_S]

