In [1]:
# testing ML models
import collections
import copy
import itertools
import libsbml
import numpy as np
import os
import pickle
import pandas as pd
import sys
import matplotlib.pyplot as plt
%matplotlib inline  

# ---------------------------------------------------------------------------
# Path configuration.
# Every data directory is derived from BASE_DIR so that relocating the data
# only requires editing a single line.
# NOTE(review): BASE_DIR is an absolute path on the original author's machine;
# adjust it before running this notebook elsewhere.
# ---------------------------------------------------------------------------
BIOMD_12 = 'BIOMD0000000012.xml'
BASE_DIR = '/Users/woosubs/Desktop/AutomateAnnotation/'
DATA_DIR = os.path.join(BASE_DIR, "DATA")
ALGO_DIR = os.path.join(DATA_DIR, "algo")
CHEBI_DIR = os.path.join(DATA_DIR, "chebi")
RHEA_DIR = os.path.join(DATA_DIR, "rhea")
BIOMODEL_DIR = os.path.join(DATA_DIR, "biomodels/curated_biomodels_31mar2021")
# Derived from DATA_DIR like its siblings (previously a second copy of the
# absolute path, which would go stale whenever BASE_DIR was edited).
BIGG_DIR = os.path.join(DATA_DIR, "bigg")
ecoli_fpath = os.path.join(BIGG_DIR, "e_coli_core.xml")

# The project root is the parent of the current working directory; it is put
# on sys.path so that the `AMAS` package can be imported below.
PROJ_DIR = os.path.join(os.getcwd(), os.pardir)
AMAS_DIR = os.path.join(PROJ_DIR, "AMAS")
# Guarded so that re-running this cell does not keep appending duplicates.
if PROJ_DIR not in sys.path:
    sys.path.append(PROJ_DIR)

# Supplementary data lives in a sibling directory of the project root.
SUPPL_DIR = os.path.join(PROJ_DIR, os.pardir, "AMAS_suppl")
ACCURACY_DIR = os.path.join(SUPPL_DIR, "data_for_credibility")

from AMAS import species_annotation as sa
from AMAS import reaction_annotation as ra
from AMAS import recommender
from AMAS import constants as cn
from AMAS import iterator as it
from AMAS import tools

In [2]:
# Integrating a model with AMAS: load one curated BioModels file and predict
# the annotation of a single species by cosine similarity.
one_biomd = 'BIOMD0000000634.xml'
one_fpath = os.path.join(BIOMODEL_DIR, one_biomd)
recom = recommender.Recommender(libsbml_fpath=one_fpath)
# NOTE(review): bare attribute access -- nothing is displayed because this is
# not the last expression of the cell; looks like leftover exploration.
recom.species.exist_annotation
# The prediction result is indexed by species ID; keep only the entry for 'ATP'.
atp_predictions = recom.species.predictAnnotationByCosineSimilarity(inp_ids=['ATP'])
pred_res = atp_predictions['ATP']
pred_res

{'name_used': 'ATP',
 'chebi': ['CHEBI:74926',
  'CHEBI:30616',
  'CHEBI:37537',
  'CHEBI:15702',
  'CHEBI:53394',
  'CHEBI:15422'],
 'match_score': [('CHEBI:74926', 1.0),
  ('CHEBI:30616', 1.0),
  ('CHEBI:37537', 1.0),
  ('CHEBI:15702', 1.0),
  ('CHEBI:53394', 1.0),
  ('CHEBI:15422', 1.0)],
 'formula': ['C10N5O13P3', 'C36O8', 'C7O4', 'O40PW12', 'C8O4']}

In [3]:
# Evaluate the ATP prediction (`pred_res`) obtained in the previous cell;
# the returned value is shown as the cell output.
recom.species.evaluatePredictedSpeciesAnnotation(pred_res)

0.8342991901651113

In [4]:
# Model files located under cn.TEST_DIR.
BIOMD_190_PATH = os.path.join(cn.TEST_DIR, 'BIOMD0000000190.xml')
BIOMD_634_PATH = os.path.join(cn.TEST_DIR, 'BIOMD0000000634.xml')

# Species and reaction identifiers referenced by the cells below.
SPECIES_SAM = 'SAM'
SPECIES_SAM_NAME = 'S-adenosyl-L-methionine'
SPECIES_ORN = 'ORN'
SPECIES_ATP = 'ATP'
REACTION_ODC = 'ODC'
REACTION_SAMDC = 'SAMdc'
REACTION_SPMS = 'SpmS'

# Reference species candidates as (ChEBI term, match score) pairs, with the
# corresponding ChEBI lookup URLs.
ONE_CHEBI = 'CHEBI:15414'
ONE_SPEC_CAND = (ONE_CHEBI, 1.0)
ONE_SPEC_URL = 'https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI%3A15414'
TWO_SPEC_CAND = ('CHEBI:15729', 1.0)
TWO_SPEC_URL = 'https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI%3A15729'

# Reference reaction candidate as a (Rhea term, match score) pair, with its URL.
ONE_REAC_CAND = ('RHEA:28830', 1.0)
ONE_REAC_URL = 'https://www.rhea-db.org/rhea/28830'

In [5]:
# Build a recommender for the BIOMD0000000190 test model.
# This rebinds `recom`, which previously pointed at the BIOMD0000000634 model.
recom = recommender.Recommender(libsbml_fpath=BIOMD_190_PATH)

In [6]:
# Request recommendations for two species in a single call using the 'edist'
# method, then display the first returned recommendation.
specs = recom.getSpeciesListRecommendation(
    pred_ids=[SPECIES_SAM, SPECIES_ORN],
    update=False,
    method='edist',
)
one_res = specs[0]
one_res

Recommendation(id='SAM', credibility=0.974, candidates=[('CHEBI:15414', 1.0), ('CHEBI:59789', 1.0)], urls=['https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI%3A15414', 'https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI%3A59789'], labels=['S-adenosyl-L-methionine', 'S-adenosyl-L-methionine zwitterion'])

In [7]:
# Fixtures for the species-annotation checks.

# Model files located under cn.TEST_DIR.
E_COLI_PATH = os.path.join(cn.TEST_DIR, 'e_coli_core.xml')
BIOMD_248_PATH = os.path.join(cn.TEST_DIR, 'BIOMD0000000248.xml')

# IDs of species (D_GLUCOSE looks like a display name rather than an ID -- TODO confirm).
M_FDP_C = 'M_fdp_c'
M_ATP_C = 'M_atp_c'
M_AMP_C = 'M_amp_c'
M_GLUCOSE = 'M_glc__D_e'
D_GLUCOSE = 'D-Glucose'
ONESET_SPECIES_IDS = [M_FDP_C, M_ATP_C]

# Reference ChEBI terms and a reference formula.
ONE_CHEBI = 'CHEBI:15414'
ATP_CHEBI = 'CHEBI:30616'
ATP_FORMULA = 'C10N5O13P3'

# Species annotator built on the E. coli core model.
spec_cl = sa.SpeciesAnnotation(libsbml_fpath=E_COLI_PATH)

In [8]:
# Predict the annotation of species M_fdp_c by edit distance, evaluate that
# prediction, and display the resulting score.
fdp_pred_spec = spec_cl.predictAnnotationByEditDistance(inp_str=M_FDP_C)
fdp_score = spec_cl.evaluatePredictedSpeciesAnnotation(pred_result=fdp_pred_spec)
fdp_score

0.8590126303427104

In [17]:
# Same procedure as for M_fdp_c, applied to species M_atp_c.
# NOTE(review): this cell's execution count is out of sequence with its
# neighbours; re-run the notebook top to bottom to confirm the shown score.
atp_pred_spec = spec_cl.predictAnnotationByEditDistance(inp_str=M_ATP_C)
atp_score = spec_cl.evaluatePredictedSpeciesAnnotation(pred_result=atp_pred_spec)
atp_score

0.9070469526596386

In [10]:
# Display the raw edit-distance prediction computed for M_atp_c.
atp_pred_spec

{'M_atp_c': {'name_used': 'ATP C10H12N5O13P3',
  'chebi': ['CHEBI:153955'],
  'match_score': [('CHEBI:153955', 0.84)],
  'formula': ['C30N4O29P3']}}

In [11]:
# Fixtures for the reaction-annotation checks.
# NOTE(review): E_COLI_PATH and BIOMD_248_PATH are re-assigned here with the
# same values as in the species cell, while ONE_CHEBI is re-bound to a
# different value ('CHEBI:30616' here, 'CHEBI:15414' earlier).
E_COLI_PATH = os.path.join(cn.TEST_DIR, 'e_coli_core.xml')
BIOMD_248_PATH = os.path.join(cn.TEST_DIR, 'BIOMD0000000248.xml')

# ID of a reaction, plus species IDs used with it
# (presumably the species taking part in R_PFK -- TODO confirm).
R_PFK = 'R_PFK'
ATP = 'M_atp_c'
COMPONENTS = {'M_fdp_c', 'M_adp_c', 'M_atp_c', 'M_f6p_c', 'M_h_c'}
ONE_CANDIDATE = 'RHEA:12423'
ONE_CHEBI = 'CHEBI:30616'

# Dummy data for calculating accuracy, recall & precision
DUMMY_REF = {
    'a': ['ABC', 'BCD'],
    'b': ['DEF'],
}
DUMMY_PRED = {
    'a': ['ABC'],
    'b': ['AAA'],
}

# NOTE(review): this loads `ecoli_fpath` (built from BIGG_DIR in the first
# cell) rather than E_COLI_PATH defined just above -- confirm which is intended.
recom = recommender.Recommender(libsbml_fpath=ecoli_fpath)

In [12]:
# Step 1: predict species annotations for every ID in COMPONENTS.
pred_species = recom.species.predictAnnotationByCosineSimilarity(inp_ids=COMPONENTS)

# Step 2: keep only the cn.FORMULA entry of each prediction, keyed by species ID.
spec_formula_dict = {
    spec_id: pred_species[spec_id][cn.FORMULA]
    for spec_id in COMPONENTS
}

# Step 3: predict the annotation of reaction R_PFK from those entries.
pred_reaction = recom.reactions.predictAnnotation(
    inp_spec_dict=spec_formula_dict,
    inp_reac_list=[R_PFK],
    update=True,
)

# Step 4: evaluate the reaction prediction and display the rounded value for R_PFK.
cred_res = recom.reactions.evaluatePredictedReactionAnnotation(
    pred_result=pred_reaction,
)
np.round(cred_res[R_PFK], cn.ROUND_DIGITS)

0.811

In [13]:
# Reaction IDs (same values as the REACTION_* constants assigned in an earlier cell).
REACTION_ODC = 'ODC'
REACTION_SAMDC = 'SAMdc'

# Load BIOMD0000000190 from the curated BioModels directory; this rebinds
# `one_biomd`, `one_fpath` and `recom` from earlier cells.
one_biomd = 'BIOMD0000000190.xml'
one_fpath = os.path.join(BIOMODEL_DIR, one_biomd)
recom = recommender.Recommender(libsbml_fpath=one_fpath)

# Ask for the recommendation of a single reaction and display it.
recom.getReactionRecommendation(REACTION_ODC)

Recommendation(id='ODC', credibility=0.815, candidates=[('RHEA:28827', 1.0), ('RHEA:22964', 0.5), ('RHEA:59048', 0.5)], urls=['https://www.rhea-db.org/rhea/28827', 'https://www.rhea-db.org/rhea/22964', 'https://www.rhea-db.org/rhea/59048'], labels=['L-ornithine(out) + putrescine(in) = L-ornithine(in) + putrescine(out)', 'ornithine decarboxylase activity', 'D-ornithine + H(+) = CO2 + putrescine'])