In [1]:
# Calculate recall and precision for manuscript/paper and 
# train & test regression model
# Plot in the initial writeup # Plot in the initial writeup 

# It'll be even great if recommender has a self-evaluation function

import collections
import libsbml
import numpy as np
import os
import pickle
import pandas as pd
import sys
import matplotlib.pyplot as plt
%matplotlib inline  

BIOMD_12 = 'BIOMD0000000012.xml'
BASE_DIR = '/Users/woosubs/Desktop/AutomateAnnotation/'
DATA_DIR = os.path.join(BASE_DIR, "DATA")
ALGO_DIR = os.path.join(DATA_DIR, "algo")
CHEBI_DIR = os.path.join(DATA_DIR, "chebi")
RHEA_DIR = os.path.join(DATA_DIR, "rhea")
BIOMODEL_DIR = os.path.join(DATA_DIR, "biomodels/curated_biomodels_31mar2021")
BIGG_DIR = '/Users/woosubs/Desktop/AutomateAnnotation/DATA/bigg'
ecoli_fpath = os.path.join(BIGG_DIR, "e_coli_core.xml")

PROJ_DIR = os.path.join(os.getcwd(), os.pardir)
AMAS_DIR = os.path.join(PROJ_DIR, "AMAS")
sys.path.append(PROJ_DIR)

from AMAS import species_annotation as sa
from AMAS import reaction_annotation as ra
from AMAS import recommender
from AMAS import constants as cn
from AMAS import iterator as it
from AMAS import tools

In [2]:
recom = recommender.Recommender(libsbml_fpath=ecoli_fpath)

In [3]:
# recom.getSpeciesAnnotation(pred_id='M_glc__D_e')

In [4]:
one_spec = 'M_glc__D_e'
one_spec_pred = recom.species.predictAnnotationByEditDistance(recom.species.names[one_spec])
one_spec_pred_form = {one_spec: one_spec_pred[cn.FORMULA]}

In [5]:
tools.getRecall(ref={one_spec:recom.species.exist_annotation_formula[one_spec]},
                pred=one_spec_pred_form)

1.0

In [6]:
tools.getPrecision(ref={one_spec:recom.species.exist_annotation_formula[one_spec]},
                pred=one_spec_pred_form)

1.0

In [21]:
# self.spec_cl = sa.SpeciesAnnotation(libsbml_fpath = E_COLI_PATH)
# self.reac_cl = ra.ReactionAnnotation(libsbml_fpath = E_COLI_PATH)
one_reac = 'R_PFK'
pred_species = recom.species.predictAnnotationByName(inp_spec_list=recom.reactions.reaction_components[one_reac])
pred_reaction = recom.reactions.predictAnnotation(inp_spec_dict=recom.species.formula,
                                                  inp_reac_list=['R_PFK'],
                                                  update=True)
one_rec_pred = pred_reaction['candidates']

In [24]:
tools.getRecall(ref={one_reac:recom.reactions.exist_annotation[one_reac]},
                pred=one_rec_pred)

1.0

In [25]:
tools.getPrecision(ref={one_reac:recom.reactions.exist_annotation[one_reac]},
                   pred=one_rec_pred)

0.16666666666666666

In [26]:
one_spec_pred_form

{'M_glc__D_e': ['C6O6']}

In [27]:
DUMMY_ID = 'SAM'
# Dummy data for calculating accuracy, recalll & precision
DUMMY_REF = {'a': ['ABC', 'BCD'],
              'b': ['DEF']}
DUMMY_PRED = {'a': ['ABC'],
             'b': ['AAA']}

In [29]:
recall2 = tools.getRecall(ref=DUMMY_REF,
                          pred=DUMMY_PRED,
                          mean=False)
recall2
# self.assertEqual(recall1, 0.25)

{'b': 0.0, 'a': 0.5}

In [31]:
precision2 = tools.getPrecision(ref=DUMMY_REF,
                             pred=DUMMY_PRED,
                             mean=False)
precision2

{'b': 0.0, 'a': 1.0}

In [3]:
# Now, get Model statistics for both species and reactions 
# using getRecall and getPrecision. 

# This time, load a BiGG model

# # 190 is for reactions
# one_biomd = 'BIOMD0000000190.xml'

# 634 is for species
one_biomd = 'BIOMD0000000634.xml'
one_biomd_fpath = os.path.join(BIOMODEL_DIR, one_biomd)
reader = libsbml.SBMLReader()
document = reader.readSBML(one_biomd_fpath)
model = document.getModel()
recom = recommender.Recommender(libsbml_fpath=one_biomd_fpath)

In [4]:
recom.getSpeciesStatistics()

{'recall': 1.0, 'precision': 0.67}

In [5]:
# 190 is for reactions
one_biomd = 'BIOMD0000000190.xml'
one_biomd_fpath = os.path.join(BIOMODEL_DIR, one_biomd)
reader = libsbml.SBMLReader()
document = reader.readSBML(one_biomd_fpath)
model = document.getModel()
recom = recommender.Recommender(libsbml_fpath=one_biomd_fpath)
recom.getReactionStatistics()

{'recall': 0.67, 'precision': 0.41}

In [6]:
# 190 is for reactions
one_biomd = 'BIOMD0000000190.xml'
one_biomd_fpath = os.path.join(BIOMODEL_DIR, one_biomd)
reader = libsbml.SBMLReader()
document = reader.readSBML(one_biomd_fpath)
model = document.getModel()
recom = recommender.Recommender(libsbml_fpath=one_biomd_fpath)

In [8]:
refs = recom.reactions.exist_annotation
print(refs)

{'ODC': ['RHEA:22967'], 'SAMdc': ['RHEA:15984'], 'SSAT_for_S': ['RHEA:11119'], 'SSAT_for_D': ['RHEA:11119'], 'PAO_for_aD': ['RHEA:25803'], 'PAO_for_aS': ['RHEA:25803'], 'SpdS': ['RHEA:12724'], 'SpmS': ['RHEA:19976'], 'MAT': ['RHEA:21083']}


In [32]:
import itertools
specs2pred = list(set(itertools.chain(*([recom.reactions.reaction_components[val] for val in refs.keys()]))))
specsdict2pred = {val:recom.species.getNameToUse(val) for val in specs2pred}

In [33]:
specs_predicted = {val:recom.species.predictAnnotationByEditDistance(specsdict2pred[val])[cn.FORMULA] \
                       for val in specs2pred}
specs_predicted

{'Met': ['C5NO2S'],
 'S': ['C10N4'],
 'CoA': ['C21N7O16P3S'],
 'aD': ['C9N3O'],
 'D': ['C7N3'],
 'SAM': ['C15N6O5S'],
 'A': ['C14N6O3S'],
 'aS': ['C12N4O'],
 'P': ['C4N2'],
 'AcCoA': ['C23N7O17P3S'],
 'ORN': ['C5N2O2']}

In [27]:
specs2pred

['Met', 'S', 'CoA', 'aD', 'D', 'SAM', 'A', 'aS', 'P', 'AcCoA', 'ORN']

In [36]:
pred_reaction = recom.reactions.predictAnnotation(inp_spec_dict=specs_predicted,
                                                  inp_reac_list=refs.keys(),
                                                  update=True)[cn.CANDIDATES]

In [37]:
pred_reaction

{'ODC': Index(['RHEA:22967', 'RHEA:28830', 'RHEA:59051'], dtype='object'),
 'SAMdc': Index(['RHEA:15984'], dtype='object'),
 'SSAT_for_S': Index(['RHEA:33102'], dtype='object'),
 'SSAT_for_D': Index(['RHEA:28153', 'RHEA:28273'], dtype='object'),
 'PAO_for_aD': Index(['RHEA:25815'], dtype='object'),
 'PAO_for_aS': Index(['RHEA:25803'], dtype='object'),
 'SpdS': Index(['RHEA:12724'], dtype='object'),
 'SpmS': Index(['RHEA:19976', 'RHEA:30518', 'RHEA:42879'], dtype='object'),
 'MAT': Index(['RHEA:13344', 'RHEA:13764', 'RHEA:15428', 'RHEA:15432', 'RHEA:16588',
        'RHEA:16664', 'RHEA:17612', 'RHEA:19228', 'RHEA:21083', 'RHEA:21823',
        'RHEA:22063', 'RHEA:24843', 'RHEA:26364', 'RHEA:28113', 'RHEA:32158',
        'RHEA:33078', 'RHEA:33086', 'RHEA:34278', 'RHEA:34710', 'RHEA:36342',
        'RHEA:36350', 'RHEA:36354', 'RHEA:37070', 'RHEA:37074', 'RHEA:37078',
        'RHEA:37090', 'RHEA:37434', 'RHEA:39650', 'RHEA:42107', 'RHEA:42635',
        'RHEA:42919', 'RHEA:43335', 'RHEA:47695

In [38]:
tools.getRecall(ref=refs, pred=pred_reaction)

0.6666666666666666

In [39]:
tools.getPrecision(ref=refs, pred=pred_reaction)

0.4092592592592592