In [1]:
# Update reaction annotations
import collections
import itertools
import libsbml
import numpy as np
import os
import pickle
import pandas as pd
import sys
import matplotlib.pyplot as plt
%matplotlib inline  

# ---- Data locations --------------------------------------------------------
# BASE_DIR is the only machine-specific path; every other data directory is
# derived from it so that relocating the data needs a single edit.
BIOMD_12 = 'BIOMD0000000012.xml'
BASE_DIR = '/Users/woosubs/Desktop/AutomateAnnotation/'
DATA_DIR = os.path.join(BASE_DIR, "DATA")
ALGO_DIR = os.path.join(DATA_DIR, "algo")
CHEBI_DIR = os.path.join(DATA_DIR, "chebi")
RHEA_DIR = os.path.join(DATA_DIR, "rhea")
BIOMODEL_DIR = os.path.join(DATA_DIR, "biomodels/curated_biomodels_31mar2021")
# Previously a second hard-coded absolute path; same value, now derived.
BIGG_DIR = os.path.join(DATA_DIR, "bigg")
ecoli_fpath = os.path.join(BIGG_DIR, "e_coli_core.xml")

# ---- Project import path ---------------------------------------------------
# The parent of the current working directory is put on sys.path so that the
# `AMAS` package can be imported by the statements that follow this block.
PROJ_DIR = os.path.join(os.getcwd(), os.pardir)
AMAS_DIR = os.path.join(PROJ_DIR, "AMAS")
# Guarded so that re-running the cell does not keep appending duplicates.
if PROJ_DIR not in sys.path:
    sys.path.append(PROJ_DIR)

from AMAS import species_annotation as sa
from AMAS import reaction_annotation as ra
from AMAS import constants as cn
from AMAS import iterator as it
from AMAS import tools

In [2]:
# Load one curated BioModels file with libSBML.
one_biomd = 'BIOMD0000000190.xml'
one_biomd_fpath = os.path.join(BIOMODEL_DIR, one_biomd)
reader = libsbml.SBMLReader()
document = reader.readSBML(one_biomd_fpath)
model = document.getModel()
# readSBML() does not raise when the file is missing or unreadable: it returns
# a document that carries the errors, and getModel() then yields None.
# Fail here with a clear message instead of an AttributeError in a later cell.
if model is None:
    raise ValueError(
        "Could not load an SBML model from %s (%d libSBML issue(s) reported)"
        % (one_biomd_fpath, document.getNumErrors())
    )

In [3]:
# Location of the BIOMD0000000248 model file inside the directory named by
# cn.TEST_DIR.
BIOMD_248_PATH = os.path.join(
    cn.TEST_DIR,
    'BIOMD0000000248.xml',
)
# Reaction ID used as the running example in the prediction cells.
R_PFK = 'R_PFK'
# A single species ID (also a member of COMPONENTS).
ATP = 'M_atp_c'
# Species IDs whose annotations are predicted together with R_PFK.
COMPONENTS = {
    'M_fdp_c',
    'M_adp_c',
    'M_atp_c',
    'M_f6p_c',
    'M_h_c',
}
# One Rhea identifier and one ChEBI identifier used for spot checks.
ONE_CANDIDATE = 'RHEA:12423'
ONE_CHEBI = 'CHEBI:30616'

# Dummy data for calculating accuracy, recall & precision
DUMMY_REF = dict(a=['ABC', 'BCD'], b=['DEF'])
DUMMY_PRED = dict(a=['ABC'], b=['AAA'])

In [4]:
# Build the species and reaction annotators from the same E. coli core file.
spec_cl = sa.SpeciesAnnotation(libsbml_fpath=ecoli_fpath)
reac_cl = ra.ReactionAnnotation(libsbml_fpath=ecoli_fpath)

# For every species ID in COMPONENTS, predict an annotation from the name
# that spec_cl stores for that ID.
pred_species = {
    spec_id: spec_cl.predictAnnotationByEditDistance(inp_str=spec_cl.names[spec_id])
    for spec_id in COMPONENTS
}

# Keep only the cn.FORMULA entry of each species prediction.
spec_formula_dict = {
    spec_id: pred_species[spec_id][cn.FORMULA]
    for spec_id in COMPONENTS
}



In [5]:
# Predict the annotation of reaction R_PFK, passing the per-species formula
# dictionary built in the previous cell.
# NOTE(review): update=True presumably stores the result on reac_cl as well
# as returning it — confirm against ReactionAnnotation.predictAnnotation.
pred_reaction = reac_cl.predictAnnotation(inp_spec_dict=spec_formula_dict,
                                          inp_reac_list=[R_PFK],
                                          update=True)

In [6]:
# Evaluate the R_PFK prediction; per the recorded output the result is a dict
# mapping the reaction ID to a single float.
one_eval = reac_cl.evaluatePredictedReactionAnnotation(inp_dict=pred_reaction)
print(one_eval)

{'R_PFK': 0.9119592634489692}


In [7]:
# Pull the match-score entry (key cn.MATCH_SCORE) out of the prediction result.
match_scores_dict = pred_reaction[cn.MATCH_SCORE]

In [8]:
# Display the match scores: in the recorded output each reaction ID maps to a
# list of (Rhea ID, score) tuples.
match_scores_dict

{'R_PFK': [('RHEA:12423', 0.8),
  ('RHEA:13380', 0.8),
  ('RHEA:14216', 0.8),
  ('RHEA:15656', 0.8),
  ('RHEA:16112', 0.8),
  ('RHEA:20108', 0.8)]}

In [9]:
# Build a Recommender for the BIOMD0000000190 file whose path was set up when
# the SBML model was loaded.
from AMAS import recommender
recom = recommender.Recommender(libsbml_fpath=one_biomd_fpath)

In [10]:
# Display the reaction annotations already present in the model; the recorded
# output maps each reaction ID to a list of Rhea IDs.
recom.reactions.exist_annotation

{'ODC': ['RHEA:22967'],
 'SAMdc': ['RHEA:15984'],
 'SSAT_for_S': ['RHEA:33102', 'RHEA:28273', 'RHEA:28153', 'RHEA:11119'],
 'SSAT_for_D': ['RHEA:33102', 'RHEA:28273', 'RHEA:28153', 'RHEA:11119'],
 'PAO_for_aD': ['RHEA:25803'],
 'PAO_for_aS': ['RHEA:25803'],
 'SpdS': ['RHEA:12724'],
 'SpmS': ['RHEA:19976'],
 'MAT': ['RHEA:21083']}

In [11]:
# Reaction-level statistics; the recorded output reports 'recall' and
# 'precision'.
recom.getReactionStatistics()

{'recall': 0.75, 'precision': 0.63}

In [12]:
# Species-level statistics (no output was recorded for this cell).
recom.getSpeciesStatistics()

In [26]:
# Recommendation for reaction R_PFK.
# NOTE(review): at this point `recom` was built from the BIOMD0000000190 file,
# whose existing annotations shown above contain no 'R_PFK'; the execution
# count of this cell is also out of sequence. The recorded output presumably
# came from a Recommender built on the E. coli core file — confirm by
# re-running from a fresh kernel.
recom.getReactionAnnotation(pred_id=R_PFK)

Recommendation(id='R_PFK', credibility=0.91, candidates=[('RHEA:12423', 0.8), ('RHEA:13380', 0.8), ('RHEA:14216', 0.8), ('RHEA:15656', 0.8), ('RHEA:16112', 0.8), ('RHEA:20108', 0.8)], urls=['https://www.rhea-db.org/rhea/12423', 'https://www.rhea-db.org/rhea/13380', 'https://www.rhea-db.org/rhea/14216', 'https://www.rhea-db.org/rhea/15656', 'https://www.rhea-db.org/rhea/16112', 'https://www.rhea-db.org/rhea/20108'])

In [6]:
# recom.reactions.reaction_components

In [7]:
# Example identifiers, re-declared here so the cells below run on their own.
R_PFK = 'R_PFK'            # reaction ID
ATP = 'M_atp_c'            # one species ID, also listed in COMPONENTS
COMPONENTS = {
    'M_fdp_c',
    'M_adp_c',
    'M_atp_c',
    'M_f6p_c',
    'M_h_c',
}
ONE_CANDIDATE = 'RHEA:12423'   # one Rhea identifier
ONE_CHEBI = 'CHEBI:30616'      # one ChEBI identifier

# Fresh annotators on the E. coli core file.
spec_cl = sa.SpeciesAnnotation(libsbml_fpath=ecoli_fpath)
reac_cl = ra.ReactionAnnotation(libsbml_fpath=ecoli_fpath)

# Species prediction by name for the example species.
# NOTE(review): the returned value is not used directly; the reaction
# prediction reads spec_cl.formula instead, which this call presumably
# populates — confirm.
pred_species = spec_cl.predictAnnotationByName(inp_spec_list=COMPONENTS)

# Reaction prediction for R_PFK from the formulas held by spec_cl.
pred_reaction = reac_cl.predictAnnotation(
    inp_spec_dict=spec_cl.formula,
    inp_reac_list=[R_PFK],
)

In [8]:
# Evaluate the R_PFK prediction; the recorded output is a dict mapping the
# reaction ID to a float.
reac_cl.evaluatePredictedReactionAnnotation(inp_dict=pred_reaction)

{'R_PFK': 0.9119592634489692}

In [9]:
# Recall of the predicted candidates (recorded output: 1.0).
one_pred_dict = pred_reaction['candidates']
reac_cl.getRecall(pred_annotation=one_pred_dict)

1.0

In [10]:
# Precision of the same candidates (recorded output: ~0.167, i.e. 1/6 with the
# six candidates listed for R_PFK).
reac_cl.getPrecision(pred_annotation=one_pred_dict)

0.16666666666666666

In [12]:
# Display the candidates; in the recorded output each reaction ID maps to a
# pandas Index of Rhea IDs.
pred_reaction['candidates']

{'R_PFK': Index(['RHEA:12423', 'RHEA:13380', 'RHEA:14216', 'RHEA:15656', 'RHEA:16112',
        'RHEA:20108'],
       dtype='object')}

In [9]:
# Derive, by hand, per-reaction summary features from the prediction result.
# NOTE(review): this presumably mirrors the feature computation done inside
# evaluatePredictedReactionAnnotation — confirm.
inp_dict = pred_reaction
candidates_dict = inp_dict['candidates']

# Reaction IDs, in the order used by every per-reaction list below.
inp_list = list(candidates_dict)

# Number of candidate annotations per reaction.
cands_num_dict = {
    reaction_id: len(cands)
    for reaction_id, cands in candidates_dict.items()
}
num_candidates = [cands_num_dict[reaction_id] for reaction_id in inp_list]

# Product of the reference matrix with the query matrix, then its maximum.
# `maxes` is indexed by reaction ID below, so it is presumably a pandas
# object with one entry per reaction.
multi_mat = ra.ref_mat.dot(inp_dict[cn.QUERY_DF])
maxes = multi_mat.max()
max_match = [maxes[reaction_id] for reaction_id in inp_list]

# Mean of the candidate scores (second element of each match-score entry).
match_scores = inp_dict[cn.MATCH_SCORE]
mean_match_score = [
    np.mean([cand[1] for cand in match_scores[reaction_id]])
    for reaction_id in inp_list
]

# The original cell also sketched, commented out, the median/min/max/variance
# of the match scores and a `fitted_model.predict(...)` call over the zipped
# features. None of that was executed, so it is not reproduced here.

In [28]:
# Display the per-reaction mean match score (one entry, for R_PFK, in the
# recorded output).
mean_match_score

[0.7999999999999999]

# Updating test_reaction_annotation

In [10]:
# Example identifiers for the reaction-annotation test draft.
R_PFK = 'R_PFK'                # reaction ID
ATP = 'M_atp_c'                # one species ID, also listed in COMPONENTS
COMPONENTS = {
    'M_fdp_c',
    'M_adp_c',
    'M_atp_c',
    'M_f6p_c',
    'M_h_c',
}
ONE_CANDIDATE = 'RHEA:12423'   # one Rhea identifier
ONE_CHEBI = 'CHEBI:30616'      # one ChEBI identifier
# NOTE: rebinds BIOMD_248_PATH to a file in the current working directory
# (an earlier cell pointed it at cn.TEST_DIR).
BIOMD_248_PATH = os.path.join(
    os.getcwd(),
    'BIOMD0000000248.xml',
)
# Same value as `ecoli_fpath` from the configuration cell.
E_COLI_PATH = os.path.join(
    BIGG_DIR,
    "e_coli_core.xml",
)

# Recommender built on the E. coli core model (replaces the earlier `recom`).
from AMAS import recommender
recom = recommender.Recommender(
    libsbml_fpath=ecoli_fpath,
)

In [12]:
# NOTE: `inp_dict` is rebound here to the candidates mapping only; an earlier
# cell bound the same name to the whole prediction result.
inp_dict = pred_reaction['candidates']

# Number of candidates per reaction ID.
cands_num_dict = {
    reaction_id: len(cands)
    for reaction_id, cands in inp_dict.items()
}
print(cands_num_dict)

{'R_PFK': 6}


IndexError: tuple index out of range

In [28]:
# Rhea IDs (first element of each match-score entry) predicted for R_PFK.
r_pfk_candidate_ids = [cand[0] for cand in pred_reaction[cn.MATCH_SCORE][R_PFK]]
print(r_pfk_candidate_ids)
# Is the expected candidate among them?
ONE_CANDIDATE in r_pfk_candidate_ids

['RHEA:12423', 'RHEA:13380', 'RHEA:14216', 'RHEA:15656', 'RHEA:16112', 'RHEA:20108']


True

In [31]:
# Toy match-score input: reaction 'R1' has two candidates with scores 1.0 and
# 0.5. The recorded output keeps only 'RHEA:1', the higher-scoring one.
one_match_score = {'R1': [('RHEA:1', 1.0), ('RHEA:2', 0.5)]}
reac_cl.getBestOneCandidates(one_match_score)['R1']

['RHEA:1']

In [32]:
# Candidates currently held by reac_cl; the recorded output lists six Rhea
# IDs for R_PFK.
reac_cl.candidates

{'R_PFK': Index(['RHEA:12423', 'RHEA:13380', 'RHEA:14216', 'RHEA:15656', 'RHEA:16112',
        'RHEA:20108'],
       dtype='object')}

In [33]:
# Accuracy of a hand-written single-candidate prediction for R_PFK
# (recorded output: 1.0).
pred = {R_PFK: ['RHEA:16112']}
reac_cl.getAccuracy(pred_annotation=pred)

1.0