In [1]:
# Update reaction annotations
import collections
import itertools
import libsbml
import numpy as np
import os
import pickle
import pandas as pd
import sys
import matplotlib.pyplot as plt
%matplotlib inline  

BIOMD_12 = 'BIOMD0000000012.xml'
BASE_DIR = '/Users/woosubs/Desktop/AutomateAnnotation/'
DATA_DIR = os.path.join(BASE_DIR, "DATA")
ALGO_DIR = os.path.join(DATA_DIR, "algo")
CHEBI_DIR = os.path.join(DATA_DIR, "chebi")
RHEA_DIR = os.path.join(DATA_DIR, "rhea")
BIOMODEL_DIR = os.path.join(DATA_DIR, "biomodels/curated_biomodels_31mar2021")
BIGG_DIR = '/Users/woosubs/Desktop/AutomateAnnotation/DATA/bigg'
ecoli_fpath = os.path.join(BIGG_DIR, "e_coli_core.xml")

PROJ_DIR = os.path.join(os.getcwd(), os.pardir)
AMAS_DIR = os.path.join(PROJ_DIR, "AMAS")
sys.path.append(PROJ_DIR)

from AMAS import species_annotation as sa
from AMAS import reaction_annotation as ra
from AMAS import constants as cn
from AMAS import iterator as it
from AMAS import tools

In [2]:
one_biomd = 'BIOMD0000000190.xml'
one_biomd_fpath = os.path.join(BIOMODEL_DIR, one_biomd)
reader = libsbml.SBMLReader()
document = reader.readSBML(one_biomd_fpath)
model = document.getModel()

In [3]:
model.getListOfSpecies()

<ListOfSpecies[11]>

In [4]:
from AMAS import recommender
recom = recommender.Recommender(libsbml_fpath=one_biomd_fpath)

In [5]:
recom.species.formula

{}

In [6]:
recom.reactions.reaction_components

{'ODC': ['P', 'ORN'],
 'SAMdc': ['A', 'SAM'],
 'SSAT_for_S': ['S', 'aS', 'CoA', 'AcCoA'],
 'SSAT_for_D': ['aD', 'CoA', 'AcCoA', 'D'],
 'PAO_for_aD': ['aD', 'P'],
 'PAO_for_aS': ['aS', 'D'],
 'SpdS': ['D', 'A', 'P'],
 'SpmS': ['S', 'A', 'D'],
 'MAT': ['Met', 'SAM'],
 'VCoA': ['CoA', 'AcCoA'],
 'VacCoA': ['CoA', 'AcCoA'],
 'P_efflux': ['P'],
 'aD_efflux': ['aD']}

In [7]:
reactions_to_annotate = ['SAMdc', 'SSAT_for_S']
# First, collect all species IDs to annotate
specs_to_annotate = list(set(itertools.chain(*[recom.reactions.reaction_components[val] \
                                               for val in reactions_to_annotate])))
print(specs_to_annotate)
# Why don't I just predict all species and continue? 
spec_results = recom.getSpeciesAnnotation(specs_to_annotate)
spec_results
# automatically created
pred_formulas = recom.species.formula
print(pred_formulas)

['S', 'CoA', 'aS', 'A', 'SAM', 'AcCoA']
{'S': ['C10N4'], 'CoA': ['C21N7O16P3S'], 'aS': ['C12N4O'], 'A': ['C14N6O3S'], 'SAM': ['C15N6O5S'], 'AcCoA': ['C23N7O17P3S']}


In [8]:
pred_reaction = recom.reactions.predictAnnotation(inp_spec_dict=pred_formulas,
                                                  inp_reac_list=reactions_to_annotate)

In [9]:
recom.reactions.match_score

{'SAMdc': [('RHEA:15984', 0.5)], 'SSAT_for_S': [('RHEA:33102', 0.8)]}

In [10]:
# match_scores = recom.reactions.match_score
# inp_list = reactions_to_annotate
# mean_match_score = [np.mean([val[1] for val in match_scores[k]]) for k in inp_list]
# mean_match_score

In [11]:
recom.reactions.evaluatePredictedReactionAnnotation(reactions_to_annotate)

{'SAMdc': 0.38447097442545614, 'SSAT_for_S': 0.9001079610299029}

In [12]:
pred_reaction

{'SAMdc': [('RHEA:15984', 0.5)], 'SSAT_for_S': [('RHEA:33102', 0.8)]}

# Updating test_reaction_annotation

In [16]:
R_PFK = 'R_PFK'
ATP = 'M_atp_c'
COMPONENTS = {'M_fdp_c', 'M_adp_c', 'M_atp_c', 'M_f6p_c', 'M_h_c'}
ONE_CANDIDATE = 'RHEA:12423'
ONE_CHEBI = 'CHEBI:30616'
BIOMD_248_PATH = os.path.join(os.getcwd(), 'BIOMD0000000248.xml')
E_COLI_PATH = os.path.join(BIGG_DIR, "e_coli_core.xml")

from AMAS import recommender
recom = recommender.Recommender(libsbml_fpath=ecoli_fpath)

In [19]:
spec_cl = sa.SpeciesAnnotation(libsbml_fpath = E_COLI_PATH)
reac_cl = ra.ReactionAnnotation(libsbml_fpath = E_COLI_PATH)
pred_species = spec_cl.predictAnnotationByName(inp_spec_list=COMPONENTS)
pred_reaction = reac_cl.predictAnnotation(inp_spec_dict=spec_cl.formula,
                                          inp_reac_list=[R_PFK])

In [23]:
print([val[0] for val in reac_cl.match_score[R_PFK]])
ONE_CANDIDATE in [val[0] for val in reac_cl.match_score[R_PFK]]

['RHEA:12423', 'RHEA:13380', 'RHEA:14216', 'RHEA:15656', 'RHEA:16112', 'RHEA:20108']


True

In [31]:
one_match_score = {'R1': [('RHEA:1', 1.0), ('RHEA:2', 0.5)]}
reac_cl.getBestOneCandidates(one_match_score)['R1']

['RHEA:1']

In [32]:
reac_cl.candidates

{'R_PFK': Index(['RHEA:12423', 'RHEA:13380', 'RHEA:14216', 'RHEA:15656', 'RHEA:16112',
        'RHEA:20108'],
       dtype='object')}

In [33]:
pred = {R_PFK: ['RHEA:16112']}
reac_cl.getAccuracy(pred_annotation=pred)

1.0