In [1]:
# Update recommender for additional features, and to accommodate reaction_recommender
import editdistance
import libsbml
import numpy as np
import operator
import os
import compress_pickle
import pickle
import pandas as pd
import sys
import matplotlib.pyplot as plt
%matplotlib inline  

# File name of one BioModels model.
BIOMD_12 = 'BIOMD0000000012.xml'
# Root of the local data checkout. This path is machine-specific: edit this
# one line when running elsewhere; every data directory below derives from it.
BASE_DIR = '/Users/woosubs/Desktop/AutomateAnnotation/'
DATA_DIR = os.path.join(BASE_DIR, "DATA")
ALGO_DIR = os.path.join(DATA_DIR, "algo")
CHEBI_DIR = os.path.join(DATA_DIR, "chebi")
RHEA_DIR = os.path.join(DATA_DIR, "rhea")
BIOMODEL_DIR = os.path.join(DATA_DIR, "biomodels/curated_biomodels_31mar2021")
# Derived from DATA_DIR like its siblings, instead of repeating the absolute
# path, so that changing BASE_DIR relocates every directory consistently.
BIGG_DIR = os.path.join(DATA_DIR, "bigg")
ecoli_fpath = os.path.join(BIGG_DIR, "e_coli_core.xml")

# The AMAS package is imported from the parent of the current working directory.
PROJ_DIR = os.path.join(os.getcwd(), os.pardir)
AMAS_DIR = os.path.join(PROJ_DIR, "AMAS")
# Guarded so that re-running this cell does not pile up duplicate entries.
if PROJ_DIR not in sys.path:
  sys.path.append(PROJ_DIR)

from AMAS import species_annotation as sa
from AMAS import reaction_annotation as ra
from AMAS import constants as cn
from AMAS import recommender
from AMAS import tools

In [2]:
# Load the compressed reference pickle located under cn.REF_DIR.
# Presumably maps a ChEBI id to its shortened formula (judging by the
# variable name) -- TODO confirm.
# NOTE: unpickling can execute arbitrary code; only load trusted files.
chebi_formula_fpath = os.path.join(cn.REF_DIR, 'chebi_shortened_formula_comp.lzma')
with open(chebi_formula_fpath, 'rb') as chebi_formula_file:
  ref_shortened_chebi_to_formula = compress_pickle.load(chebi_formula_file)

In [3]:
# Pick one BioModels file and build its full path under BIOMODEL_DIR.
one_biomd = 'BIOMD0000000190.xml'
one_biomd_fpath = os.path.join(BIOMODEL_DIR, one_biomd)

In [4]:
# First, hand the recommender an already-parsed libsbml.SBMLDocument
# instance (rather than a file path).
reader = libsbml.SBMLReader()
document = reader.readSBML(one_biomd_fpath)
print("File Type of document is: %s \n" % (type(document),))
recom = recommender.Recommender(document)
# Check that the model was loaded: show the species names, a blank line,
# and then the components of each reaction.
print(recom.species.names,
      "",
      recom.reactions.reaction_components,
      sep="\n")

File Type of document is: <class 'libsbml.SBMLDocument'> 

{'SAM': 'S-adenosyl-L-methionine', 'A': 'S-adenosylmethioninamine', 'P': 'Putrescine', 'S': 'Spermine', 'D': 'Spermidine', 'aS': 'N1-Acetylspermine', 'aD': 'N1-Acetylspermidine', 'Met': 'Methionine', 'ORN': 'L-Ornithine', 'AcCoA': 'Acetyl-CoA', 'CoA': 'CoA'}

{'ODC': ['P', 'ORN'], 'SAMdc': ['A', 'SAM'], 'SSAT_for_S': ['AcCoA', 'aS', 'CoA', 'S'], 'SSAT_for_D': ['D', 'aD', 'AcCoA', 'CoA'], 'PAO_for_aD': ['P', 'aD'], 'PAO_for_aS': ['D', 'aS'], 'SpdS': ['D', 'P', 'A'], 'SpmS': ['D', 'S', 'A'], 'MAT': ['Met', 'SAM'], 'VCoA': ['AcCoA', 'CoA'], 'VacCoA': ['AcCoA', 'CoA'], 'P_efflux': ['P'], 'aD_efflux': ['aD']}


In [5]:
# def updateSpeciesWithRecommendation(inp_recom):
#   """
#   Update species_annotation class using
#   Recommendation namedtuple.
  
#   self.candidates is a sorted list of tuples,
#   (chebi_id: match_score)
#   self.formula is an unsorted list of unique formulas
  
#   Parameters
#   ----------
#   inp_recom: Recommendation
#      Result of recom.getSpeciesAnnotation
  
#   Returns
#   -------
#   None
#   """
#   recom.species.candidates.update({inp_recom.id: inp_recom.candidates})
#   formulas2update = list(set([ref_shortened_chebi_to_formula[val[0]] for val in inp_recom.candidates]))
#   recom.species.formula.update({inp_recom.id: formulas2update})
#   return None

In [6]:
# updateSpeciesWithRecommendation(inp_recom=species_recom)

In [7]:
# Show the species candidate store and the species formula store,
# one per line (both print as empty dicts in the saved output).
print(recom.species.candidates,
      recom.species.formula,
      sep="\n")

{}
{}


In [8]:
# Updating reaction annotation part.
# Choose the model / reaction pair used by the following cells;
# keep exactly one of the two options below active.

# Option A: BioModels model
# recom = recommender.Recommender(libsbml_fpath=one_biomd_fpath)
# pred_id = 'ODC'
# update = True

# Option B: E. coli core model from BiGG
recom = recommender.Recommender(libsbml_fpath=ecoli_fpath)
pred_id = "R_PFK"
update = True

In [9]:
# Recommend annotations for the selected reaction through the list API,
# with use_exist_species_annotation switched on.
# pred_id (assigned together with recom) replaces the repeated 'R_PFK'
# literal so this call keeps following whichever model/reaction is selected.
recom.getReactionListAnnotation(pred_ids=[pred_id],
                                use_exist_species_annotation=True)

[Recommendation(id='R_PFK', credibility=0.92, candidates=[('RHEA:12423', 1.0), ('RHEA:13380', 1.0), ('RHEA:14216', 1.0), ('RHEA:15656', 1.0), ('RHEA:16112', 1.0)], urls=['https://www.rhea-db.org/rhea/12423', 'https://www.rhea-db.org/rhea/13380', 'https://www.rhea-db.org/rhea/14216', 'https://www.rhea-db.org/rhea/15656', 'https://www.rhea-db.org/rhea/16112'])]

In [12]:
# Recommend annotations for the selected reaction through the single-item
# API, this time with use_exist_species_annotation switched off.
# pred_id (assigned together with recom) replaces the repeated 'R_PFK'
# literal so this call keeps following whichever model/reaction is selected.
recom.getReactionAnnotation(pred_id=pred_id,
                            use_exist_species_annotation=False)

Recommendation(id='R_PFK', credibility=0.91, candidates=[('RHEA:12423', 0.8), ('RHEA:13380', 0.8), ('RHEA:14216', 0.8), ('RHEA:15656', 0.8), ('RHEA:16112', 0.8), ('RHEA:20108', 0.8)], urls=['https://www.rhea-db.org/rhea/12423', 'https://www.rhea-db.org/rhea/13380', 'https://www.rhea-db.org/rhea/14216', 'https://www.rhea-db.org/rhea/15656', 'https://www.rhea-db.org/rhea/16112', 'https://www.rhea-db.org/rhea/20108'])

In [33]:
use_exist_species_annotation = True

# Species taking part in the selected reaction.
specs2predict = recom.reactions.reaction_components[pred_id]

# Step 1: when requested, start from the formulas of species that already
# carry an annotation in the model.
pred_formulas = {}
if use_exist_species_annotation:
  for spec_id in specs2predict:
    if spec_id in recom.species.exist_annotation_formula:
      pred_formulas[spec_id] = recom.species.exist_annotation_formula[spec_id]

# Step 2: every species not covered above needs a predicted annotation.
remaining_species = [spec_id for spec_id in specs2predict
                     if spec_id not in pred_formulas]

if remaining_species:
  spec_results = recom.getSpeciesListAnnotation(pred_ids=remaining_species)
  for spec_recom in spec_results:
    # Each candidate's first element is the ChEBI id; ids that are missing
    # from the reference table are skipped, and duplicate formulas collapse.
    unique_formulas = {cn.ref_chebi2formula[cand[0]]
                       for cand in spec_recom.candidates
                       if cand[0] in cn.ref_chebi2formula}
    pred_formulas[spec_recom.id] = list(unique_formulas)

print(pred_formulas)

{'M_h_c': ['H'], 'M_atp_c': ['C10N5O13P3'], 'M_adp_c': ['C10N5O10P2'], 'M_fdp_c': ['C6O12P2'], 'M_f6p_c': ['C6O9P']}


In [32]:
# Display the species that still lack a formula after the existing
# annotations were applied.
remaining_species

[]

In [16]:


# Start over with an empty species-id -> formulas mapping.
pred_formulas = {}


In [21]:
# Display the current species-id -> formulas mapping.
pred_formulas

{'M_h_c': ['H'],
 'M_atp_c': ['C20O4', 'C18ClN2O6S2'],
 'M_adp_c': ['C16F3IN2O4',
  'C101N7O75',
  'C14N2O',
  'C29N6O4S',
  'C19O9P',
  'C26FIN5O4',
  'C27O5',
  'C20O4',
  'C16NO6',
  'C28N6OS',
  'C19N2O2S',
  'C17Cl2F3N7O2S',
  'C26N7O2S',
  'C9N4O5',
  'C12',
  'C18ClN2O6S2',
  'C19O2',
  'C18N4O11',
  'C10N5O10P2',
  'C20N2O5S',
  'C35N4O4',
  'C8NO6',
  'C18O2',
  'C21ClN3O2',
  'C115N8O85'],
 'M_fdp_c': ['C6O12P2'],
 'M_f6p_c': ['C6O9P']}

In [18]:
# Display the species-id -> formulas mapping again.
# NOTE(review): another cell shows the same variable with identical saved
# output; one of the two displays could probably be dropped.
pred_formulas

{'M_h_c': ['H'],
 'M_atp_c': ['C20O4', 'C18ClN2O6S2'],
 'M_adp_c': ['C16F3IN2O4',
  'C101N7O75',
  'C14N2O',
  'C29N6O4S',
  'C19O9P',
  'C26FIN5O4',
  'C27O5',
  'C20O4',
  'C16NO6',
  'C28N6OS',
  'C19N2O2S',
  'C17Cl2F3N7O2S',
  'C26N7O2S',
  'C9N4O5',
  'C12',
  'C18ClN2O6S2',
  'C19O2',
  'C18N4O11',
  'C10N5O10P2',
  'C20N2O5S',
  'C35N4O4',
  'C8NO6',
  'C18O2',
  'C21ClN3O2',
  'C115N8O85'],
 'M_fdp_c': ['C6O12P2'],
 'M_f6p_c': ['C6O9P']}

In [19]:
# Inspect the formulas tied to the model's existing species annotations
# (species id -> list of formulas, per the saved output).
recom.species.exist_annotation_formula

{'M_glc__D_e': ['C6O6'],
 'M_gln__L_c': ['C5N2O3'],
 'M_gln__L_e': ['C5N2O3'],
 'M_glu__L_c': ['C5NO4'],
 'M_glu__L_e': ['C5NO4'],
 'M_glx_c': ['C2O3'],
 'M_h2o_c': ['[18O]', 'O'],
 'M_h2o_e': ['[18O]', 'O'],
 'M_h_c': ['H'],
 'M_h_e': ['H'],
 'M_icit_c': ['C6O7'],
 'M_lac__D_c': ['C3O3'],
 'M_lac__D_e': ['C3O3'],
 'M_mal__L_c': ['C4O5'],
 'M_mal__L_e': ['C4O5'],
 'M_nad_c': ['C21N7O14P2'],
 'M_nadh_c': ['C21N7O14P2'],
 'M_nadp_c': ['C21N7O17P3'],
 'M_nadph_c': ['C21N7O17P3'],
 'M_nh4_c': ['[13N]', 'N'],
 'M_13dpg_c': ['C3O10P2'],
 'M_nh4_e': ['[13N]', 'N'],
 'M_o2_c': ['O2'],
 'M_2pg_c': ['C3O7P'],
 'M_o2_e': ['O2'],
 'M_3pg_c': ['C3O7P'],
 'M_oaa_c': ['C4O5'],
 'M_pep_c': ['C3O6P'],
 'M_6pgc_c': ['C6O10P'],
 'M_pi_c': ['O4P'],
 'M_6pgl_c': ['C6O9P'],
 'M_pi_e': ['O4P'],
 'M_ac_c': ['C2O2'],
 'M_pyr_c': ['C3O3'],
 'M_pyr_e': ['C3O3'],
 'M_q8_c': ['C49O4'],
 'M_q8h2_c': ['C49O4'],
 'M_r5p_c': ['C5O8P'],
 'M_ru5p__D_c': ['C5O8P'],
 'M_ac_e': ['C2O2'],
 'M_acald_c': ['C2O'],
 'M_s7p_c': 

In [13]:
# Predict annotations for every species of the selected reaction.
specs2predict = recom.reactions.reaction_components[pred_id]
spec_results = recom.getSpeciesListAnnotation(pred_ids=specs2predict)

# TODO: this repeats the species-to-formula logic written out earlier in
# the notebook; it still needs to be recreated as a reusable function.
# For each recommendation keep the unique formulas of its candidate ChEBI
# ids, skipping ids that are absent from the reference table.
pred_formulas = {
  one_recom.id: list({cn.ref_chebi2formula[cand[0]]
                      for cand in one_recom.candidates
                      if cand[0] in cn.ref_chebi2formula})
  for one_recom in spec_results
}

# Feed the predicted formulas into the reaction predictor.
pred_reaction = recom.reactions.predictAnnotation(
  inp_spec_dict=pred_formulas,
  inp_reac_list=[pred_id],
  update=update,
)

In [14]:
# Display the predicted reaction annotation (reaction id -> list of
# (candidate id, score) pairs, per the saved output).
# NOTE(review): the saved output is keyed by 'ODC' while the active selection
# visible in this notebook sets pred_id = 'R_PFK'; the output presumably
# comes from an earlier run -- re-run the notebook top to bottom to refresh.
pred_reaction

{'ODC': [('RHEA:28830', 1.0), ('RHEA:22967', 0.5), ('RHEA:59051', 0.5)]}

In [16]:
# Number of candidate annotations returned for each predicted reaction.
{reaction_id: len(candidates) for reaction_id, candidates in pred_reaction.items()}

{'ODC': 3}