In [19]:
# materials used to reply to the reviewers' questions. 
import collections

import compress_pickle
import copy
import editdistance
import itertools
import libsbml
import numpy as np
import operator
import os
import pickle
import pandas as pd
import sys
import time
import matplotlib.pyplot as plt
%matplotlib inline  

BIOMD_12 = 'BIOMD0000000012.xml'
BASE_DIR = '/Users/woosubshin/Desktop/AutomateAnnotation/'
DATA_DIR = os.path.join(BASE_DIR, "DATA")
ALGO_DIR = os.path.join(DATA_DIR, "algo")
CHEBI_DIR = os.path.join(DATA_DIR, "chebi")
FIGURE_DIR = '/Users/woosubshin/Desktop/AutomateAnnotation/AMAS_suppl/figure_files'
RHEA_DIR = os.path.join(DATA_DIR, "rhea")
BIOMODEL_DIR = os.path.join(DATA_DIR, "biomodels/curated_biomodels_31mar2021")
BIGG_DIR = '/Users/woosubshin/Desktop/AutomateAnnotation/DATA/bigg'
ecoli_fpath = os.path.join(BIGG_DIR, "e_coli_core.xml")

PROJ_DIR = os.path.join(os.getcwd(), os.pardir)
AMAS_DIR = os.path.join(PROJ_DIR, "AMAS")
sys.path.append(PROJ_DIR)

SUPPL_DIR = os.path.join(PROJ_DIR, os.pardir, "AMAS_suppl")
ACCURACY_DIR = os.path.join(SUPPL_DIR, "data_for_credibility")

# address changed - as of May 12, 2023. 
MANU_FIGURE_DIR = '/Users/woosubshin/Desktop/AutomateAnnotation/writeup/AMAS/Manu_figures_new'
SUPPL_FIGURE_DIR = '/Users/woosubshin/Desktop/AutomateAnnotation/writeup/AMAS/Supple_figures_new'
# TEMPFIG_DIR = os.path.join(FIGURE_DIR, 'temp')

from AMAS import species_annotation as sa
from AMAS import reaction_annotation as ra
from AMAS import recommender
from AMAS import constants as cn
from AMAS import iterator as it
from AMAS import tools

biggs = [val for val in os.listdir(BIGG_DIR) if val[-4:]=='.xml']
print("Number of BiGG models to be analyzed: %d" % len(biggs))

spec_ml_df = pd.read_csv(os.path.join(ACCURACY_DIR, 'biomd_individual_species_accuracy.csv'))
biomds_spec = np.unique(spec_ml_df['model'])
print("Number of BioModels to be analyzed for SPECIES: %d" % len(biomds_spec))

reac_ml_df = pd.read_csv(os.path.join(ACCURACY_DIR, 'biomd_individual_reactions_accuracy.csv'))
biomds_reac = np.unique(reac_ml_df['model'])
print("Number of BioModels to be analyzed for REACTIONS: %d" % len(biomds_reac))

Number of BiGG models to be analyzed: 108
Number of BioModels to be analyzed for SPECIES: 306
Number of BioModels to be analyzed for REACTIONS: 131


In [2]:
# Reviewer 1-1
# Why binary algorithm? (H2O H2O)
recom = recommender.Recommender(ecoli_fpath)
model = recom.sbml_document.getModel()

In [5]:
# Reviewer: checking the species algorithm
# sa.CHARCOUNT_COMB_DF
ref_df = sa.CHARCOUNT_DF
inp_strs=['atpp']
unq_strs = list(set(inp_strs))
one_query, name_used = recom.species.prepareCounterQuery(specs=unq_strs,
                                                         ref_cols=sa.CHARCOUNT_DF.columns,
                                                         use_id=False) 
multi_mat = ref_df.dot(one_query)

In [7]:
multi_mat

Unnamed: 0,atpp
0,0.284268
1,0.244949
2,0.288675
3,0.163299
4,0.149071
...,...
431019,0.291310
431020,0.264964
431021,0.214801
431022,0.275839


In [28]:
1/np.sqrt(5)

0.4472135954999579

In [14]:
df = sa.CHARCOUNT_COMB_DF
#[str(val) for val in df['synonym'] if len(str(val))<=4 and len(str(val))!=len(set(str(val)))]

In [5]:
ecoli_fpath

'/Users/woosubshin/Desktop/AutomateAnnotation/DATA/bigg/e_coli_core.xml'

In [15]:
# for one_s in model.getListOfSpecies():
  # print(one_s.getId(), one_s.name) 
    
# for one_r in model.getListOfReactions():
#   one_str = one_r.getAnnotationString()
#   one_anot = tools.extractRheaFromAnnotationString(one_str)
#   print(one_anot)
#   print("\n")

In [24]:
for one_bigg in biggs:
  one_fpath = os.path.join(BIGG_DIR, one_bigg)
  recom = recommender.Recommender(one_fpath)
  model = recom.sbml_document.getModel()
  num_spec_anot = len(recom.species.exist_annotation)
  num_reac_anot = len(recom.reactions.exist_annotation)
  break

In [25]:
print(num_spec_anot)
model.getNumSpecies()

2216


4456

In [26]:
print(num_reac_anot)
model.getNumReactions()

1645


6663

In [16]:
# one_biomd = 'BIOMD0000000408.xml'
# one_fpath = os.path.join(BIOMODEL_DIR, one_biomd)
# recom = recommender.Recommender(one_fpath)
# model = recom.sbml_document.getModel()
# for one_r in model.getListOfReactions():
#   print(one_r.getId())
#   print([str(val.stoichiometry)+"+"+val.species for val in one_r.getListOfReactants()])
#   print([str(val.stoichiometry)+"+"+val.species for val in one_r.getListOfProducts()])
#   print("------------\n")

In [21]:
# biggs
recom = recommender.Recommender(ecoli_fpath)
model = recom.sbml_document.getModel()

In [None]:
model.get

In [20]:
# biomds_spec

In [11]:
biomodels = [val for val in os.listdir(BIOMODEL_DIR) if val[-4:]=='.xml']
for one_biomd in biomodels:
  recom = recommender.Recommender(one_biomd)
  model = recom.sbml_document.getModel()
  

In [28]:
recom.species.getCScores(inp_strs=['atp'],
                         mssc='top',
                         cutoff=0.0)

{'atp': [('CHEBI:15422', 1.0000000000000002),
  ('CHEBI:15702', 1.0000000000000002),
  ('CHEBI:30616', 1.0000000000000002),
  ('CHEBI:37537', 1.0000000000000002),
  ('CHEBI:53394', 1.0000000000000002),
  ('CHEBI:74926', 1.0000000000000002)]}

In [34]:
one_s = model.getListOfSpecies()[0]

In [37]:
one_s.removeAnnotation()

AttributeError: 'Species' object has no attribute 'removeAnnotation'