In [1]:
# Data for figure/tables in supplementary materials 

import collections
import compress_pickle
import itertools
import libsbml
import numpy as np
import os
import pickle
import pandas as pd
import sys
import time
import matplotlib.pyplot as plt
%matplotlib inline  

BIOMD_12 = 'BIOMD0000000012.xml'
BASE_DIR = '/Users/woosubs/Desktop/AutomateAnnotation/'
DATA_DIR = os.path.join(BASE_DIR, "DATA")
ALGO_DIR = os.path.join(DATA_DIR, "algo")
CHEBI_DIR = os.path.join(DATA_DIR, "chebi")
RHEA_DIR = os.path.join(DATA_DIR, "rhea")
BIOMODEL_DIR = os.path.join(DATA_DIR, "biomodels/curated_biomodels_31mar2021")
BIGG_DIR = '/Users/woosubs/Desktop/AutomateAnnotation/DATA/bigg'
ecoli_fpath = os.path.join(BIGG_DIR, "e_coli_core.xml")

PROJ_DIR = os.path.join(os.getcwd(), os.pardir)
AMAS_DIR = os.path.join(PROJ_DIR, "AMAS")
sys.path.append(PROJ_DIR)

SUPPL_DIR = os.path.join(PROJ_DIR, os.pardir, "AMAS_suppl")
ACCURACY_DIR = os.path.join(SUPPL_DIR, "data_for_credibility")
RESULT_DIR = '/Users/woosubs/Desktop/AutomateAnnotation/AMAS_suppl/result_files'

from AMAS import annotation_maker as am
from AMAS import species_annotation as sa
from AMAS import reaction_annotation as ra
from AMAS import recommender
from AMAS import constants as cn
from AMAS import iterator as it
from AMAS import tools

In [2]:
recom = recommender.Recommender(libsbml_fpath=ecoli_fpath)

In [5]:
model = recom.sbml_document.model

In [12]:
print(recom.getSpeciesIDs())

['M_glc__D_e', 'M_gln__L_c', 'M_gln__L_e', 'M_glu__L_c', 'M_glu__L_e', 'M_glx_c', 'M_h2o_c', 'M_h2o_e', 'M_h_c', 'M_h_e', 'M_icit_c', 'M_lac__D_c', 'M_lac__D_e', 'M_mal__L_c', 'M_mal__L_e', 'M_nad_c', 'M_nadh_c', 'M_nadp_c', 'M_nadph_c', 'M_nh4_c', 'M_13dpg_c', 'M_nh4_e', 'M_o2_c', 'M_2pg_c', 'M_o2_e', 'M_3pg_c', 'M_oaa_c', 'M_pep_c', 'M_6pgc_c', 'M_pi_c', 'M_6pgl_c', 'M_pi_e', 'M_ac_c', 'M_pyr_c', 'M_pyr_e', 'M_q8_c', 'M_q8h2_c', 'M_r5p_c', 'M_ru5p__D_c', 'M_ac_e', 'M_acald_c', 'M_s7p_c', 'M_acald_e', 'M_accoa_c', 'M_succ_c', 'M_succ_e', 'M_succoa_c', 'M_acon_C_c', 'M_xu5p__D_c', 'M_actp_c', 'M_adp_c', 'M_akg_c', 'M_akg_e', 'M_amp_c', 'M_atp_c', 'M_cit_c', 'M_co2_c', 'M_co2_e', 'M_coa_c', 'M_dhap_c', 'M_e4p_c', 'M_etoh_c', 'M_etoh_e', 'M_f6p_c', 'M_fdp_c', 'M_for_c', 'M_for_e', 'M_fru_e', 'M_fum_c', 'M_fum_e', 'M_g3p_c', 'M_g6p_c']


### S1. ChEBI terms and converting it as chemical formula

In [30]:
# check how many times it was used
num_all_r = 0
num_r_with_h2o = 0
for one_r in model.getListOfReactions():
  num_all_r += 1
  reactants = [val.species for val in one_r.getListOfReactants()]
  products = [val.species for val in one_r.getListOfReactants()]
  components = set(reactants + products)
  if 'M_h2o_c' in components or 'M_h2o_e' in components or 'M_o2_c' in components:
    num_r_with_h2o += 1
print(num_r_with_h2o / num_all_r)

0.16842105263157894


In [28]:
num_all_r

95

In [31]:
one_s = model.getSpecies('M_h2o_e')
print(one_s.name)
print(one_s.getAnnotationString())

H2O H2O
<sbml:annotation xmlns:sbml="http://www.sbml.org/sbml/level3/version1/core">
  <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <rdf:Description rdf:about="#M_h2o_e">
      <bqbiol:is xmlns:bqbiol="http://biomodels.net/biology-qualifiers/">
        <rdf:Bag>
          <rdf:li rdf:resource="http://identifiers.org/bigg.metabolite/h2o"/>
          <rdf:li rdf:resource="http://identifiers.org/biocyc/META:CPD-15815"/>
          <rdf:li rdf:resource="http://identifiers.org/biocyc/META:OXONIUM"/>
          <rdf:li rdf:resource="http://identifiers.org/biocyc/META:HYDROXYL-GROUP"/>
          <rdf:li rdf:resource="http://identifiers.org/biocyc/META:WATER"/>
          <rdf:li rdf:resource="http://identifiers.org/biocyc/META:OH"/>
          <rdf:li rdf:resource="http://identifiers.org/chebi/CHEBI:13352"/>
          <rdf:li rdf:resource="http://identifiers.org/chebi/CHEBI:30490"/>
          <rdf:li rdf:resource="http://identifiers.org/chebi/CHEBI:43228"/>
          <rdf

In [None]:
one