In [1]:
# Download the AGORA2 reconstruction files and check that they can be loaded.
import requests
import os
import shutil

import httplib2
from bs4 import BeautifulSoup, SoupStrainer
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import as_completed

# Remote directory that lists the individual AGORA2 (version 2.01) SBML files.
AGORA_URL = (
    'https://www.vmh.life/files/reconstructions/AGORA2/version2.01/'
    'sbml_files/individual_reconstructions/'
)
# Local directory that receives the downloaded files.
SAVE_FPATH = '/Volumes/MGTEC/Agora2'

In [2]:
# Collect the names of all downloadable SBML files listed at AGORA_URL.
# NOTE: this cell performs a network request.
http = httplib2.Http()
status, response = http.request(AGORA_URL)
# Fail early instead of silently parsing an error page into an empty list.
if status.status != 200:
    raise RuntimeError("Could not list %s (HTTP status %d)" % (AGORA_URL, status.status))

# An explicit parser keeps the result independent of which optional parsers
# happen to be installed (and avoids bs4's "no parser specified" warning).
listing = BeautifulSoup(response, 'html.parser', parse_only=SoupStrainer('a'))
# find_all(..., href=True) only yields <a> tags that carry an href, so
# non-tag nodes (e.g. a doctype) are never touched.
indiv_files = [
    link['href']
    for link in listing.find_all('a', href=True)
    if link['href'].endswith('.xml')
]
len(indiv_files)

7302

In [3]:
# # Working example for a single file

# fname = indiv_files[0]
# url = os.path.join(AGORA_URL, fname)
# response = requests.get(url)
# with open(os.path.join(SAVE_FPATH, fname), 'wb') as file:
#     file.write(response.content)

In [4]:
def download_url_to_file(fname, timeout=60):
  """
  Download one file from AGORA_URL and save it under SAVE_FPATH.

  Parameters
  ----------
  fname: str
      Name of file to be downloaded
  timeout: float
      Seconds to wait for the server before giving up
      (passed to requests.get)

  Returns
  -------
  str
      Path of the local file that was written

  Raises
  ------
  requests.exceptions.RequestException
      If the request times out, cannot connect, or the server
      answers with an HTTP error status
  """
  # URLs always use '/', whereas os.path.join uses the OS separator
  url = AGORA_URL.rstrip('/') + '/' + fname
  response = requests.get(url, timeout=timeout)
  # do not save an HTML error page as if it were an SBML model
  response.raise_for_status()
  fpath = os.path.join(SAVE_FPATH, fname)
  with open(fpath, 'wb') as file:
    file.write(response.content)
  return fpath

# Download every file in indiv_files concurrently.
# Failures are recorded per file (file name -> exception) instead of stopping
# at the first one, so the summary names every file that needs a retry.
failed_downloads = {}
# create the pool of worker threads
with ThreadPoolExecutor(max_workers=20) as exe:
    # dispatch all download tasks, remembering which file each future handles
    futures = {exe.submit(download_url_to_file, f): f for f in indiv_files}
    # collect results as they become available
    for future in as_completed(futures):
        try:
            future.result()
        except Exception as err:
            failed_downloads[futures[future]] = err

# Still fail loudly, but only after every download has been attempted.
if failed_downloads:
    raise RuntimeError(
        "%d of %d downloads failed (details in failed_downloads): %s"
        % (len(failed_downloads), len(indiv_files), sorted(failed_downloads))
    )

In [5]:
# Setup for the new accuracy & precision plots:
# load all data up front so that no later cell needs to read from disk.
import collections

import compress_pickle
import copy
import editdistance
import itertools
import libsbml
import numpy as np
import operator
import os
import pickle
import pandas as pd
import sys
import time
import matplotlib.pyplot as plt
%matplotlib inline  

BIOMD_12 = 'BIOMD0000000012.xml'

# Root of the local working tree; every data/figure location below is derived
# from it, so moving the tree only requires changing this one line.
BASE_DIR = '/Users/woosubshin/Desktop/AutomateAnnotation/'
DATA_DIR = os.path.join(BASE_DIR, "DATA")
ALGO_DIR = os.path.join(DATA_DIR, "algo")
CHEBI_DIR = os.path.join(DATA_DIR, "chebi")
FIGURE_DIR = os.path.join(BASE_DIR, "AMAS_suppl", "figure_files")
RHEA_DIR = os.path.join(DATA_DIR, "rhea")
BIOMODEL_DIR = os.path.join(DATA_DIR, "biomodels/curated_biomodels_31mar2021")
BIGG_DIR = os.path.join(DATA_DIR, "bigg")
ecoli_fpath = os.path.join(BIGG_DIR, "e_coli_core.xml")

# The project root is the parent of the directory the notebook runs from.
PROJ_DIR = os.path.join(os.getcwd(), os.pardir)
AMAS_DIR = os.path.join(PROJ_DIR, "AMAS")
# Make the AMAS package importable; the guard keeps re-running this cell
# from appending the same entry again.
if PROJ_DIR not in sys.path:
    sys.path.append(PROJ_DIR)

SUPPL_DIR = os.path.join(PROJ_DIR, os.pardir, "AMAS_suppl")
ACCURACY_DIR = os.path.join(SUPPL_DIR, "data_for_credibility")

# address changed - as of May 12, 2023.
MANU_FIGURE_DIR = os.path.join(BASE_DIR, "writeup", "AMAS", "Manu_figures_new")
SUPPL_FIGURE_DIR = os.path.join(BASE_DIR, "writeup", "AMAS", "Supple_figures_new")
# TEMPFIG_DIR = os.path.join(FIGURE_DIR, 'temp')

from AMAS import species_annotation as sa
from AMAS import reaction_annotation as ra
from AMAS import recommender
from AMAS import constants as cn
from AMAS import iterator as it
from AMAS import tools

# BiGG models: every file in BIGG_DIR whose name ends with '.xml'.
biggs = [fname for fname in os.listdir(BIGG_DIR) if fname.endswith('.xml')]
print("Number of BiGG models to be analyzed: %d" % len(biggs))

# Species accuracy table; the distinct values of its 'model' column are the
# BioModels used for the species analysis.
spec_ml_df = pd.read_csv(
    os.path.join(ACCURACY_DIR, 'biomd_individual_species_accuracy.csv')
)
biomds_spec = np.unique(spec_ml_df['model'])
print("Number of BioModels to be analyzed for SPECIES: %d" % len(biomds_spec))

# Same for the reactions accuracy table.
reac_ml_df = pd.read_csv(
    os.path.join(ACCURACY_DIR, 'biomd_individual_reactions_accuracy.csv')
)
biomds_reac = np.unique(reac_ml_df['model'])
print("Number of BioModels to be analyzed for REACTIONS: %d" % len(biomds_reac))

Number of BiGG models to be analyzed: 108
Number of BioModels to be analyzed for SPECIES: 306
Number of BioModels to be analyzed for REACTIONS: 131


In [6]:
# Parse the first downloaded reconstruction with libSBML.
reader = libsbml.SBMLReader()
sbml_document = reader.readSBML(os.path.join(SAVE_FPATH, indiv_files[0]))
model = sbml_document.getModel()
# readSBML reports problems on the returned document rather than raising, and
# getModel() gives None when no model could be read; stop here in that case
# instead of failing later with an AttributeError on None.
if model is None:
    raise ValueError(
        "No SBML model could be read from %s"
        % os.path.join(SAVE_FPATH, indiv_files[0])
    )

In [8]:
# Display the species list of the parsed model as a quick sanity check.
model.getListOfSpecies()

<Species M_10fthf__91__c__93__ "10-Formyltetrahydrofolate">

In [9]:
# Build an AMAS Recommender from the first downloaded file (indiv_files[0]).
recom = recommender.Recommender(libsbml_fpath=os.path.join(SAVE_FPATH, indiv_files[0]))

In [12]:
# Number of entries in recom.reactions.exist_annotation
# (presumably the reactions that already carry annotations - TODO confirm).
len(recom.reactions.exist_annotation)

644