In [1]:
# Collect reaction/species annotations, and
# determine whether they can be assembled (open question: isn't this the same as semanticSBML?)
import collections
import compress_pickle
import editdistance
import itertools
import libsbml
import numpy as np
import os
import pickle
import pandas as pd
import re
import sys
import time
import matplotlib.pyplot as plt
%matplotlib inline  

# File name of a single BioModels entry kept as a named constant.
BIOMD_12 = 'BIOMD0000000012.xml'

# Root of the local data checkout. Every directory below is derived from it,
# so moving the data only requires editing this one line.
BASE_DIR = '/Users/woosubs/Desktop/AutomateAnnotation/'
DATA_DIR = os.path.join(BASE_DIR, "DATA")
ALGO_DIR = os.path.join(DATA_DIR, "algo")
CHEBI_DIR = os.path.join(DATA_DIR, "chebi")
RHEA_DIR = os.path.join(DATA_DIR, "rhea")
REACTOME_DIR = os.path.join(DATA_DIR, 'reactome')
BIOMODEL_DIR = os.path.join(DATA_DIR, "biomodels/curated_biomodels_31mar2021")
# Derived from DATA_DIR like its siblings (same value as the former literal path).
BIGG_DIR = os.path.join(DATA_DIR, "bigg")

# dir for alternative methods for species
# Derived from BASE_DIR (same value as the former literal path).
ALT_DIR = os.path.join(BASE_DIR, "AMAS_suppl", "alt_methods_species_data")
ecoli_fpath = os.path.join(BIGG_DIR, "e_coli_core.xml")

# The parent of the current working directory is put on sys.path so that
# the `AMAS` package can be imported from there.
PROJ_DIR = os.path.join(os.getcwd(), os.pardir)
AMAS_DIR = os.path.join(PROJ_DIR, "AMAS")
# Guarded so that re-running this cell does not append duplicate entries.
if PROJ_DIR not in sys.path:
    sys.path.append(PROJ_DIR)

from AMAS import species_annotation as sa
from AMAS import reaction_annotation as ra
from AMAS import recommender
from AMAS import constants as cn
from AMAS import iterator as it
from AMAS import tools

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [2]:
# File names in BIOMODEL_DIR that end in '.xml', in the order os.listdir returns them.
biomd_fnames = [fname for fname in os.listdir(BIOMODEL_DIR) if fname.endswith('.xml')]
# Matching full paths, index-aligned with biomd_fnames.
biomd_fpaths = [os.path.join(BIOMODEL_DIR, fname) for fname in biomd_fnames]

In [3]:
# Map each model file name to the `exist_annotation` value of its reactions;
# models for which that value is empty/falsy are left out.
all_reaction_annotation = {}
for idx, one_biomd in enumerate(biomd_fnames):
    # Progress marker every 100 models.
    if not idx % 100:
        print("We are at", idx)
    one_fpath = os.path.join(BIOMODEL_DIR, one_biomd)
    recom = recommender.Recommender(libsbml_fpath=one_fpath)
    one_rset_annotation = recom.reactions.exist_annotation
    if not one_rset_annotation:
        continue
    all_reaction_annotation[one_biomd] = one_rset_annotation

We are at 0
We are at 100
We are at 200
We are at 300
We are at 400
We are at 500
We are at 600
We are at 700
We are at 800
We are at 900


In [6]:
# Read one model directly with libsbml and rebuild, step by step, a
# reaction-id -> mapped-identifier dictionary from its KEGG reaction qualifiers.
one_biomd = 'BIOMD0000000627.xml'
one_fpath = os.path.join(BIOMODEL_DIR, one_biomd)

reader = libsbml.SBMLReader()
document = reader.readSBML(one_fpath)
model = document.getModel()

# Step 1: raw qualifier lookup for every reaction in the model.
reac_dict_raw_kegg = {
    r.getId(): tools.getQualifierFromString(r.getAnnotationString(), cn.KEGG_REACTION)
    for r in model.getListOfReactions()
}
# Step 2: drop reactions whose lookup returned None.
reac_dict_raw_filt_kegg = {
    k: found
    for k, found in reac_dict_raw_kegg.items()
    if found is not None
}
# Step 3: translate each value through cn.REF_KEGG2RHEA_BI, skipping values it does not contain.
reac_dict_kegg = {
    k: [cn.REF_KEGG2RHEA_BI[val] for val in found if val in cn.REF_KEGG2RHEA_BI]
    for k, found in reac_dict_raw_filt_kegg.items()
}
# Step 4: keep only reactions that still have at least one translated value.
reac_dict_filt_kegg = {
    k: mapped
    for k, mapped in reac_dict_kegg.items()
    if mapped
}

In [8]:
# Build the AMAS objects for BIOMD0000000627 and display the
# `exist_annotation` attribute of its ReactionAnnotation.
one_biomd = 'BIOMD0000000627.xml'
one_fpath = os.path.join(BIOMODEL_DIR, one_biomd)
# NOTE(review): `recom` is rebound here but not used in this cell — confirm a later cell needs it.
recom = recommender.Recommender(libsbml_fpath=one_fpath)
reactions = ra.ReactionAnnotation(libsbml_fpath=one_fpath)
# Bare last expression so the notebook renders the value.
reactions.exist_annotation

{'reaction_9': ['RHEA:18160'],
 'reaction_10': ['RHEA:18160'],
 'AK_neurons': ['RHEA:12976'],
 'AK_astrocytes': ['RHEA:12976'],
 'CK_astrocytes_forward__R01881': ['RHEA:17160'],
 'CK_neurons_forward__R01881': ['RHEA:17160'],
 'LDH_astrocytes_forward__R00703': ['RHEA:23447'],
 'LDH_neurons_forward__R00703': ['RHEA:23447'],
 'SOL_neurons__R02035': ['RHEA:12559'],
 'SOL_astrocytes__R02035': ['RHEA:12559'],
 'GND_neurons___R01528': ['RHEA:10119'],
 'GND_astrocytes__R01528': ['RHEA:10119'],
 'RPE_neurons__R01529': ['RHEA:10119'],
 'RPE_astrocytes__R01529': ['RHEA:10119'],
 'RKI_astrocytes__R01056': ['RHEA:14660'],
 'RKI_neurons__R01056': ['RHEA:14660'],
 'TKL_1_astrocytes__R01641': ['RHEA:10511'],
 'TKL_1_neurons__R01641': ['RHEA:10511'],
 'TAL_astrocytes__R01827': ['RHEA:17056'],
 'TAL_neurons__R01827': ['RHEA:17056'],
 'TKL_2_astrocytes__R01830': ['RHEA:27629'],
 'TKL_2_neurons__R01830': ['RHEA:27629']}

In [10]:
# Display the filtered reaction-id -> mapped-identifier dictionary
# (presumably to compare it by eye with `reactions.exist_annotation` — confirm).
reac_dict_filt_kegg

{'reaction_9': ['RHEA:18160'],
 'reaction_10': ['RHEA:18160'],
 'AK_neurons': ['RHEA:12976'],
 'AK_astrocytes': ['RHEA:12976'],
 'CK_astrocytes_forward__R01881': ['RHEA:17160'],
 'CK_neurons_forward__R01881': ['RHEA:17160'],
 'LDH_astrocytes_forward__R00703': ['RHEA:23447'],
 'LDH_neurons_forward__R00703': ['RHEA:23447'],
 'SOL_neurons__R02035': ['RHEA:12559'],
 'SOL_astrocytes__R02035': ['RHEA:12559'],
 'GND_neurons___R01528': ['RHEA:10119'],
 'GND_astrocytes__R01528': ['RHEA:10119'],
 'RPE_neurons__R01529': ['RHEA:10119'],
 'RPE_astrocytes__R01529': ['RHEA:10119'],
 'RKI_astrocytes__R01056': ['RHEA:14660'],
 'RKI_neurons__R01056': ['RHEA:14660'],
 'TKL_1_astrocytes__R01641': ['RHEA:10511'],
 'TKL_1_neurons__R01641': ['RHEA:10511'],
 'TAL_astrocytes__R01827': ['RHEA:17056'],
 'TAL_neurons__R01827': ['RHEA:17056'],
 'TKL_2_astrocytes__R01830': ['RHEA:27629'],
 'TKL_2_neurons__R01830': ['RHEA:27629']}

In [6]:
# Scratch check: a dict of equal-length lists turned into a DataFrame,
# with the dict keys used as column names.
dum_df = dict(x=['a', 'b'], y=['c', 'd'])
pd.DataFrame.from_dict(dum_df, orient='columns')

Unnamed: 0,x,y
0,a,c
1,b,d


In [5]:
# Display the whole {model file name: reaction annotations} dictionary.
# NOTE(review): this renders every entry; consider showing a small sample instead.
all_reaction_annotation

{'BIOMD0000000191.xml': {'Arginase': ['RHEA:20572'],
  'NOS': ['RHEA:19900'],
  'ODC': ['RHEA:22967']},
 'BIOMD0000000218.xml': {'CS': ['RHEA:16848'],
  'ACN': ['RHEA:10339'],
  'ICD1': ['RHEA:23635'],
  'ICD2': ['RHEA:23635'],
  'KGD': ['RHEA:10527'],
  'SSADH': ['RHEA:13220'],
  'ScAS': ['RHEA:11519'],
  'SDH': ['RHEA:18284'],
  'FUM': ['RHEA:12463'],
  'MDH': ['RHEA:21435'],
  'ICL1': ['RHEA:13248'],
  'ICL2': ['RHEA:13248'],
  'MS': ['RHEA:18184']},
 'BIOMD0000000219.xml': {'CS': ['RHEA:16848'],
  'ACN': ['RHEA:10339'],
  'ICD1': ['RHEA:23635'],
  'ICD2': ['RHEA:23635'],
  'KDH': ['RHEA:27789'],
  'KGD': ['RHEA:10527'],
  'SSADH': ['RHEA:13216'],
  'ScAS': ['RHEA:11519'],
  'SDH': ['RHEA:18284'],
  'FUM': ['RHEA:12463'],
  'MDH': ['RHEA:21435'],
  'ICL1': ['RHEA:13248'],
  'ICL2': ['RHEA:13248'],
  'MS': ['RHEA:18184']},
 'BIOMD0000000231.xml': {'v1': ['RHEA:20855'],
  'v2': ['RHEA:12976'],
  'v3': ['RHEA:18160'],
  'v4': ['RHEA:23447']},
 'BIOMD0000000225.xml': {'vpfk': ['RHEA:161