In [129]:
# Additional analysis from agora2_checking.ipynb
 
import collections
from collections import Counter

import compress_pickle
import copy
import editdistance
import itertools
import libsbml
import numpy as np
import operator
import os
import pickle
import pandas as pd
import random
import sys
import time
import matplotlib.pyplot as plt
%matplotlib inline  

# ---- Configuration: file names and directory layout ----
# BASE_DIR is an absolute location on the original author's machine.
# Every other data/figure location below is derived from it, so relocating
# the project only requires changing this one value.
BIOMD_12 = 'BIOMD0000000012.xml'
BASE_DIR = '/Users/woosubshin/Desktop/AutomateAnnotation/'
DATA_DIR = os.path.join(BASE_DIR, "DATA")
ALGO_DIR = os.path.join(DATA_DIR, "algo")
CHEBI_DIR = os.path.join(DATA_DIR, "chebi")
FIGURE_DIR = os.path.join(BASE_DIR, 'AMAS_suppl', 'figure_files')
RHEA_DIR = os.path.join(DATA_DIR, "rhea")
BIOMODEL_DIR = os.path.join(DATA_DIR, "biomodels/curated_biomodels_31mar2021")
BIGG_DIR = os.path.join(DATA_DIR, 'bigg')
ecoli_fpath = os.path.join(BIGG_DIR, "e_coli_core.xml")

# The project root is taken to be the parent of the current working directory;
# it is put on sys.path so that the `AMAS` package can be imported below.
PROJ_DIR = os.path.join(os.getcwd(), os.pardir)
AMAS_DIR = os.path.join(PROJ_DIR, "AMAS")
# Guarded so that re-running this cell does not keep growing sys.path.
if PROJ_DIR not in sys.path:
  sys.path.append(PROJ_DIR)

SUPPL_DIR = os.path.join(PROJ_DIR, os.pardir, "AMAS_suppl")
ACCURACY_DIR = os.path.join(SUPPL_DIR, "data_for_credibility")

# address changed - as of May 12, 2023. 
MANU_FIGURE_DIR = os.path.join(BASE_DIR, 'writeup', 'AMAS', 'Manu_figures_new')
SUPPL_FIGURE_DIR = os.path.join(BASE_DIR, 'writeup', 'AMAS', 'Supple_figures_new')

PRES_DIR = os.path.join(BASE_DIR, 'presentation', 'sauro_lab', 'sauro_lab_2023', 'jun82023')

from AMAS import species_annotation as sa
from AMAS import reaction_annotation as ra
from AMAS import recommender
from AMAS import constants as cn
from AMAS import iterator as it
from AMAS import tools

# File names in BIGG_DIR whose last four characters are '.xml'.
biggs = [val for val in os.listdir(BIGG_DIR) if val[-4:]=='.xml']
print("Number of BiGG models to be analyzed: %d" % len(biggs))

# Per-species accuracy table; the distinct values of its 'model' column are
# the BioModels considered for species.
spec_ml_df = pd.read_csv(os.path.join(ACCURACY_DIR, 'biomd_individual_species_accuracy.csv'))
biomds_spec = np.unique(spec_ml_df['model'])
print("Number of BioModels to be analyzed for SPECIES: %d" % len(biomds_spec))

# Same for the per-reaction accuracy table.
reac_ml_df = pd.read_csv(os.path.join(ACCURACY_DIR, 'biomd_individual_reactions_accuracy.csv'))
biomds_reac = np.unique(reac_ml_df['model'])
print("Number of BioModels to be analyzed for REACTIONS: %d" % len(biomds_reac))


# Directory that the later cells list and read the AGORA2 model files from
# (absolute path to an external volume).
SAVE_FPATH = '/Volumes/MGTEC/Agora2'

Number of BiGG models to be analyzed: 108
Number of BioModels to be analyzed for SPECIES: 306
Number of BioModels to be analyzed for REACTIONS: 131


In [7]:
# Model file names for AGORA2 (SAVE_FPATH) and BiGG (BIGG_DIR).
# The full '.xml' extension is matched (not just a trailing 'xml') so that
# `biggs` is selected the same way as in the setup cell.
agoras = [val for val in os.listdir(SAVE_FPATH) if val.endswith('.xml')]
print(len(agoras))
biggs = [val for val in os.listdir(BIGG_DIR) if val.endswith('.xml')]
print(len(biggs))

7302
108


In [10]:
# Scan every AGORA2 model and collect, per model, the name used for each species.
# Models whose Recommender cannot be constructed are recorded and skipped.
models_not_working = []
agora_all_species_ll = []

for idx, one_a in enumerate(agoras):
  if idx % 1000 == 0:
    print("We're at", idx)
  one_fpath = os.path.join(SAVE_FPATH, one_a)
  try:
    recom = recommender.Recommender(libsbml_fpath=one_fpath)
  except Exception:
    # `except Exception` (rather than a bare `except`) so that interrupting
    # this long loop with Ctrl-C stops it instead of being recorded as a
    # broken model.
    models_not_working.append(one_a)
    continue
  model = recom.sbml_document.getModel()
  agora_all_species_ll.append([recom.species.getNameToUse(val.getId()) \
                               for val in model.getListOfSpecies()])
agora_all_species = list(set(itertools.chain(*agora_all_species_ll)))

# total number of unique species names (values returned by getNameToUse)
# used throughout agora 2
len(agora_all_species)

We're at 0
We're at 1000
We're at 2000
We're at 3000
We're at 4000
We're at 5000
We're at 6000
We're at 7000


3266

In [11]:
# Version 2 of the AGORA2 scan: besides the species names, also collect the
# reaction IDs of every model.
models_not_working = []
agora_all_species_ll = []
agora_all_reactions_ll = []
# Backward-compatible alias: other cells refer to the per-model reaction lists
# by this (misspelled) name. Both names point at the same list object.
agors_all_reactions_ll = agora_all_reactions_ll

for idx, one_a in enumerate(agoras):
  if idx % 1000 == 0:
    print("We're at", idx)
  one_fpath = os.path.join(SAVE_FPATH, one_a)
  try:
    recom = recommender.Recommender(libsbml_fpath=one_fpath)
  except Exception:
    # `except Exception` (rather than a bare `except`) so that Ctrl-C still
    # interrupts the loop instead of marking the current model as broken.
    models_not_working.append(one_a)
    continue
  model = recom.sbml_document.getModel()
  agora_all_species_ll.append([recom.species.getNameToUse(val.getId()) \
                               for val in model.getListOfSpecies()])
  agora_all_reactions_ll.append([val.getId() for val in model.getListOfReactions()])
agora_all_species = list(set(itertools.chain(*agora_all_species_ll)))
agora_all_reactions = list(set(itertools.chain(*agora_all_reactions_ll)))

# total number of unique species names and unique reaction IDs
# used throughout agora 2
print(len(agora_all_species))
print(len(agora_all_reactions))

We're at 0
We're at 1000
We're at 2000
We're at 3000
We're at 4000
We're at 5000
We're at 6000
We're at 7000
3266
6883


In [24]:
def PrintTotAveNum(ll):
  """
  Print the total number of elements
  over all inner lists, and the average
  number of elements per inner list
  (one inner list per model).
  
  Parameters
  ----------
  ll: list-list
      e.g., [[100, 202, 303], [...]]
      
  Returns
  -------
  None
      The result is printed, not returned.
  """
  num_models = len(ll)
  totalnum = sum(len(val) for val in ll)
  # An empty outer list would otherwise divide zero by zero;
  # report an average of 0 in that case.
  avenum = totalnum / num_models if num_models else 0
  print("Total number: %d, Average per model: %.0f" % (totalnum, avenum))

In [25]:
# Totals and per-model averages for AGORA2 and BiGG (species, then reactions).
# NOTE: bigg_all_species_ll / bigg_all_reactions_ll are assigned in the
# "summary of biggs" cell that appears further down in this file, so that cell
# has to be executed before this one.
PrintTotAveNum(agora_all_species_ll)

PrintTotAveNum(agors_all_reactions_ll)

PrintTotAveNum(bigg_all_species_ll)

PrintTotAveNum(bigg_all_reactions_ll)

Total number: 6826775, Average per model: 1229
Total number: 7505652, Average per model: 1351
Total number: 180802, Average per model: 1674
Total number: 251424, Average per model: 2328


In [14]:
# Total element counts (sum of the inner-list lengths) for each repository.
totalnum_agora_species = np.sum(list(map(len, agora_all_species_ll)))
totalnum_agora_reactions = np.sum(list(map(len, agors_all_reactions_ll)))
totalnum_bigg_species = np.sum(list(map(len, bigg_all_species_ll)))
totalnum_bigg_reactions = np.sum(list(map(len, bigg_all_reactions_ll)))

print(totalnum_agora_species)
print(totalnum_agora_reactions)
print(totalnum_bigg_species)
print(totalnum_bigg_reactions)

5556

In [12]:
# summary of biggs
# For every BiGG model: collect the name used for each species and the ID of
# each reaction. Unlike the AGORA2 scan, Recommender construction is not
# wrapped in try/except here, so a failing model stops the loop.

bigg_all_species_ll = []
bigg_all_reactions_ll = []
for idx, one_bigg in enumerate(biggs):
  if idx % 20 == 0:
    print("We're at", idx)
  one_fpath = os.path.join(BIGG_DIR, one_bigg)
  recom = recommender.Recommender(libsbml_fpath=one_fpath)
  model = recom.sbml_document.getModel()
  bigg_all_species_ll.append([recom.species.getNameToUse(val.getId()) \
                               for val in model.getListOfSpecies()])
  bigg_all_reactions_ll.append([val.getId() \
                               for val in model.getListOfReactions()])
# De-duplicated across all models.
bigg_all_species = list(set(itertools.chain(*bigg_all_species_ll)))
bigg_all_reactions = list(set(itertools.chain(*bigg_all_reactions_ll)))


# total number of unique species names and unique reaction IDs
# used throughout the BiGG models
print(len(bigg_all_species))
print(len(bigg_all_reactions))

We're at 0
We're at 20
We're at 40
We're at 60
We're at 80
We're at 100
8813
28548


In [96]:
# From AGORA2; detecting CHEBI:1 with spurious species
one_a = 'Yersinia_kristensenii_Y231.xml'
one_fpath = os.path.join(SAVE_FPATH, one_a)
recom = recommender.Recommender(libsbml_fpath=one_fpath)
# Bind `model` to THIS recommender's document before it is used below.
# Previously the species list was read from whatever `model` an earlier cell
# had left behind, and `model` was only rebound inside the loop.
model = recom.sbml_document.getModel()
comb_specs = [val.getId() for val in model.getListOfSpecies()]
# (species id, name used for the species, existing annotation) for every
# species that already has an annotation.
value_trips = []
for one_k in recom.species.exist_annotation.keys():
  value_trips.append((one_k,
                      recom.species.getNameToUse(one_k),
                      recom.species.exist_annotation[one_k]))
# Species of the model that have no entry in exist_annotation.
rem_specs = [val for val in comb_specs if val not in recom.species.exist_annotation.keys()]
model.getNumSpecies()

1375

In [99]:
print(model.getSpecies(rem_specs[5]).name)

10-methyl-dodecanoyl-ACP


In [102]:
recom.recommendSpecies('M_Largn__91__c__93__')

Unnamed: 0,file,type,id,display name,meta id,annotation,annotation label,match score,existing,UPDATE ANNOTATION
0,Yersinia_kristensenii_Y231.xml,species,M_Largn__91__c__93__,L-Arogenate,M_Largn__91__c__93__,CHEBI:17530,L-arogenic acid,1.0,0,ignore
1,Yersinia_kristensenii_Y231.xml,species,M_Largn__91__c__93__,L-Arogenate,M_Largn__91__c__93__,CHEBI:58180,L-arogenate(1-),1.0,0,ignore
2,Yersinia_kristensenii_Y231.xml,species,M_Largn__91__c__93__,L-Arogenate,M_Largn__91__c__93__,CHEBI:18357,(R)-noradrenaline,0.889,1,keep


In [63]:
# first model
# Uses the `recom` left in the kernel by a previously executed cell.
# The model is fetched once, up front: the call does not depend on the loop
# variable, and `model` is now also refreshed when there are no annotations.
model = recom.sbml_document.getModel()
# (species id, name used for the species, existing annotation) triples.
value_trips = []
for one_k in recom.species.exist_annotation.keys():
  value_trips.append((one_k,
                      recom.species.getNameToUse(one_k),
                      recom.species.exist_annotation[one_k]))

In [64]:
recom.recommendSpecies('M_10fthf__91__c__93__')

Unnamed: 0,file,type,id,display name,meta id,annotation,annotation label,match score,existing,UPDATE ANNOTATION
0,Abiotrophia_defectiva_ATCC_49176.xml,species,M_10fthf__91__c__93__,10-Formyltetrahydrofolate,M_10fthf__91__c__93__,CHEBI:15637,10-formyltetrahydrofolic acid,1.0,0,ignore


In [134]:
random.sample(working_agoras, 4)

['Lactobacillus_animalis_ERR2221280.xml',
 'Lactobacillus_casei_ERR2221291.xml',
 'Intestinibacter_nov_ERR2221096.xml',
 'Acinetobacter_baumannii_MDR_ZJ06.xml']

In [140]:
# find all elements with 10-Formyltetrahydrofolate as a display name
formyl_tups = []
FORMYL = '10-Formyltetrahydrofolate'
FORMYL_ID = 'M_10fthf__91__c__93__'
IDS = ['M_10fthf__91__c__93__',
       'M_12ppd_S__91__c__93__',
       'M_13dpg__91__c__93__']


# One result list per ID in IDS; only FORMYL_ID is actually scanned below.
comb_tups = {val:[] for val in IDS}
working_agoras = [val for val in agoras if val not in models_not_working]

for idx, one_a in enumerate(working_agoras):
  if idx % 1000 == 0:
    print("We're at", idx)
  one_fpath = os.path.join(SAVE_FPATH, one_a)
  recom = recommender.Recommender(libsbml_fpath=one_fpath)
  model = recom.sbml_document.getModel()
  specs = [val.getId() for val in model.getListOfSpecies()]
  for ONE_ID in [FORMYL_ID]:
    if ONE_ID in specs:
      filt_specs = [val for val in specs if val==ONE_ID]
      for one_s in filt_specs:
        if ONE_ID in recom.species.exist_annotation.keys():
          annotation = recom.species.exist_annotation[ONE_ID]
          # keep only the first existing annotation term
          first_term = annotation[0]
        else:
          annotation = 'not existing'
          # Store the whole sentinel string. Indexing it with [0], as was done
          # before, recorded just the letter 'n'.
          first_term = annotation
        # (model file, name used for the species, first annotation or sentinel)
        comb_tups[ONE_ID].append((one_a,
                                  recom.species.getNameToUse(ONE_ID),
                                  first_term
                                 ))

We're at 0
We're at 1000
We're at 2000



KeyboardInterrupt



In [304]:
# to calculate cohen's kappa
# Load three AGORA2 models (recommender + libsbml model each) so that their
# existing species annotations can be compared pairwise in later cells.
# NOTE(review): the third model is taken from index 3, so index 2 is skipped -
# confirm this is intentional.
# NOTE(review): the agreement function defined further down is named for
# Fleiss' kappa, not Cohen's - confirm which statistic is meant.
ag1 = working_agoras[0]
ag2 = working_agoras[1]
ag3 = working_agoras[3]

one_fpath1 = os.path.join(SAVE_FPATH, ag1)
recom1 = recommender.Recommender(libsbml_fpath=one_fpath1)
agm1 = recom1.sbml_document.getModel()

one_fpath2 = os.path.join(SAVE_FPATH, ag2)
recom2 = recommender.Recommender(libsbml_fpath=one_fpath2)
agm2 = recom2.sbml_document.getModel()

one_fpath3 = os.path.join(SAVE_FPATH, ag3)
recom3 = recommender.Recommender(libsbml_fpath=one_fpath3)
agm3 = recom3.sbml_document.getModel()




# tups = []
# for one_a in [ag1, ag2]:
#   one_fpath = os.path.join(SAVE_FPATH, one_a)
#   recom = recommender.Recommender(libsbml_fpath=one_fpath)
#   model = recom.sbml_document.getModel()
#   model_tups = []
#   for one_s in model.getListOfSpecies():
#     if one_s.getId() in recom.species.exist_annotation.keys():
#       model_tups.append((one_s.getId(),
#                         recom.species.getNameToUse(one_s.getId()),
#                         recom.species.exist_annotation[one_s.getId()][0]))
#     # else:
#     #   model_tups.append((one_s.getId(),
#     #                     recom.species.getNameToUse(one_s.getId()),
#     #                     'None'))
#   tups.append(model_tups)
    

In [312]:
# Reload one specific AGORA2 model and display the existing annotation stored
# for species 'M_Largn__91__c__93__'.
# Note: this rebinds the notebook-level names one_a, one_fpath, recom and model.
one_a = 'Yersinia_kristensenii_Y231.xml'
one_fpath = os.path.join(SAVE_FPATH, one_a)
recom = recommender.Recommender(libsbml_fpath=one_fpath)
model = recom.sbml_document.getModel()
recom.species.exist_annotation['M_Largn__91__c__93__']

['CHEBI:18357']

In [211]:
agm2.getNumSpecies()

952

In [269]:
# Species IDs that have an existing annotation in both recom2 and recom3.
com_ids = list(set(recom2.species.exist_annotation.keys()).intersection(recom3.species.exist_annotation.keys()))
print(len(com_ids))

485


In [270]:
# Fraction of the common species IDs whose existing annotations are equal in
# recom2 and recom3 (the complete annotation values are compared with ==).
res = []
for one_s in com_ids:
  if recom2.species.exist_annotation[one_s] == recom3.species.exist_annotation[one_s]:
    res.append(True)
  else:
    res.append(False)
np.mean(res)

1.0

In [271]:
ag2

'Acaricomes_phytoseiuli_DSM_14247.xml'

In [305]:
# # subjects
# com_ids = list(set(recom1.species.exist_annotation.keys()).intersection(recom2.species.exist_annotation.keys()))
# N = len(com_ids)
# # n: number of raters
# n = 2
# Nn = N*n

# # collect unique chebi terms
# chebis1 = set(itertools.chain(*[val[1] for val in list(recom1.species.exist_annotation.items()) \
#                                if val[0] in com_ids]))
# chebis2 = set(itertools.chain(*[val[1] for val in list(recom2.species.exist_annotation.items()) \
#                                if val[0] in com_ids]))
# chebis = list(chebis1.union(chebis2))
# k = len(chebis)

# # create df
# df = pd.DataFrame(np.zeros((len(com_ids), len(chebis))))
# df.index=com_ids
# df.columns=chebis
# print(df.shape)
# for one_id in df.index:
#   df.loc[one_id, recom1.species.exist_annotation[one_id][0]] += 1
#   df.loc[one_id, recom2.species.exist_annotation[one_id][0]] += 1
    
# # p_j
# pjs = df.sum(0) / Nn

# # P_i
# pis = [(np.sum([val**2 for val in df.loc[one_i,:]])-2)/2 for one_i in df.index]

# pbar = np.sum(pis)/N
# pebar = np.sum([val**2 for val in pjs])

# kappa = (pbar-pebar)/(1-pebar)
# print(kappa)

In [306]:
# Scratch check: number of per-row P_i values (one per row of df).
# NOTE(review): in the visible part of this notebook `df` is only assigned
# inside getFleiss (as a local) and in a commented-out cell, so this cell
# relies on a `df` left in the kernel by an earlier run.
# The first assignment has no effect on the result: the comprehension below
# binds its own `one_i`.
one_i = df.index[0]
len([(np.sum([val**2 for val in df.loc[one_i,:]])-2)/2 for one_i in df.index])

516

In [307]:
def getFleiss(one_recom, two_recom):
  """
  Get Fleiss' kappa
  using two recom classes,
  treating each recommender as one rater.

  The subjects are the species IDs that have an
  existing annotation in both recommenders; the
  categories are the annotation terms. Only the
  first annotation term of each species is counted
  as that rater's choice.
  The shape of the subject-by-category count table
  is printed as a side effect.
  
  Parameters
  ----------
  one_recom: AMAS.Recommender
  two_recom: AMAS.Recommender
      Only .species.exist_annotation is read
      (a mapping: species ID -> sequence of terms).
  
  Returns
  -------
  :float
    kappa statistic;
    nan if there is no common species, or if
    chance agreement is 1 (every rating falls in
    a single category), where kappa is undefined.
  """
  one_annot = one_recom.species.exist_annotation
  two_annot = two_recom.species.exist_annotation
  # subjects
  com_ids = list(set(one_annot.keys()).intersection(two_annot.keys()))
  N = len(com_ids)
  # n: number of raters
  n = 2
  Nn = N*n

  # collect unique chebi terms (all terms of the common species);
  # a set makes the membership test constant-time
  com_set = set(com_ids)
  chebis1 = set(itertools.chain(*[terms for key, terms in one_annot.items() \
                                 if key in com_set]))
  chebis2 = set(itertools.chain(*[terms for key, terms in two_annot.items() \
                                 if key in com_set]))
  chebis = list(chebis1.union(chebis2))

  # create df: rows are subjects, columns are categories, cells are counts
  df = pd.DataFrame(np.zeros((len(com_ids), len(chebis))))
  df.index=com_ids
  df.columns=chebis
  print(df.shape)
  if N == 0:
    # nothing to compare
    return np.nan
  for one_id in df.index:
    df.loc[one_id, one_annot[one_id][0]] += 1
    df.loc[one_id, two_annot[one_id][0]] += 1
  # p_j: share of all ratings that fall in category j
  pjs = df.sum(0) / Nn

  # P_i: agreement for subject i, (sum_j n_ij^2 - n) / (n*(n-1))
  pis = ((df**2).sum(axis=1) - n) / (n*(n-1))

  pbar = pis.sum()/N
  pebar = (pjs**2).sum()
  if pebar == 1:
    # the denominator below would be zero
    return np.nan

  kappa = (pbar-pebar)/(1-pebar)
  return kappa

In [308]:
getFleiss(one_recom=recom2, two_recom=recom1)

(516, 438)


0.22658061697968174

In [328]:
p = libsbml.parseFormula ('k1 / s1')
print(libsbml.writeMathMLToString (p))

<?xml version="1.0" encoding="UTF-8"?>
<math xmlns="http://www.w3.org/1998/Math/MathML">
  <apply>
    <divide/>
    <ci> k1 </ci>
    <ci> s1 </ci>
  </apply>
</math>


In [333]:
s = libsbml.writeMathMLToString (p)
s

'<?xml version="1.0" encoding="UTF-8"?>\n<math xmlns="http://www.w3.org/1998/Math/MathML">\n  <apply>\n    <divide/>\n    <ci> k1 </ci>\n    <ci> s1 </ci>\n  </apply>\n</math>'

In [310]:
getFleiss(one_recom=recom1, two_recom=recom3)

(514, 426)


0.22667281698003802

In [311]:
getFleiss(one_recom=recom3, two_recom=recom3)

(580, 471)


1.0

In [293]:
print([ag1, ag2, ag3])

['Abiotrophia_defectiva_ATCC_49176.xml', 'Acaricomes_phytoseiuli_DSM_14247.xml', 'Acetanaerobacterium_elongatum_CGMCC_1_5012.xml']


In [190]:
Counter([k[1] for k in [val for val in tups[0] if val[2]=='CHEBI:1']])

Counter({'10-Formyltetrahydrofolate': 1,
         '(S)-propane-1,2-diol': 1,
         '3-Phospho-D-glyceroyl phosphate': 1,
         '1-acyl-sn-glycerol 3-phosphate(2-)': 1,
         '1-Pyrroline-5-carboxylate': 1,
         '(S)-2-[5-Amino-1-(5-phospho-D-ribosyl)imidazole-4-carboxamido]succinate': 1,
         'L-2-amino-3-oxobutanoic acid': 1,
         '2-Deoxy-D-ribose 1-phosphate': 1,
         '2-deoxy-D-ribose 5-phosphate(2-)': 1,
         '2-methylacetoacetyl-CoA(4-)': 1,
         '2-methylbutanoyl-CoA(4-)': 1,
         '2-methyl-3-oxopropanoate': 1,
         '2-Oxobutanoate': 1,
         'D-Glycerate 2-phosphate': 1,
         '2-phosphonatoglycolate(3-)': 1,
         '3-(4-hydroxyphenyl)pyruvate': 1,
         '(S)-3-hydroxybutanoyl-CoA(4-)': 1,
         '(S)-3-Hydroxydecanoyl-CoA': 1,
         '(S)-3-Hydroxydodecanoyl-CoA': 1,
         '(2S,3S)-3-hydroxy-2-methylbutanoyl-CoA(4-)': 1,
         '3-hydroxy-2-methylpropanoate': 1,
         '(S)-3-Hydroxytetradecanoyl-CoA': 1,
        

In [172]:
def getIdFromName(inp_name, name_pairs=None, ignore_case=False):
  """
  Find the IDs of the species whose
  display name matches inp_name.

  Parameters
  ----------
  inp_name: str
      should be a display name
  name_pairs: iterable of (id, display name), optional
      Pairs to search. If None, the notebook-level
      variable `specs` is used, as before.
  ignore_case: bool, optional
      If True, names are compared case-insensitively.
      The default (False) is an exact, case-sensitive match.

  Returns
  -------
  list
      IDs (first element of each matching pair),
      in input order; empty if nothing matches.
  """
  if name_pairs is None:
    # fall back on the notebook-level variable (original behavior)
    name_pairs = specs
  if ignore_case:
    target = inp_name.lower()
    return [val[0] for val in name_pairs if val[1].lower()==target]
  filt_s = [val[0] for val in name_pairs if val[1]==inp_name]
  return filt_s

In [173]:
getIdFromName('10-formyltetrahydrofolate')

[]

In [167]:
filt_s = [val[0] for val in specs if val[1]=='10-Formyltetrahydrofolate']
filt_s

['M_10fthf__91__c__93__']

In [175]:
inp_name = '10-Formyltetrahydrofolate'
filt_s = [val[0] for val in specs if val[1]==inp_name]
filt_s[0]

'M_10fthf__91__c__93__'

In [148]:
recom.species.exist_annotation['10-formyltetrahydrofolate']

KeyError: '10-formyltetrahydrofolate'

In [150]:
spec_dict

{'10-formyltetrahydrofolate-[Glu](5)': 1,
 '10-Formyltetrahydrofolate': 1,
 '10-methyl-3-hydroxy-dodecanoyl-ACP': 1,
 '10-methyl-3-hydroxy-undecanoyl-ACP': 1,
 '10-methyl-3-oxo-dodecanoyl-ACP': 1,
 '10-methyl-3-oxo-undecanoyl-ACP': 1,
 '10-methyl-dodecanoyl-ACP': 1,
 '10-methyl-trans-dodec-2-enoyl-ACP': 1,
 '10-methyl-trans-undec-2-enoyl-ACP': 1,
 '10-methyl-undecanoyl-ACP': 1,
 '11-methyl-3-hydroxy-dodecanoyl-ACP': 1,
 '11-methyl-3-oxo-dodecanoyl-ACP': 1,
 '11-methyl-dodecanoyl-ACP': 1,
 '11-methyl-trans-dodec-2-enoyl-ACP': 1,
 '12-methyl-3-hydroxy-tetra-decanoyl-ACP': 1,
 '12-methyl-3-hydroxy-tridecanoyl-ACP': 1,
 '12-methyl-3-oxo-tetra-decanoyl-ACP': 1,
 '12-methyl-3-oxo-tridecanoyl-ACP': 1,
 '12-methyl-tetra-decanoyl-ACP': 1,
 '12-methyl-trans-tetra-dec-2-enoyl-ACP': 1,
 '12-methyl-tridecanoyl-ACP': 1,
 '12-methyl-trans-tridec-2-enoyl-ACP': 1,
 '3-Phospho-D-glyceroyl phosphate': 1,
 '13-methyl-3-hydroxy-tetra-decanoyl-ACP': 1,
 '13-methyl-3-oxo-tetra-decanoyl-ACP': 1,
 '13-methyl-t

In [109]:
# filt_f = [val for val in formyl_tups if val[2][0]=='CHEBI:1']
# filt_f

In [110]:
# for idx, one_trip in enumerate(value_trips):
#   if idx>200:
#     break
#   print("%s : %s" % (one_trip[0], one_trip[1]))
#   print(one_trip[2])

In [51]:
# print(model.getSpecies('M_13dpg__91__c__93__').getAnnotationString())

In [55]:
recom.recommendSpecies(ids='M_12ppd_S__91__c__93__')

Unnamed: 0,file,type,id,display name,meta id,annotation,annotation label,match score,existing,UPDATE ANNOTATION
0,Abiotrophia_defectiva_ATCC_49176.xml,species,M_12ppd_S__91__c__93__,"(S)-propane-1,2-diol",M_12ppd_S__91__c__93__,CHEBI:29002,"(S)-propane-1,2-diol",1.0,0,ignore


In [37]:
bigg_all_species[3]

'S-Adenosylmethioninamine'

In [38]:
# value_triples
# For every BiGG model, record each species that has an existing annotation as
# a 4-tuple: (model file, species id, one_k, existing annotation).
# NOTE(review): one_k is fixed before the loop and never compared against the
# species, so every tuple carries the same name - confirm whether a filter on
# the species name (or the species' own name) was intended here.
one_k = bigg_all_species[3]

val_triples = []

for idx, one_bigg in enumerate(biggs):
  if idx % 10 == 0:
    print("We're at", idx)
  one_fpath = os.path.join(BIGG_DIR, one_bigg)
  recom = recommender.Recommender(libsbml_fpath=one_fpath)
  model = recom.sbml_document.getModel()  
  for val in model.getListOfSpecies():
    if val.getId() in recom.species.exist_annotation.keys():
      val_triples.append((one_bigg,
                          val.getId(),
                          one_k,
                          recom.species.exist_annotation[val.getId()])) 

We're at 0
We're at 10
We're at 20
We're at 30
We're at 40
We're at 50
We're at 60
We're at 70
We're at 80
We're at 90
We're at 100


In [21]:
# summary of biggs
# Re-lists the BiGG files, then for each model collects the species names and
# every reaction for which getKineticLaw() returns a truthy value.
biggs = [val for val in os.listdir(BIGG_DIR) if val[-3:]=='xml']

# kins: model file name -> {reaction id: object returned by getKineticLaw()}
kins = dict()
# all_spec_names: one list of species names per model
all_spec_names = []

for idx, one_bigg in enumerate(biggs):
  if idx % 10 == 0:
    print("We're at", idx)
  one_fpath = os.path.join(BIGG_DIR, one_bigg)
  recom = recommender.Recommender(libsbml_fpath=one_fpath)
  model = recom.sbml_document.getModel() 
  all_spec_names.append([recom.species.getNameToUse(val.getId()) \
                         for val in model.getListOfSpecies()])
  # checking kinetics
  # NOTE(review): the stored objects come from the document held by `recom`,
  # which is rebound on every iteration - verify they are still valid to use
  # after the loop.
  one_set_kins = dict()
  for one_r in model.getListOfReactions():
    one_k = one_r.getKineticLaw()
    if one_k:
      one_set_kins[one_r.getId()] = one_k
  kins[one_bigg] = one_set_kins

We're at 0
We're at 10
We're at 20
We're at 30
We're at 40
We're at 50
We're at 60
We're at 70
We're at 80
We're at 90
We're at 100


In [39]:
len(cn.REF_CHEBI2LABEL)

159840

In [40]:
recom.species.getNameToUse('M_nadph_c')

'Nicotinamide adenine dinucleotide phosphate - reduced'

In [43]:
recom.species.getNameToUse('M_coa_c')

'Coenzyme A'

In [24]:
# (Counter is already imported in the first cell; this import is redundant.)
from collections import Counter
# Flatten the per-model name lists; this is a one-shot iterator that the
# Counter call below consumes.
spec_names_with_rep = itertools.chain(*all_spec_names)
# species name -> number of occurrences over all models
count_specs = dict(Counter(spec_names_with_rep))

In [31]:
# Pick one species name (the fifth key of count_specs) and gather the raw
# annotation strings of every species carrying that name across BiGG models.
one_name = list(count_specs.keys())[4]
# res_models is not used anywhere in this cell.
res_models = dict()
spec_anots = []
for idx, one_bigg in enumerate(biggs):
  if idx % 10 == 0:
    print("We're at", idx)
  one_fpath = os.path.join(BIGG_DIR, one_bigg)
  recom = recommender.Recommender(libsbml_fpath=one_fpath)
  model = recom.sbml_document.getModel() 
  # Despite its name, spec_names holds species IDs: those whose name to use
  # equals one_name.
  spec_names = [val.getId() for val in model.getListOfSpecies() \
               if recom.species.getNameToUse(val.getId())==one_name]
  if spec_names:
    for one_spec in spec_names:
      spec_anots.append(model.getSpecies(one_spec).getAnnotationString())
  # Stop scanning further models once at least 100 strings are collected.
  if len(spec_anots)>=100:
    break

We're at 0
We're at 10
We're at 20
We're at 30
We're at 40
We're at 50
We're at 60
We're at 70


In [32]:
for one_a in spec_anots:
  print(one_a)
  print()

<sbml:annotation xmlns:sbml="http://www.sbml.org/sbml/level3/version1/core">
  <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <rdf:Description rdf:about="#M_10fthf_c">
      <bqbiol:is xmlns:bqbiol="http://biomodels.net/biology-qualifiers/">
        <rdf:Bag>
          <rdf:li rdf:resource="http://identifiers.org/bigg.metabolite/10fthf"/>
          <rdf:li rdf:resource="http://identifiers.org/biocyc/META:10-FORMYL-THF"/>
          <rdf:li rdf:resource="http://identifiers.org/chebi/CHEBI:19108"/>
          <rdf:li rdf:resource="http://identifiers.org/chebi/CHEBI:15637"/>
          <rdf:li rdf:resource="http://identifiers.org/chebi/CHEBI:698"/>
          <rdf:li rdf:resource="http://identifiers.org/chebi/CHEBI:19109"/>
          <rdf:li rdf:resource="http://identifiers.org/chebi/CHEBI:11304"/>
          <rdf:li rdf:resource="http://identifiers.org/chebi/CHEBI:57454"/>
          <rdf:li rdf:resource="http://identifiers.org/hmdb/HMDB00972"/>
          <rdf:li rdf:res

In [19]:
# check if there are same named species with different annotations; 
spec_names_ll = []
[recom.species.getNameToUse(val.getId()) for val in model.getListOfSpecies()]

['Protoheme C34H30FeN4O4',
 'Molybdate',
 'Phosphatidylethanolamine (dioctadec-11-enoyl, n-C18:1)',
 'Coenzyme A',
 'Phosphatidylglycerol (dihexadec-9-enoyl, n-C16:1)',
 'Nicotinamide adenine dinucleotide phosphate - reduced',
 'Nickel',
 'H+',
 'Nicotinamide adenine dinucleotide phosphate',
 'L-Glutamine',
 'Molybdopterin cytosine dinucleotide',
 '5-Methyltetrahydrofolate',
 'Undecaprenyl diphosphate',
 'Acetyl-CoA',
 'Malonyl CoA C24H33N7O19P3S',
 'Core oligosaccharide lipid A',
 'L-Tryptophan',
 'P-Cresol',
 'Butyrate (n-C4:0)',
 'Butanesulfonate',
 "5'-deoxyribose",
 'DCTP C9H12N3O13P3',
 'Calcium',
 'Phosphatidylglycerol (dihexadecanoyl, n-C16:0)',
 'Cobinamide',
 'Aminoacetaldehyde',
 'Reduced glutathione',
 'S-Adenosyl-4-methylthio-2-oxobutanoate',
 'Cob(I)alamin',
 'Cadmium',
 'Thiamine diphosphate',
 'Cys Gly C5H10N2O3S',
 'Nicotinamide adenine dinucleotide - reduced',
 'Choline C5H14NO',
 '(2R,4S)-2-methyl-2,3,3,4-tetrahydroxytetrahydrofuran',
 'GTP C10H12N5O14P3',
 'Zinc',
 

In [13]:
import random
random.choice(model.getListOfSpecies())

<Species M_frdp_r "Farnesyl diphosphate">

In [None]:


# Build a per-model summary table of the BiGG models (bigg_summary) and the
# repository-wide unique species/reaction keys and annotation terms (bigg_total).

# Per-model lists, flattened and de-duplicated after the loop.
total_chebis = []
total_rheas = []
total_specs = []
total_reacs = []

# One entry per model; these become the columns of bigg_summary.
models = []
num_species = []
name_lengths = []
num_annotated_species = []
num_unique_chebis = []
num_reactions = []
num_components = []
num_annotated_reactions = []
num_unique_rheas = []


for idx, one_bigg in enumerate(biggs):
  if idx % 10 == 0:
    print("We're at", idx)
  one_fpath = os.path.join(BIGG_DIR, one_bigg)
  recom = recommender.Recommender(libsbml_fpath=one_fpath)
  models.append(one_bigg)
  num_species.append(len(recom.species.names))
  # mean len() of the entries of recom.species.names
  name_lengths.append(np.mean([len(recom.species.names[k]) \
                               for k in recom.species.names.keys()]))
  num_annotated_species.append(len(recom.species.exist_annotation))
  # distinct terms over all existing species annotations of this model
  unique_chebis = list(set(itertools.chain(*[recom.species.exist_annotation[k] \
                  for k in recom.species.exist_annotation.keys()])))
  num_reactions.append(len(recom.reactions.reaction_components))
  # mean len() of the entries of recom.reactions.reaction_components
  num_components.append(np.mean([len(recom.reactions.reaction_components[k]) \
                           for k in recom.reactions.reaction_components.keys()]))
  num_annotated_reactions.append(len(recom.reactions.exist_annotation))
  # distinct terms over all existing reaction annotations of this model
  unique_rheas = list(set(itertools.chain(*[recom.reactions.exist_annotation[k] \
                 for k in recom.reactions.exist_annotation.keys()])))
  num_unique_chebis.append(len(unique_chebis))
  num_unique_rheas.append(len(unique_rheas))
  total_specs.append(list(recom.species.names.keys()))
  total_reacs.append(list(recom.reactions.reaction_components.keys()))
  total_chebis.append(unique_chebis)
  total_rheas.append(unique_rheas)

bigg_summary = pd.DataFrame({'model': models,
                              'num_species': num_species,
                              'name_lengths': name_lengths,
                              'num_annotated_species': num_annotated_species,
                              'num_unique_chebis': num_unique_chebis,
                              'num_reactions': num_reactions, 
                              'num_components': num_components,
                              'num_annotated_reactions': num_annotated_reactions,
                              'num_unique_rheas': num_unique_rheas})
# Unique values over all models.
# NOTE(review): `specs` is rebound here to a flat list of the keys of
# recom.species.names, while getIdFromName indexes each element of `specs`
# with [0] and [1] - confirm which shape downstream code expects.
specs = list(set(itertools.chain(*total_specs)))
reacs = list(set(itertools.chain(*total_reacs)))
chebis = list(set(itertools.chain(*total_chebis)))
rheas = list(set(itertools.chain(*total_rheas)))
bigg_total = {'specs': specs, 'reacs': reacs,
              'chebis':chebis, 'rheas': rheas}

In [1]:
# check if any of them have kinetics, other than biomodels

In [None]:
# Krippendorff's alpha: checking
# 