In [1]:
# update command line tool -> 
# (1) csv save (with more columns)
# (2) csv to model file update? (new script)
# (3) augment annotation?
import collections
import compress_pickle
import itertools
import libsbml
import numpy as np
import os
import pickle
import pandas as pd
import sys
import time
import matplotlib.pyplot as plt
%matplotlib inline  

BIOMD_12 = 'BIOMD0000000012.xml'
BASE_DIR = '/Users/woosubs/Desktop/AutomateAnnotation/'
DATA_DIR = os.path.join(BASE_DIR, "DATA")
ALGO_DIR = os.path.join(DATA_DIR, "algo")
CHEBI_DIR = os.path.join(DATA_DIR, "chebi")
RHEA_DIR = os.path.join(DATA_DIR, "rhea")
BIOMODEL_DIR = os.path.join(DATA_DIR, "biomodels/curated_biomodels_31mar2021")
BIGG_DIR = '/Users/woosubs/Desktop/AutomateAnnotation/DATA/bigg'
ecoli_fpath = os.path.join(BIGG_DIR, "e_coli_core.xml")

PROJ_DIR = os.path.join(os.getcwd(), os.pardir)
AMAS_DIR = os.path.join(PROJ_DIR, "AMAS")
sys.path.append(PROJ_DIR)

SUPPL_DIR = os.path.join(PROJ_DIR, os.pardir, "AMAS_suppl")
ACCURACY_DIR = os.path.join(SUPPL_DIR, "data_for_credibility")
RESULT_DIR = '/Users/woosubs/Desktop/AutomateAnnotation/AMAS_suppl/result_files'

from AMAS import annotation_maker as am
from AMAS import species_annotation as sa
from AMAS import reaction_annotation as ra
from AMAS import recommender
from AMAS import constants as cn
from AMAS import iterator as it
from AMAS import tools

In [2]:
E_COLI_PATH = '/Users/woosubs/Desktop/AutomateAnnotation/AMAS/AMAS/../tests/e_coli_core.xml'
BIOMD_190_PATH = os.path.join(cn.TEST_DIR, 'BIOMD0000000190.xml')

In [3]:
recom = recommender.Recommender(libsbml_fpath=BIOMD_190_PATH)

In [4]:
recom.getSpeciesIDs()

['SAM', 'A', 'P', 'S', 'D', 'aS', 'aD', 'Met', 'ORN', 'AcCoA', 'CoA']

In [5]:
recom.species.exist_annotation_formula

{'SAM': ['C15N6O5S'],
 'A': ['C14N6O3S'],
 'P': ['C4N2'],
 'S': ['C10N4'],
 'D': ['C7N3'],
 'aS': ['C12N4O'],
 'aD': ['C9N3O'],
 'Met': ['C5NO2S'],
 'ORN': ['C5N2O2'],
 'AcCoA': ['C23N7O17P3S'],
 'CoA': ['C21N7O16P3S']}

In [6]:
recom.recommendSpecies(ids=['SAM', 'aS'], min_score=0.8)

                      SAM (credibility score: 0.975)                      
+----+--------------+---------------+------------------------------------+
|    | annotation   |   match score | label                              |
|  1 | CHEBI:59789  |         1.000 | S-adenosyl-L-methionine zwitterion |
+----+--------------+---------------+------------------------------------+
|  2 | CHEBI:15414  |         1.000 | S-adenosyl-L-methionine            |
+----+--------------+---------------+------------------------------------+

                  aS (credibility score: 0.972)                  
+----+--------------+---------------+---------------------------+
|    | annotation   |   match score | label                     |
|  1 | CHEBI:58101  |         1.000 | N(1)-acetylsperminium(3+) |
+----+--------------+---------------+---------------------------+
|  2 | CHEBI:17312  |         1.000 | N(1)-acetylspermine       |
+----+--------------+---------------+---------------------------+



In [7]:
recom.selectAnnotation(('SAM', [1,2]))

Selection updated.


In [8]:
recom.selectAnnotation(('aS', [1,2]))

Selection updated.


In [9]:
recom.selection

{'species': {'SAM':                     annotation  match score  \
  SAM (cred. 0.975)                             
  1                  CHEBI:59789          1.0   
  2                  CHEBI:15414          1.0   
  
                                                  label  
  SAM (cred. 0.975)                                      
  1                  S-adenosyl-L-methionine zwitterion  
  2                             S-adenosyl-L-methionine  ,
  'aS':                    annotation  match score                      label
  aS (cred. 0.972)                                                     
  1                 CHEBI:58101          1.0  N(1)-acetylsperminium(3+)
  2                 CHEBI:17312          1.0        N(1)-acetylspermine},
 'reaction': {}}

In [10]:
recom.recommendReaction(ids=['ODC', 'SAMdc'], min_score=0.8)

                                        ODC (credibility score: 0.817)                                       
+----+--------------+---------------+-----------------------------------------------------------------------+
|    | annotation   |   match score | label                                                                 |
|  1 | RHEA:28827   |         1.000 | L-ornithine(out) + putrescine(in) = L-ornithine(in) + putrescine(out) |
+----+--------------+---------------+-----------------------------------------------------------------------+

     SAMdc (credibility score: 0.587)     
+--------------+---------------+---------+
| annotation   | match score   | label   |
+--------------+---------------+---------+



In [11]:
recom.selectAnnotation(('ODC', 1))

Selection updated.


In [12]:
one_type = 'species'
type_selection = recom.selection[one_type]

# if species
TYPE_EXISTING_ATTR = {'species': recom.species.exist_annotation,
                      'reaction': recom.reactions.exist_annotation}


model = recom.sbml_document.getModel()
element_df = pd.concat([type_selection[k] for k in type_selection.keys()])

fnames = [recom.fname]*element_df.shape[0]
ids = list(itertools.chain(*[[k]*type_selection[k].shape[0] \
                           for k in type_selection.keys()]))
metaids = list(itertools.chain(*[[model.getSpecies(k).meta_id]*type_selection[k].shape[0] \
                                 for k in type_selection.keys()]))


display_names = list(itertools.chain(*[[model.getSpecies(k).name]*type_selection[k].shape[0] \
                                      for k in type_selection.keys()]))
existing = [1 if val in TYPE_EXISTING_ATTR[one_type][ids[idx]] else 0 \
            for idx, val in enumerate(element_df['annotation'])]
types = [one_type]*len(ids)
element_df.insert(0, "file", fnames)
element_df.insert(1, "type", types)
element_df.insert(2, "id", ids)
element_df.insert(3, "meta id", metaids)
element_df.insert(4, "display name", display_names)
element_df["existing"] = existing
element_df["USE ANNOTATION"] = 0

In [13]:
# # index is not going to be saved; 
# element_df

In [14]:
recom.selection

{'species': {'SAM':                     annotation  match score  \
  SAM (cred. 0.975)                             
  1                  CHEBI:59789          1.0   
  2                  CHEBI:15414          1.0   
  
                                                  label  
  SAM (cred. 0.975)                                      
  1                  S-adenosyl-L-methionine zwitterion  
  2                             S-adenosyl-L-methionine  ,
  'aS':                    annotation  match score                      label
  aS (cred. 0.972)                                                     
  1                 CHEBI:58101          1.0  N(1)-acetylsperminium(3+)
  2                 CHEBI:17312          1.0        N(1)-acetylspermine},
 'reaction': {'ODC':                    annotation  match score  \
  ODC (cred. 0.817)                            
  1                  RHEA:28827          1.0   
  
                                                                 label  
  ODC (cred. 0.

In [15]:
recom.saveToCSV()

In [None]:
# Now, use an updated csv file to update model annotation

In [16]:
os.getcwd()

'/Users/woosubs/Desktop/AutomateAnnotation/AMAS/notebooks'

In [17]:
RES_DIR = os.path.join(AMAS_DIR, 'res')

In [19]:
user_blessing = pd.read_csv(os.path.join(RES_DIR, 'species_recommendation.csv'))

In [22]:
chosen = user_blessing[user_blessing['USE ANNOTATION']==1]
chosen

Unnamed: 0,file,type,id,meta id,display name,annotation,match score,label,existing,USE ANNOTATION
0,BIOMD0000000190.xml,species,SAM,metaid_0000036,S-adenosyl-L-methionine,CHEBI:59789,1.0,S-adenosyl-L-methionine zwitterion,0,1
5,BIOMD0000000190.xml,species,S,metaid_0000039,Spermine,CHEBI:45725,1.0,spermine(4+),0,1
8,BIOMD0000000190.xml,species,D,metaid_0000040,Spermidine,CHEBI:16610,1.0,spermidine,1,1
13,BIOMD0000000190.xml,species,Met,metaid_0000043,Methionine,CHEBI:64558,1.0,methionine zwitterion,0,1
16,BIOMD0000000190.xml,species,ORN,metaid_0000044,L-Ornithine,CHEBI:46911,1.0,L-ornithinium(1+),0,1
18,BIOMD0000000190.xml,species,AcCoA,metaid_0000045,Acetyl-CoA,CHEBI:15351,1.0,acetyl-CoA,1,1
22,BIOMD0000000190.xml,species,CoA,metaid_0000046,CoA,CHEBI:15346,1.0,coenzyme A,1,1


In [53]:
reader = libsbml.SBMLReader()
document = reader.readSBML(BIOMD_190_PATH)
model = document.getModel()

element_types = list(np.unique(chosen['type']))
for one_type in element_types:
  maker = am.AnnotationMaker(one_type)
  df_type = chosen[chosen['type']==one_type]
  uids = list(np.unique(df_type['id']))
  meta_ids = {val:list(df_type[df_type['id']=='D']['meta id'])[0] for val in uids}
  for one_id in uids:
    one_annotation = maker.getAnnotationString(list(df_type[df_type['id']==one_id]['annotation']),
                                               meta_ids[one_id])
    model.getSpecies(one_id).setAnnotation(one_annotation)

fpath="model_amas_annotations.xml"
libsbml.writeSBMLToFile(document, fpath)

1

In [32]:
df_type

Unnamed: 0,file,type,id,meta id,display name,annotation,match score,label,existing,USE ANNOTATION
0,BIOMD0000000190.xml,species,SAM,metaid_0000036,S-adenosyl-L-methionine,CHEBI:59789,1.0,S-adenosyl-L-methionine zwitterion,0,1
5,BIOMD0000000190.xml,species,S,metaid_0000039,Spermine,CHEBI:45725,1.0,spermine(4+),0,1
8,BIOMD0000000190.xml,species,D,metaid_0000040,Spermidine,CHEBI:16610,1.0,spermidine,1,1
13,BIOMD0000000190.xml,species,Met,metaid_0000043,Methionine,CHEBI:64558,1.0,methionine zwitterion,0,1
16,BIOMD0000000190.xml,species,ORN,metaid_0000044,L-Ornithine,CHEBI:46911,1.0,L-ornithinium(1+),0,1
18,BIOMD0000000190.xml,species,AcCoA,metaid_0000045,Acetyl-CoA,CHEBI:15351,1.0,acetyl-CoA,1,1
22,BIOMD0000000190.xml,species,CoA,metaid_0000046,CoA,CHEBI:15346,1.0,coenzyme A,1,1


In [35]:
uids

['AcCoA', 'CoA', 'D', 'Met', 'ORN', 'S', 'SAM']

In [52]:
for one_id in uids:
  print(one_id)
  print(model.getSpecies(one_id).getAnnotationString())
  print('\n\n')

AcCoA
<annotation>
  <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/">
    <rdf:Description rdf:about="#metaid_0000045">
      <bqbiol:is>
        <rdf:Bag>
          <rdf:li rdf:resource="http://identifiers.org/chebi/CHEBI:15351"/>
        </rdf:Bag>
      </bqbiol:is>
    </rdf:Description>
  </rdf:RDF>
</annotation>



CoA
<annotation>
  <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/">
    <rdf:Description rdf:about="#metaid_0000046">
      <bqbiol:is>
        <

In [46]:
meta_ids

{'AcCoA': 'metaid_0000040',
 'CoA': 'metaid_0000040',
 'D': 'metaid_0000040',
 'Met': 'metaid_0000040',
 'ORN': 'metaid_0000040',
 'S': 'metaid_0000040',
 'SAM': 'metaid_0000040'}

In [48]:
print(one_annotation)

<annotation>
  <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vcard4="http://www.w3.org/2006/vcard/ns#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/">
    <rdf:Description rdf:about="#metaid_0000040">
      <bqbiol:is>
        <rdf:Bag>
          <rdf:li rdf:resource="http://identifiers.org/chebi/CHEBI:59789"/>
        </rdf:Bag>
      </bqbiol:is>
    </rdf:Description>
  </rdf:RDF>
</annotation>
