In [1]:
# Invert the reference dictionaries in AMAS constants (REF_EC2RHEA, REF_KEGG2RHEA)
# into a single dictionary mapping RHEA -> EC/KEGG terms,
# which can then be used to modify existing annotations.

import collections
import compress_pickle
import itertools
import libsbml
import numpy as np
import os
import pickle
import pandas as pd
import sys
import time
import matplotlib.pyplot as plt
%matplotlib inline  

# File name of a single BioModels entry.
BIOMD_12 = 'BIOMD0000000012.xml'

# Data tree: everything below hangs off one base directory.
BASE_DIR = '/Users/woosubshin/Desktop/AutomateAnnotation/'
DATA_DIR = os.path.join(BASE_DIR, "DATA")
ALGO_DIR, CHEBI_DIR, RHEA_DIR, BIGG_DIR = (
    os.path.join(DATA_DIR, sub_dir) for sub_dir in ("algo", "chebi", "rhea", "bigg")
)
BIOMODEL_DIR = os.path.join(DATA_DIR, "biomodels", "curated_biomodels_31mar2021")
ecoli_fpath = os.path.join(BIGG_DIR, "e_coli_core.xml")

# Project tree: resolved relative to the notebook's working directory.
# PROJ_DIR is put on sys.path so the `AMAS` package can be imported below.
PROJ_DIR = os.path.join(os.getcwd(), os.pardir)
AMAS_DIR = os.path.join(PROJ_DIR, "AMAS")
sys.path.append(PROJ_DIR)

# Supplementary material: a sibling directory of the project.
SUPPL_DIR = os.path.join(PROJ_DIR, os.pardir, "AMAS_suppl")
ACCURACY_DIR = os.path.join(SUPPL_DIR, "data_for_credibility")
RESULT_DIR = os.path.join(BASE_DIR, "AMAS_suppl", "result_files")

from AMAS import annotation_maker as am
from AMAS import species_annotation as sa
from AMAS import reaction_annotation as ra
from AMAS import recommender
from AMAS import constants as cn
from AMAS import iterator as it
from AMAS import tools

In [3]:
# Short aliases for two reference dictionaries defined in AMAS.constants.
# In the cells below, their values are treated as collections of RHEA terms
# and their keys as strings of the form "<prefix>:<id>".
ec2rhea = cn.REF_EC2RHEA
kegg2rhea = cn.REF_KEGG2RHEA

In [11]:
# Gather the RHEA collection stored under every key of both dictionaries
# (EC entries first, then KEGG entries).
rheas_lists = list(ec2rhea.values()) + list(kegg2rhea.values())
# Flatten the collections and drop duplicates; the resulting order is arbitrary.
rheas = list(set(itertools.chain.from_iterable(rheas_lists)))

In [14]:
# Number of distinct RHEA terms found across both dictionaries.
len(rheas)

8218

In [21]:
# Invert EC -> RHEA and KEGG -> RHEA into RHEA -> [EC / KEGG terms].
#
# Each source dictionary is walked exactly once, instead of re-scanning every
# EC and KEGG entry for each RHEA term. EC entries are processed before KEGG
# entries, so the order inside rhea2eckegg_raw is unchanged.
rhea2eckegg_raw = {val: [] for val in rheas}
for one_ref in (ec2rhea, kegg2rhea):
  for one_term, term_rheas in one_ref.items():
    # set(): a term is recorded once per RHEA even if the RHEA is listed twice.
    for one_rhea in set(term_rheas):
      # Only RHEA terms collected in `rheas` get an entry.
      if one_rhea in rhea2eckegg_raw:
        # Keep both forms of the key: the full "<prefix>:<id>" string
        # and the part after the prefix (e.g. the bare EC number / R-number).
        rhea2eckegg_raw[one_rhea].append(one_term)
        rhea2eckegg_raw[one_rhea].append(one_term.split(":")[1])
# De-duplicate; sort so the value lists do not depend on set iteration order,
# which varies between interpreter runs for strings.
rhea2eckegg = {val: sorted(set(rhea2eckegg_raw[val])) for val in rhea2eckegg_raw}

In [24]:
import compress_pickle

# Directory (absolute, machine-specific) that receives the pickled dictionary.
LZMA_DIR = '/Users/woosubshin/Desktop/AutomateAnnotation/AMAS/AMAS/files'

# Write the RHEA -> EC/KEGG dictionary as an LZMA-compressed pickle.
# set_default_extension=False keeps the file name exactly as given.
compress_pickle.dump(rhea2eckegg, os.path.join(LZMA_DIR, 'mrhea2eckegg.lzma'),
                     compression="lzma", set_default_extension=False)

In [25]:
# Read 'mrhea2eckegg.lzma' back from LZMA_DIR to confirm that it loads.
# NOTE(review): compress_pickle is pickle-based; only load files from a trusted source.
res = compress_pickle.load(os.path.join(LZMA_DIR, 'mrhea2eckegg.lzma'))

In [2]:
# develop method to get a full set associated with a rhea term
def getAssociatedTermsToRhea(inp_rhea):
  """
  Collect every term associated with
  a rhea term.
  The terms stored under the rhea term in
  cn.REF_RHEA2ECKEGG come first,
  followed by the rhea term itself.
  A term that is not a key of
  cn.REF_RHEA2ECKEGG yields a list
  holding only that term.

  Parameters
  ----------
  inp_rhea: str

  Returns
  -------
  : list-str
  """
  # Guard clause: nothing is known about this term.
  if inp_rhea not in cn.REF_RHEA2ECKEGG:
    return [inp_rhea]
  # Concatenation builds a new list; the stored list is left untouched.
  return cn.REF_RHEA2ECKEGG[inp_rhea] + [inp_rhea]

In [4]:
# 'AA' is not a key of cn.REF_RHEA2ECKEGG, so only the input itself comes back.
getAssociatedTermsToRhea('AA')

['AA']