In [1]:
import pandas as pd
from copy import deepcopy

In [3]:
# from rtoru import model as rt
# from syn_elong import model as syn
import sys
sys.path.append('/Users/mahs128/Repos/CONCERTO')

from concerto.helpers.load_model_from_git import load_model_from_git
syn = load_model_from_git('Synechococcus')
rt = load_model_from_git('Rhodosporidium')


In [55]:
# Create mtnx mapper
mtnx_mapper = {}

# Collect metabolites from Rt model
for i in rt.metabolites:
    this_i = i.annotation.get('kegg.compound')
    if this_i is not None:
        # Check if this_i is a list with multiple KEGG-IDs
        if isinstance(this_i, list):
            print(f"NOTE: identified multiple KEGG-IDs for this metabolite in Rt, {this_i} = {i.id}")
            for ii in this_i:
                mtnx_mapper[ii] = i.id[:-2]
        else:
            mtnx_mapper[this_i] = i.id[:-2]
print(len(mtnx_mapper))

# Collect metabolites from Syn model (if not already found in Rt model)
for i in syn.metabolites:
    this_i = i.annotation.get('kegg.compound')
    if this_i is not None:
        # Check if this_i is a list with multiple KEGG-IDs
        if isinstance(this_i, list):
            print(f"NOTE: identified multiple KEGG-IDs for this metabolite in Syn, {this_i} = {i.id}")
            for ii in this_i:
                mtnx_mapper[ii] = i.id[:-2]
        else:
            if not (this_i in mtnx_mapper.keys()):
                mtnx_mapper[this_i] = i.id[:-2]
print(len(mtnx_mapper))

# Correct metabolites using id-mapper file 
df = pd.read_json('id-mapper.json').T
for i,j in df['xrefs'].items():
    for item in j:
        if 'bigg.metabolite' in item:
            mtnx_mapper[i] = item.replace('bigg.metabolite:', '')
print(len(mtnx_mapper))

767
NOTE: identified multiple KEGG-IDs for this metabolite in Syn, ['C00047', 'C16440'] = lys__L_c
NOTE: identified multiple KEGG-IDs for this metabolite in Syn, ['C01401', 'C00041'] = ala__L_c
NOTE: identified multiple KEGG-IDs for this metabolite in Syn, ['C16439', 'C00123'] = leu__L_c
NOTE: identified multiple KEGG-IDs for this metabolite in Syn, ['C01353', 'C00288'] = hco3_c
NOTE: identified multiple KEGG-IDs for this metabolite in Syn, ['C00049', 'C16433'] = asp__L_c
NOTE: identified multiple KEGG-IDs for this metabolite in Syn, ['C00001', 'C01328'] = h2o_cx
NOTE: identified multiple KEGG-IDs for this metabolite in Syn, ['C00001', 'C01328'] = h2o_c
NOTE: identified multiple KEGG-IDs for this metabolite in Syn, ['C16688', 'C02591'] = suc6p_c
NOTE: identified multiple KEGG-IDs for this metabolite in Syn, ['C01353', 'C00288'] = hco3_cx
NOTE: identified multiple KEGG-IDs for this metabolite in Syn, ['C00094', 'C11481'] = so3_c
NOTE: identified multiple KEGG-IDs for this metabolite in 

In [56]:
mtnx_mapper

{'C00008': 'adp',
 'C00002': 'atp',
 'C00080': 'h',
 'C05853': '2phetoh',
 'C00006': 'nadp',
 'C00005': 'nadph',
 'C00601': 'pacald',
 'C00234': '10fthf',
 'C01328': 'h2o',
 'C00445': 'methf',
 'C00147': 'ade',
 'C00020': 'amp',
 'C03736': 'r5p',
 'C01180': '2kmb',
 'C15606': 'dhmtp',
 'C00058': 'for',
 'C00007': 'o2',
 'C00307': 'cdpchol',
 'C00055': 'cmp',
 'C00003': 'nad',
 'C00004': 'nadh',
 'C00042': 'succ',
 'C00232': 'sucsal',
 'C00022': 'pyr',
 'C00011': 'co2',
 'C01185': 'nicrnt',
 'C00013': 'ppi',
 'C00119': 'prpp',
 'C03722': 'quln',
 'C00332': 'aacoa',
 'C00024': 'accoa',
 'C00010': 'coa',
 'C00026': 'akg',
 'C00302': 'glu__L',
 'C00942': '35cgmp',
 'C00144': 'gmp',
 'C00054': 'pap',
 'C00053': 'paps',
 'C11481': 'so3',
 'C00343': 'trdox',
 'C00342': 'trdrd',
 'C00009': 'pi',
 'C00101': 'thf',
 'C01944': 'occoa',
 'C06423': 'octa',
 'C21057': 'rib__D',
 'C00106': 'ura',
 'C00299': 'uri',
 'C00084': 'acald',
 'C05275': 'dc2coa',
 'C00027': 'h2o2',
 'C04411': '3c2hmp',
 'C000

In [57]:
# Manually add a few more metabolites into the mapper
# Added by James
mtnx_mapper['C06186'] = 'arbt'
mtnx_mapper['C00095'] = 'fru'
mtnx_mapper['C00197'] = '3pg'
mtnx_mapper['C01970'] = 'lcts'
mtnx_mapper['C00247'] = 'srb__L'

# Added by Andrew & Shant
mtnx_mapper['C02457'] = '13ppd'
mtnx_mapper['C00587'] = '3hbz'
mtnx_mapper['C00180'] = 'bz'

# Can't find BIGG IDs for these KEGG IDs (Included in Pavlo's experiment)
# mtnx_mapper['C02502'] = ''
# mtnx_mapper['C10447'] = ''
# mtnx_mapper['C00852'] = ''
C02502 not found in any mappings
C10447 not found in any mappings
C00852 not found in any mappings
C16884 not found in any mappings
C00503 not found in any mappings
D01947 not found in any mappings
C01384 not found in any mappings
C01601 not found in any mappings
C02497 not found in any mappings
c06578 not found in any mappings
C16884 not found in any mappings
C03461 not found in any mappings
C03619 not found in any mappings
C00124 not found in any mappings
C05660 not found in any mappings
C16536 not found in any mappings
C07064 not found in any mappings
C07064 not found in any mappings
D08266 not found in any mappings
C07044 not found in any mappings
C01384 not found in any mappings
C05113 not found in any mappings
C08060 not found in any mappings
C12026 not found in any mappings
C16536 not found in any mappings
C01479 not found in any mappings
C07064 not found in any mappings
C05905 not found in any mappings
C01432 not found in any mappings
C00711 not found in any mappings

In [58]:
for k,v in mtnx_mapper.items():
    if 'nan' in v:
        print(k,v)


In [59]:
# Save the mapper to json
import json
mapper_fname = "KEGG_to_BIGG_mapper_for_Syn_Rt.json"
with open(mapper_fname, "w") as f:
    json.dump(mtnx_mapper, f, indent=4)


In [8]:
from_pavlo = pd.read_excel('Metabolites_IDs_short_clean.xlsx', skiprows=1)
from_pavlo['KEGG'] = from_pavlo['KEGG ']
del from_pavlo['KEGG ']
from_pavlo['short_name'] = from_pavlo['Metabolite abbreviation (* - score < 0.75; ** - no direct but best spectral match)']
from_pavlo = from_pavlo.loc[from_pavlo['Original metabolite from EMSL or JHU']!='Unknown'].copy()
from_pavlo = from_pavlo.loc[from_pavlo['Original metabolite from EMSL or JHU']!='No direct match'].copy()
from_pavlo = from_pavlo.loc[from_pavlo['Original metabolite from EMSL or JHU']!='not available'].copy()
from_pavlo['bigg_id'] = from_pavlo.KEGG.map(mtnx_mapper)

21

In [9]:
for_ids = from_pavlo[['short_name', 'bigg_id']].copy()
for_ids_map = for_ids.set_index('short_name').to_dict()['bigg_id']

In [10]:
syn_list = pd.read_csv('secreted_by_synechococcus.txt')
syn_list['bigg_id'] = syn_list.metab.replace(for_ids_map)
syn_list = syn_list.loc[syn_list.bigg_id.notnull()].copy()
syn_list.bigg_id.to_csv('..\excreted_syn.csv')

In [11]:
rt_list = pd.read_csv('secreted_by_rhodosporidium.txt')
rt_list['bigg_id'] = rt_list.metab.replace(for_ids_map)
rt_list = rt_list.loc[rt_list.bigg_id.notnull()].copy()
rt_list.bigg_id.to_csv('..\excreted_rt.csv')