In [1]:
import numpy as np
import pdb
import sys
import json
import re
import os.path
import glob, os


from rdkit import Chem
from rdkit.Chem import AllChem, rdmolops, AddHs, Draw
import pandas as pd
from tqdm import tqdm 
from PIL import Image

In [13]:
with open('./../data/metanetx_metab_db_all.json') as mnx_met:
    metanetx_metab_db = json.load(mnx_met)
    

with open('./../data/MNXM_to_KEGGids.json') as kegg_to_mnxm:
    kegg_to_mnxm_dict = json.load(kegg_to_mnxm)


In [14]:
met_ls = list(metanetx_metab_db.keys())
MNXM_id = met_ls

kegg_id = list(kegg_to_mnxm_dict.keys())


In [15]:
metanetx_metab_db[MNXM_id[1000]]

{'Name': '1-(14Z,17Z,20Z,23Z,26Z-dotriacontapentaenoyl)-2-(13-methyltetradecanoyl)-sn-glycero-3-phospho-N-dodecanoyl-ethanolamine',
 'Formula': 'C64H115NO9P',
 'Charge': '-1',
 'Mass': '1072.83149',
 'InChI': 'InChI=1S/C64H116NO9P/c1-5-7-9-11-13-15-16-17-18-19-20-21-22-23-24-25-26-27-28-29-30-31-32-33-34-38-42-46-50-54-63(67)71-58-61(74-64(68)55-51-47-43-39-35-37-40-44-48-52-60(3)4)59-73-75(69,70)72-57-56-65-62(66)53-49-45-41-36-14-12-10-8-6-2/h13,15,17-18,20-21,23-24,26-27,60-61H,5-12,14,16,19,22,25,28-59H2,1-4H3,(H,65,66)(H,69,70)/p-1/b15-13-,18-17-,21-20-,24-23-,27-26-/t61-/m1/s1',
 'InChIKey': 'InChIKey=DAAMQYDFEKBAGH-LBOSQYDESA-M',
 'SMILES': 'CCCCC/C=C\\C/C=C\\C/C=C\\C/C=C\\C/C=C\\CCCCCCCCCCCCC(=O)OC[C@H](COP(=O)([O-])OCCNC(=O)CCCCCCCCCCC)OC(=O)CCCCCCCCCCCC(C)C',
 'Reference': 'slm:000693289'}

In [16]:
kegg_to_mnxm_dict[kegg_id[10]]

'MNXM10060'

In [17]:
# Metanetx dictionary
metanetx_dict = metanetx_metab_db

# KEGG to Metanetx mapping
kegg_to_metanetx = kegg_to_mnxm_dict


def get_info(identifier):
    result = {}
    if identifier in metanetx_dict:
        result['Metanetx ID'] = identifier
        result.update(metanetx_dict.get(identifier))
        for kegg_id, metanetx_id in kegg_to_metanetx.items():
            if metanetx_id == identifier:
                result['KEGG ID'] = kegg_id
    elif identifier in kegg_to_metanetx:
        metanetx_id = kegg_to_metanetx.get(identifier)
        result['KEGG ID'] = identifier
        result.update(metanetx_dict.get(metanetx_id))
        result['Metanetx ID'] = metanetx_id
    else:
        for metanetx_id, info in metanetx_dict.items():
            if info.get('Name') == identifier:
                result['Metanetx ID'] = metanetx_id
                result.update(info)
            elif info.get('Formula') == identifier:
                result['Metanetx ID'] = metanetx_id
                result.update(info)
            elif info.get('InChI') == identifier:
                result['Metanetx ID'] = metanetx_id
                result.update(info)
            elif info.get('SMILES') == identifier:
                result['Metanetx ID'] = metanetx_id
                result.update(info)
        for kegg_id, metanetx_id in kegg_to_metanetx.items():
            if metanetx_id == result.get('Metanetx ID'):
                result['KEGG ID'] = kegg_id
    return result

def draw_molecule(identifier, save_path=None):
    if identifier.startswith('InChI='):
        mol = Chem.MolFromInchi(identifier)
    elif identifier.startswith('[') and identifier.endswith(']'):
        mol = Chem.MolFromSmiles(identifier)
    else:
        info = get_info(identifier)
        if info:
            mol = Chem.MolFromSmiles(info.get('SMILES'))
        else:
            mol = None
    if mol:
        Draw.MolToImage(mol).show()
        if save_path:
            Draw.MolToFile(mol, save_path)
    else:
        print("Unable to draw molecule.")



In [18]:
# Example usage
info = get_info('MNXM1002')
print(info)
draw_molecule(info['InChI'], save_path='mol_image.svg')            # image format can be changed to any types including jpg, png, tiff, pdf...


{'Metanetx ID': 'MNXM1002', 'Name': 'Sepiapterin', 'Formula': 'C9H11N5O3', 'Charge': '0', 'Mass': '237.08619', 'InChI': 'InChI=1S/C9H11N5O3/c1-3(15)6(16)4-2-11-7-5(12-4)8(17)14-9(10)13-7/h3,15H,2H2,1H3,(H4,10,11,13,14,17)/t3-/m0/s1', 'InChIKey': 'InChIKey=VPVOXUSPXFPWBN-VKHMYHEASA-N', 'SMILES': 'C[C@H](O)C(=O)C1=Nc2c(nc(N)[nH]c2=O)NC1', 'Reference': 'keggC:C00835', 'KEGG ID': 'C00835'}


/usr/bin/xdg-open: line 862: x-www-browser: command not found
/usr/bin/xdg-open: line 862: firefox: command not found
/usr/bin/xdg-open: line 862: iceweasel: command not found
/usr/bin/xdg-open: line 862: seamonkey: command not found
/usr/bin/xdg-open: line 862: mozilla: command not found
/usr/bin/xdg-open: line 862: epiphany: command not found
/usr/bin/xdg-open: line 862: konqueror: command not found
/usr/bin/xdg-open: line 862: chromium: command not found
/usr/bin/xdg-open: line 862: chromium-browser: command not found
/usr/bin/xdg-open: line 862: google-chrome: command not found
/usr/bin/xdg-open: line 862: www-browser: command not found
/usr/bin/xdg-open: line 862: links2: command not found
/usr/bin/xdg-open: line 862: elinks: command not found
/usr/bin/xdg-open: line 862: links: command not found
/usr/bin/xdg-open: line 862: lynx: command not found
/usr/bin/xdg-open: line 862: w3m: command not found
xdg-open: no method available for opening '/tmp/tmp19idqyw2.PNG'


In [19]:

info = get_info('InChI=1S/C9H11N5O3/c1-3(15)6(16)4-2-11-7-5(12-4)8(17)14-9(10)13-7/h3,15H,2H2,1H3,(H4,10,11,13,14,17)/t3-/m0/s1')
print(info)

{'Metanetx ID': 'MNXM1002', 'Name': 'Sepiapterin', 'Formula': 'C9H11N5O3', 'Charge': '0', 'Mass': '237.08619', 'InChI': 'InChI=1S/C9H11N5O3/c1-3(15)6(16)4-2-11-7-5(12-4)8(17)14-9(10)13-7/h3,15H,2H2,1H3,(H4,10,11,13,14,17)/t3-/m0/s1', 'InChIKey': 'InChIKey=VPVOXUSPXFPWBN-VKHMYHEASA-N', 'SMILES': 'C[C@H](O)C(=O)C1=Nc2c(nc(N)[nH]c2=O)NC1', 'Reference': 'keggC:C00835', 'KEGG ID': 'C00835'}


In [20]:
info = get_info('C[C@H](O)C(=O)C1=Nc2c(nc(N)[nH]c2=O)NC1')
print(info)

{'Metanetx ID': 'MNXM1002', 'Name': 'Sepiapterin', 'Formula': 'C9H11N5O3', 'Charge': '0', 'Mass': '237.08619', 'InChI': 'InChI=1S/C9H11N5O3/c1-3(15)6(16)4-2-11-7-5(12-4)8(17)14-9(10)13-7/h3,15H,2H2,1H3,(H4,10,11,13,14,17)/t3-/m0/s1', 'InChIKey': 'InChIKey=VPVOXUSPXFPWBN-VKHMYHEASA-N', 'SMILES': 'C[C@H](O)C(=O)C1=Nc2c(nc(N)[nH]c2=O)NC1', 'Reference': 'keggC:C00835', 'KEGG ID': 'C00835'}


In [21]:

info = get_info('Sepiapterin')
print(info)

{'Metanetx ID': 'MNXM1002', 'Name': 'Sepiapterin', 'Formula': 'C9H11N5O3', 'Charge': '0', 'Mass': '237.08619', 'InChI': 'InChI=1S/C9H11N5O3/c1-3(15)6(16)4-2-11-7-5(12-4)8(17)14-9(10)13-7/h3,15H,2H2,1H3,(H4,10,11,13,14,17)/t3-/m0/s1', 'InChIKey': 'InChIKey=VPVOXUSPXFPWBN-VKHMYHEASA-N', 'SMILES': 'C[C@H](O)C(=O)C1=Nc2c(nc(N)[nH]c2=O)NC1', 'Reference': 'keggC:C00835', 'KEGG ID': 'C00835'}
