In [30]:
import pandas

In [31]:
# Compound table generated by TECR-Hackathon-Cactus.
df = pandas.read_csv(filepath_or_buffer='compounds.csv')
df

Unnamed: 0,name,kegg,synonyms
0,benzyl alcohol(aq),kegg:C00556,[]
1,NAD(aq),kegg:C00003,"['NAD(aq)', 'NAD', 'NAD(ox)(aq)', 'NAD(ox)', '..."
2,benzaldehyde(aq),kegg:C00261,[]
3,NADH(aq),kegg:C00004,"['NADH(aq)', 'NAD(red)(aq)', 'NAD(red)']"
4,1-butanol(aq),kegg:C06142,[]
...,...,...,...
1047,D-Ribulose 5-phosphate,kegg:C00199,"['D-ribulose 5-phosphate(aq)', 'D-Ribulose 5-p..."
1048,Formaldehyde,kegg:C00067,"['formaldehyde(aq)', 'Formaldehyde']"
1049,D-arabino-Hex-3-ulose 6-phosphate,kegg:C06019,"['D-arabino-3-hexulose 6-phosphate(aq)', 'D-ar..."
1050,D-Fructose 6-phosphate,kegg:C00085,"['D-fructose 6-phosphate(aq)', 'D-Fructose 6-p..."


In [32]:
import pickle

In [33]:
# Lookup generated by TECR-Hackathon-Cactus; later cells index it by compound name.
# NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
with open('cactus_dict.pkl', mode='rb') as pkl_file:
    cac = pickle.load(pkl_file)

In [34]:
# For every row of df, fetch the standard InChIKey and the SMILES string of
# the compound from the Cactus lookup. Both dicts are keyed by the row's
# index label; a row whose name (or field) is absent from `cac` maps to None.
inchikeys = {}
smiles = {}
for compound, cname in df['name'].items():
    record = cac[cname] if cname in cac else {}
    inchikeys[compound] = record['stdinchikey'] if 'stdinchikey' in record else None
    smiles[compound] = record['smiles'] if 'smiles' in record else None

In [35]:
# Add the Cactus identifiers as new columns. The dicts are keyed by df's
# index labels, so each Series is aligned to df row by row.
df['inchikey'] = pandas.Series(inchikeys)
df['smiles'] = pandas.Series(smiles)

In [36]:
# Lookup generated by TECR-Hackathon-KEGG-CHEBI; later cells index it by compound name.
# NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
with open('kegg_dict.pkl', mode='rb') as pkl_file:
    kegg = pickle.load(pkl_file)

In [37]:
# For every row of df, fetch the PubChem and ChEBI identifiers of the compound
# from the KEGG lookup. Both dicts are keyed by the row's index label; a row
# whose name (or field) is absent from `kegg` maps to None.
pubchems = {}
chebis = {}
for compound, row in df.iterrows():
    cname = row['name']
    entry = kegg[cname] if cname in kegg else {}
    pubchems[compound] = entry['pubchem'].strip() if 'pubchem' in entry else None
    # Keep only the first line of the ChEBI field. partition() returns the
    # whole string when it contains no newline, whereas str.index('\n')
    # raised ValueError for such single-line values.
    chebis[compound] = entry['chebi'].partition('\n')[0] if 'chebi' in entry else None

In [38]:
# Add the ChEBI and PubChem identifiers as new columns. The dicts are keyed
# by df's index labels, so each Series is aligned to df row by row.
df['chebis'] = pandas.Series(chebis)
df['pubchems'] = pandas.Series(pubchems)

In [39]:
import glob

In [40]:
# RDKit images: file name up to its first '.'  ->  path returned by glob.
rdkit_d = {
    path.split('/')[-1].split('.')[0]: path
    for path in glob.glob('images/rdkit/*')
}
# KEGG images: 'kegg:' + last whitespace-separated token of the file name
# (up to its first '.')  ->  path returned by glob.
kegg_d = {
    'kegg:' + path.split('/')[-1].split('.')[0].split()[-1]: path
    for path in glob.glob('images/kegg/*')
}

In [41]:
from urllib.parse import quote  # stdlib; percent-encodes the image path

# Build, for every row of df, the URL of its RDKit image (hosted in the
# GitHub repository) and of its KEGG image (served by the KEGG REST API).
# Both dicts are keyed by the row's index label; None means "no image".
rdkit_img = {}
kegg_img = {}
for compound, row in df.iterrows():
    cname = row['name']
    rdkit_img[compound] = None
    kegg_img[compound] = None
    if cname in rdkit_d:
        # Compound names such as 'benzyl alcohol(aq)' contain spaces and
        # parentheses, so the path is percent-encoded ('/' is kept as is)
        # to produce a valid URL.
        rdkit_img[compound] = f'https://raw.githubusercontent.com/maxm4/hackathon-2025/refs/heads/main/maxime/{quote(rdkit_d[cname])}'
    if not pandas.isna(row['kegg']):
        tokens = row['kegg'].split()
        # Last token only: should remove stoichiometry. A whitespace-only
        # value yields no token and is skipped instead of raising IndexError.
        ckegg = tokens[-1] if tokens else None
        if ckegg in kegg_d:
            # Every key of kegg_d starts with 'kegg:'; keep what follows the first ':'.
            val = ckegg.partition(':')[2]
            kegg_img[compound] = f'https://rest.kegg.jp/get/{val}/image'

In [42]:
# Add the image URLs as new columns. The dicts are keyed by df's index
# labels, so each Series is aligned to df row by row.
df['rdkit_img'] = pandas.Series(rdkit_img)
df['kegg_img'] = pandas.Series(kegg_img)

In [43]:
# Show the table with the identifier and image columns added by the cells above.
df

Unnamed: 0,name,kegg,synonyms,inchikey,smiles,chebis,pubchems,rdkit_img,kegg_img
0,benzyl alcohol(aq),kegg:C00556,[],InChIKey=WVDDGKGOMKODPV-UHFFFAOYSA-N,OCc1ccccc1,chebi:17987,pubchem:3836,https://raw.githubusercontent.com/maxm4/hackat...,https://rest.kegg.jp/get/C00556/image
1,NAD(aq),kegg:C00003,"['NAD(aq)', 'NAD', 'NAD(ox)(aq)', 'NAD(ox)', '...",InChIKey=BAWFJGJZGIEFAR-NNYOXOHSSA-O,NC(=O)c1ccc[n+](c1)[C@@H]2O[C@H](CO[P](O)(=O)O...,chebi:15846,pubchem:3305,https://raw.githubusercontent.com/maxm4/hackat...,https://rest.kegg.jp/get/C00003/image
2,benzaldehyde(aq),kegg:C00261,[],InChIKey=HUMNYLRZRPPJDN-UHFFFAOYSA-N,O=Cc1ccccc1,chebi:17169,pubchem:3559,https://raw.githubusercontent.com/maxm4/hackat...,https://rest.kegg.jp/get/C00261/image
3,NADH(aq),kegg:C00004,"['NADH(aq)', 'NAD(red)(aq)', 'NAD(red)']",InChIKey=BOPGDPNILDQYTO-UHFFFAOYSA-N,NC(=O)C1=CN(C=CC1)C2OC(CO[P](O)(=O)O[P](O)(=O)...,chebi:16908,pubchem:3306,https://raw.githubusercontent.com/maxm4/hackat...,https://rest.kegg.jp/get/C00004/image
4,1-butanol(aq),kegg:C06142,[],InChIKey=LRHPLDYGYMQRHN-UHFFFAOYSA-N,CCCCO,chebi:28885,pubchem:8398,https://raw.githubusercontent.com/maxm4/hackat...,https://rest.kegg.jp/get/C06142/image
...,...,...,...,...,...,...,...,...,...
1047,D-Ribulose 5-phosphate,kegg:C00199,"['D-ribulose 5-phosphate(aq)', 'D-Ribulose 5-p...",,,chebi:17363,pubchem:3499,,https://rest.kegg.jp/get/C00199/image
1048,Formaldehyde,kegg:C00067,"['formaldehyde(aq)', 'Formaldehyde']",,,chebi:16842,pubchem:3367,,https://rest.kegg.jp/get/C00067/image
1049,D-arabino-Hex-3-ulose 6-phosphate,kegg:C06019,"['D-arabino-3-hexulose 6-phosphate(aq)', 'D-ar...",,,chebi:27973,pubchem:8291,,https://rest.kegg.jp/get/C06019/image
1050,D-Fructose 6-phosphate,kegg:C00085,"['D-fructose 6-phosphate(aq)', 'D-Fructose 6-p...",,,chebi:15946,pubchem:3385,,https://rest.kegg.jp/get/C00085/image


In [44]:
# Write the enriched table to disk without the row index. `index` is
# documented as a bool, so pass False explicitly rather than relying on
# None being falsy.
df.to_csv('final_compound_csv.csv', index=False)

In [45]:
# Show the table once more after saving; to_csv does not modify df, so the
# content matches the previous display.
df

Unnamed: 0,name,kegg,synonyms,inchikey,smiles,chebis,pubchems,rdkit_img,kegg_img
0,benzyl alcohol(aq),kegg:C00556,[],InChIKey=WVDDGKGOMKODPV-UHFFFAOYSA-N,OCc1ccccc1,chebi:17987,pubchem:3836,https://raw.githubusercontent.com/maxm4/hackat...,https://rest.kegg.jp/get/C00556/image
1,NAD(aq),kegg:C00003,"['NAD(aq)', 'NAD', 'NAD(ox)(aq)', 'NAD(ox)', '...",InChIKey=BAWFJGJZGIEFAR-NNYOXOHSSA-O,NC(=O)c1ccc[n+](c1)[C@@H]2O[C@H](CO[P](O)(=O)O...,chebi:15846,pubchem:3305,https://raw.githubusercontent.com/maxm4/hackat...,https://rest.kegg.jp/get/C00003/image
2,benzaldehyde(aq),kegg:C00261,[],InChIKey=HUMNYLRZRPPJDN-UHFFFAOYSA-N,O=Cc1ccccc1,chebi:17169,pubchem:3559,https://raw.githubusercontent.com/maxm4/hackat...,https://rest.kegg.jp/get/C00261/image
3,NADH(aq),kegg:C00004,"['NADH(aq)', 'NAD(red)(aq)', 'NAD(red)']",InChIKey=BOPGDPNILDQYTO-UHFFFAOYSA-N,NC(=O)C1=CN(C=CC1)C2OC(CO[P](O)(=O)O[P](O)(=O)...,chebi:16908,pubchem:3306,https://raw.githubusercontent.com/maxm4/hackat...,https://rest.kegg.jp/get/C00004/image
4,1-butanol(aq),kegg:C06142,[],InChIKey=LRHPLDYGYMQRHN-UHFFFAOYSA-N,CCCCO,chebi:28885,pubchem:8398,https://raw.githubusercontent.com/maxm4/hackat...,https://rest.kegg.jp/get/C06142/image
...,...,...,...,...,...,...,...,...,...
1047,D-Ribulose 5-phosphate,kegg:C00199,"['D-ribulose 5-phosphate(aq)', 'D-Ribulose 5-p...",,,chebi:17363,pubchem:3499,,https://rest.kegg.jp/get/C00199/image
1048,Formaldehyde,kegg:C00067,"['formaldehyde(aq)', 'Formaldehyde']",,,chebi:16842,pubchem:3367,,https://rest.kegg.jp/get/C00067/image
1049,D-arabino-Hex-3-ulose 6-phosphate,kegg:C06019,"['D-arabino-3-hexulose 6-phosphate(aq)', 'D-ar...",,,chebi:27973,pubchem:8291,,https://rest.kegg.jp/get/C06019/image
1050,D-Fructose 6-phosphate,kegg:C00085,"['D-fructose 6-phosphate(aq)', 'D-Fructose 6-p...",,,chebi:15946,pubchem:3385,,https://rest.kegg.jp/get/C00085/image
