In [1]:
from Bio.KEGG.REST import kegg_link, kegg_list, kegg_get
from Bio.KEGG.KGML import KGML_parser
import pandas as pd
from io import StringIO
from tqdm import tqdm


def get_taxon_maps(kegg_prefix):
    if kegg_prefix is None:
        return []
    df = pd.read_csv(StringIO(kegg_list("pathway", kegg_prefix).read()), sep='\t', header=None)
    return df[0].apply(lambda x: x.split(kegg_prefix)[1]).tolist()


def get_reactions(kgml):
    if len(kgml.reactions) > 0:
        return [reaction.name.split('rn:')[1] for reaction in kgml.reactions]
    return []


def get_metabolites(kgml):
    if len(kgml.compounds) > 0:
        return [metabolite.name.split('cpd:')[1] for metabolite in kgml.compounds]
    return []

In [2]:
mmaps = get_taxon_maps('mfi')
reactions, metabolites = [], []
for mmap in tqdm(mmaps):
    try:
        kgml = KGML_parser.read(kegg_get(f"mfi{mmap}", "kgml").read())
        reactions += get_reactions(kgml)
        metabolites += get_metabolites(kgml)
    except:
        print(f'Failed for {mmap}')

with open('../Results/formicicum_reactions.txt', 'w') as f:
    f.write('\n'.join(set(reactions)))
with open('../Results/formicicum_metabolites.txt', 'w') as f:
    f.write('\n'.join(set(metabolites)))

 43%|████▎     | 40/93 [01:29<01:55,  2.18s/it]

Failed for 00552


 70%|██████▉   | 65/93 [02:33<02:23,  5.13s/it]

Failed for 01100


 84%|████████▍ | 78/93 [03:12<00:40,  2.69s/it]

Failed for 02020


100%|██████████| 93/93 [03:39<00:00,  2.36s/it]
