In [1]:
import pandas as pd
import requests
import jsonpath_ng
import re
from bioservices import  KEGG, KEGGParser
# Shared KEGG service client reused by the later cells for kegg.get(...) lookups.
# NOTE(review): verbose=True presumably enables the bioservices INFO logging — confirm.
kegg = KEGG(verbose=True)

[32mINFO    [bioservices.KEGG:363]: [0m [32mInitialising KEGG service (REST)[0m


In [2]:
# Download the KEGG BRITE hierarchy br:ko00199 as JSON.
# The timeout keeps the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() reports HTTP errors here instead of letting an error
# page surface as a confusing JSON decode failure on the next line.
response = requests.get("https://rest.kegg.jp/get/br:ko00199/json", timeout=30)
response.raise_for_status()
json_brite = response.json()

In [3]:
# Collect the "name" value of every node located three "children" levels
# below the root of the BRITE JSON document.
path = jsonpath_ng.parse("children[*].children[*].children[*].name")
cyps = list(map(lambda hit: hit.value, path.find(json_brite)))

In [4]:
# Expected shape of each BRITE entry, as encoded by the regex:
#   "K#####  <protein_id>; <name> [<enzymes>]"   (the bracketed part is optional).
# Raw string: "\d", "\[" and "\]" are regex escapes, not Python string escapes;
# a plain literal triggers an invalid-escape warning on recent Python versions.
pattern = re.compile(r"^(K\d{5})  (.*); (.*?) ?(\[.*\])?$")

# Report the offending entries explicitly rather than failing with an opaque
# "'NoneType' object has no attribute 'groups'" when an entry does not match.
unparsed_cyps = [s for s in cyps if pattern.match(s) is None]
if unparsed_cyps:
    raise ValueError(
        "BRITE entries not matching the expected format: {!r}".format(unparsed_cyps[:5])
    )

# One row per entry, one column per regex capture group.
cyps_df = pd.DataFrame(
    [pattern.match(s).groups() for s in cyps],
    columns=["kegg_id", "protein_id", "name", "enzymes"],
)

In [5]:
# Look up and parse the KEGG entry for each orthology ID, one kegg.get call per
# ID, keeping the results in the same order as cyps_df["kegg_id"].
ortho_data = [
    kegg.get(ko_id, parse=True)
    for ko_id in cyps_df["kegg_id"]
]

In [6]:
# Display the parsed KEGG entries for a visual check of their structure.
ortho_data

[{'ENTRY': 'K07408                      KO',
  'SYMBOL': 'CYP1A1',
  'NAME': ['cytochrome P450 family 1 subfamily A1 [EC:1.14.14.1]'],
  'PATHWAY': {'map00140': 'Steroid hormone biosynthesis',
   'map00380': 'Tryptophan metabolism',
   'map00830': 'Retinol metabolism',
   'map00980': 'Metabolism of xenobiotics by cytochrome P450',
   'map01100': 'Metabolic pathways',
   'map04913': 'Ovarian steroidogenesis',
   'map05204': 'Chemical carcinogenesis - DNA adducts',
   'map05207': 'Chemical carcinogenesis - receptor activation',
   'map05208': 'Chemical carcinogenesis - reactive oxygen species',
   'map05417': 'Lipid and atherosclerosis'},
  'BRITE': 'KEGG Orthology (KO) [BR:ko00001]\n             09100 Metabolism\n              09103 Lipid metabolism\n               00140 Steroid hormone biosynthesis\n                K07408  CYP1A1; cytochrome P450 family 1 subfamily A1\n              09105 Amino acid metabolism\n               00380 Tryptophan metabolism\n                K07408  CYP1A1;

In [7]:
# Pair each entry's ENTRY header with its DBLINKS mapping
# (None when the entry has no "DBLINKS" key).
[(entry["ENTRY"], entry.get("DBLINKS")) for entry in ortho_data]

[('K07408                      KO',
  {'RN': 'R02354 R02355 R03089 R03408 R03629 R07000 R07001 R07021 R07022 R07079 R07080 R07081 R07085 R07087 R07098 R07099 R08390 R08392 R09418 R09423 R09442'}),
 ('K07409                      KO',
  {'RN': 'R03408 R03629 R07000 R07001 R07021 R07022 R07055 R07056 R07098 R07099 R07939 R07943 R07945 R08293 R08294 R08392 R09405 R09407 R09408'}),
 ('K24532                      KO', None),
 ('K07410                      KO',
  {'RN': 'R03088 R03090 R03629 R07079 R07080 R07081 R07085 R07087 R09416 R09418 R09442'}),
 ('K17812                      KO', None),
 ('K17813                      KO', None),
 ('K17814                      KO', None),
 ('K17683            Tight     KO',
  {'RN': 'R07000 R07001 R07945 R08225 R08324 R08325 R08326 R08327 R08390 R08391 R08392 R09408 R09421 R09423 R09424 R09425'}),
 ('K17685                      KO',
  {'RN': 'R09406 R09408 R09421 R09423 R09424 R09425'}),
 ('K07411                      KO', {'RN': 'R08390 R08391 R08392'})

In [15]:
# One row per KEGG entry: its ENTRY header and the list of reaction IDs taken
# from DBLINKS -> RN (a space-separated string such as "R02354 R02355").
# Entries lacking DBLINKS or RN get an empty list: str.split() with no argument
# returns [] for "", whereas split(" ") returns [""] and would inject a bogus
# empty reaction ID into anything built from this column.
reactions_df = pd.DataFrame(
    [
        (entry["ENTRY"], ((entry.get("DBLINKS") or {}).get("RN") or "").split())
        for entry in ortho_data
    ],
    columns=["protein", "reactions"],
)


In [18]:
# Unique reaction IDs across all entries. The `if r` filter drops any empty
# strings (e.g. the [""] produced by splitting an empty RN string on " ") so
# that they are never treated as reaction IDs.
all_reactions = {r for rs in reactions_df["reactions"] for r in rs if r}

In [20]:
# Look up and parse every reaction once, keyed by reaction ID so each result
# can be traced back to its ID; a bare list built from a set has arbitrary
# order and was not bound to any name. Sorting makes the order repeatable.
reaction_data = {rid: kegg.get(rid, parse=True) for rid in sorted(all_reactions)}
# Show only the count instead of dumping every parsed entry into the output.
len(reaction_data)

511