In [1]:
import json
import pandas as pd
import re

In [2]:
kegg_path = 'export/kegg.json'
brenda_path = 'export/brenda.json'

In [3]:
csv='kegg_pseudoisomers_Alberty.csv'
df=pd.read_csv(csv, sep='\t', header=1)

In [4]:
df.columns
df.head()

Unnamed: 0,!Identifiers:kegg.compound,!Name,!dG0 (kJ/mol),!nH,!charge,!nMg,!Note
0,C00001,C_h2o,-237.2,2,0,0,
1,C00002,C_atp,-2768.1,12,-4,0,
2,C00002,C_atp,-3258.7,12,-2,1,
3,C00002,C_atp,-3729.3,12,0,2,
4,C00002,C_atp,-2811.5,13,-3,0,


In [5]:
def read_json_data(path):
    with open(path) as json_file:
        return json.load(json_file)

In [6]:
kegg_data = read_json_data(kegg_path)
brenda_data = read_json_data(brenda_path)

In [7]:
enzyme_list = list(kegg_data.values())

In [39]:
# Example of the BrendaDB:
# brenda_enzymes = {
#             ecNumber: {
#                 'SYSNAME': "...", # should this be an array?
#                 'REACTIONS': [{reaction},],
#                 'NAME': ['...',],
#                 'SUBSTRATE': [{substrate},],
#                 'PRODUCT': [{product},],
#                 'PDB': [{pdb}]
#             },
#         }


#{ec number: [names: 'dfafsfsff'], substrates: [f, f, f,], products: [f,f,f]}

kegg_data['(7Z,10Z)-hexadeca-7,10-dienoyl-[glycerolipid],ferredoxin:oxygen oxidoreductase (13,14 cis-dehydrogenating)']['SUBSTRATE']

['(7Z,10Z)-hexadeca-7,10-dienoyl-[glycerolipid];',
 'reduced ferredoxin [iron-sulfur] cluster [CPD:C00138];',
 'O2 [CPD:C00007];',
 'H+ [CPD:C00080];',
 'linoleoyl-[glycerolipid] [CPD:C21529]']

In [46]:
brenda_substrates=brenda_data['1.14.19.35']['SUBSTRATE']
substrate_set=set()
for substrates in brenda_substrates:
    substrate_set.add(substrates['substrate'])
    
print(substrate_set)

{'2 H+', '2 reduced ferredoxin', 'more', 'a linoleoyl-[glycerolipid]', '2 reduced ferredoxin [iron-sulfur] cluster', 'a (7Z,10Z)-hexadeca-7,10-dienoyl-[glycerolipid]', 'alpha-linoleoyl-[glycerolipid]', 'O2'}


In [51]:
brenda_products=brenda_data['1.14.19.35']['PRODUCT']
product_set=set()
for products in brenda_products:
    product_set.add(products['product'])
print(product_set)

{'2 oxidized ferredoxin [iron-sulfur] cluster', '2 H2O', 'a (7Z,10Z,13Z)-hexadeca-7,10,13-trienoyl-[glycerolipid]', '2 oxidized ferredoxin', 'an alpha-linolenoyl-[glycerolipid]', '?', 'alpha-linolenoyl-[glycerolipid]'}


In [17]:
def substrate_mapper(names, substrates, products):
    return names + '\n' + substrates + '\n' + products
    
for enzyme in enzyme_list:
    names=enzyme.get('NAME', [])
    flavoenzyme_names="Flavoenzyme name: "
    substrate_names=enzyme.get('SUBSTRATE', [])
    substrates="substrates: "
    product_names=enzyme.get('PRODUCT', [])
    products="products: "
    for substrate in substrate_names:
        substrates+= substrate
    for product in product_names:
        products+=product
    for name in names:
        flavoenzyme_names+=name[0:]
    flavoenzyme_substrates=substrate_mapper(flavoenzyme_names, substrates, products)
    
    print(flavoenzyme_substrates)
    print ('\n')

Flavoenzyme name: sn-2 acyl-lipid omega-3 desaturase (ferredoxin);FAD7;FAD8
substrates: (7Z,10Z)-hexadeca-7,10-dienoyl-[glycerolipid];reduced ferredoxin [iron-sulfur] cluster [CPD:C00138];O2 [CPD:C00007];H+ [CPD:C00080];linoleoyl-[glycerolipid] [CPD:C21529]
products: (7Z,10Z,13Z)-hexadeca-7,10,13-trienoyl-[glycerolipid];oxidized ferredoxin [iron-sulfur] cluster [CPD:C00139];H2O [CPD:C00001];alpha-linolenoyl-[glycerolipid] [CPD:C21531]


Flavoenzyme name: thymidylate synthase (FAD);Thy1;ThyX
substrates: 5,10-methylenetetrahydrofolate [CPD:C00143];dUMP [CPD:C00365];NADPH [CPD:C00005];H+ [CPD:C00080]
products: dTMP [CPD:C00364];tetrahydrofolate [CPD:C00101];NADP+ [CPD:C00006]


Flavoenzyme name: methylenetetrahydrofolate---tRNA-(uracil54-C5)-methyltransferase [NAD(P)H-oxidizing];folate-dependent ribothymidyl synthase;methylenetetrahydrofolate-transfer ribonucleate uracil 5-methyltransferase;5,10-methylenetetrahydrofolate:tRNA-UPsiC (uracil-5-)-methyl-transferase;5,10-methylenetetrahydrofo

In [18]:
product_set=set()

In [19]:
for enzyme in enzyme_list:
    product_lst=enzyme.get('PRODUCT', [])
    for product in product_lst:
        product=product.replace(';', '')
        product_set.add(product)

In [20]:
substrate_set=set()

In [21]:
for enzyme in enzyme_list:
    substrate_lst=enzyme.get('SUBSTRATE', [])
    for substrate in substrate_lst:
        substrate=substrate.replace(';', '')
        substrate_set.add(substrate)

In [22]:
def cpd_to_free_energy(cpd_number):
    try:
        free_energy=df[df['!Identifiers:kegg.compound']==cpd_number]['!dG0 (kJ/mol)'].values[0]
        return free_energy
    except:
        return None

In [23]:
def energy_mapper(substrate, substrate_free_energy):
    return {substrate:substrate_free_energy}

In [24]:
pattern = '\[CPD:(.+)\]'
for substrate in substrate_set:
    cpd_number = re.findall(pattern, substrate)
    if cpd_number:
        cpd_number=cpd_number[0]
        substrate_free_energy=cpd_to_free_energy(cpd_number)
        substrate_energy_dict= energy_mapper(substrate, substrate_free_energy)
        print(substrate_energy_dict)

{'5-amino-5-(4-hydroxybenzyl)-6-(D-ribitylimino)-5,6-dihydrouracil [CPD:C21971]': None}
{'D-glucose [CPD:C00031]': -915.9}
{'19-(4-hydroxyphenyl)nonadecanoate [CPD:C21448]': None}
{'ATP [CPD:C00002]': -2768.1}
{'(R)-pantolactone [CPD:C01012]': None}
{'8-demethyl-8-(methylamino)riboflavin [CPD:C21648]': None}
{'acceptor [CPD:C00028]': None}
{'stearate [CPD:C01530]': None}
{'N,N-dimethyl-1,4-phenylenediamine [CPD:C04203]': None}
{'[protein]-L-threonine [CPD:C19803]': None}
{'8-amino-8-demethylriboflavin [CPD:C21646]': None}
{'riboflavin [CPD:C00255]': None}
{'3-[(3aS,4S,7aS)-7a-methyl-1,5-dioxo-octahydro-1H-inden-4-yl]propanoate [CPD:C20199]': None}
{"4'-O-demethylrebeccamycin [CPD:C19700]": None}
{'alkanesulfonate [CPD:C15521]': None}
{'alpha-D-glucose 1-phosphate [CPD:C00103]': -1756.9}
{'O2 [CPD:C00007]': 16.4}
{'N,N-dimethylaniline [CPD:C02846]': None}
{'linoleoyl-CoA [CPD:C02050]': None}
{'dichlorochromopyrrolate [CPD:C19698]': None}
{'7,8-didemethyl-8-hydroxy-5-deazariboflavin [CPD

In [25]:
for product in product_set:
    cpd_number = re.findall(pattern, product)
    if cpd_number:
        cpd_number=cpd_number[0]
        product_free_energy=cpd_to_free_energy(cpd_number)
        product_energy_dict= energy_mapper(product, product_free_energy)
print(product_energy_dict)

{"riboflavin cyclic-4',5'-phosphate [CPD:C16071]": None}
