In [44]:
from Bio.KEGG import REST
from Bio.KEGG import Enzyme

import gzip
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

In [45]:
enzyme_fields = [method for method in dir(Enzyme.Record()) if not method.startswith('_')]
data_matrix = []

with gzip.open('../datasets/KEGG_enzymes_all_data.gz', 'rt') as file:
    for record in Enzyme.parse(file):
        data_matrix.append([getattr(record, field) for field in enzyme_fields])

In [46]:
enzyme_df = pd.DataFrame(data_matrix, columns=enzyme_fields)

In [47]:
enzyme_df.head()

Unnamed: 0,classname,cofactor,comment,dblinks,disease,effector,entry,genes,inhibitor,name,pathway,product,reaction,structures,substrate,sysname
0,"[Oxidoreductases;, Acting on the CH-OH group o...",[],[A zinc protein. Acts on primary or secondary ...,"[(ExplorEnz - The Enzyme Database, [1.1.1.1]),...",[],[],1.1.1.1,"[(HSA, [124, 125, 126, 127, 128, 130, 131]), (...",[],"[alcohol dehydrogenase, aldehyde reductase, AD...","[(PATH, ec00010, Glycolysis / Gluconeogenesis)...","[aldehyde [CPD:C00071], NADH [CPD:C00004], H+ ...",[(1) a primary alcohol + NAD+ = an aldehyde + ...,[],"[primary alcohol [CPD:C00226], NAD+ [CPD:C0000...",[alcohol:NAD+ oxidoreductase]
1,"[Oxidoreductases;, Acting on the CH-OH group o...",[],[A zinc protein. Some members of this group ox...,"[(ExplorEnz - The Enzyme Database, [1.1.1.2]),...",[],[],1.1.1.2,"[(HSA, [10327]), (PTR, [741418]), (PPS, [10099...",[],"[alcohol dehydrogenase (NADP+), aldehyde reduc...","[(PATH, ec00010, Glycolysis / Gluconeogenesis)...","[aldehyde [CPD:C00071], NADPH [CPD:C00005], H+...",[an alcohol + NADP+ = an aldehyde + NADPH + H+...,[],"[alcohol [CPD:C00069], NADP+ [CPD:C00006]]",[alcohol:NADP+ oxidoreductase]
2,"[Oxidoreductases;, Acting on the CH-OH group o...",[],[The yeast enzyme acts most rapidly with NAD+;...,"[(ExplorEnz - The Enzyme Database, [1.1.1.3]),...",[],[],1.1.1.3,"[(NVE, [NEMVE_v1g225948]), (ATH, [AT1G31230, A...",[],"[homoserine dehydrogenase, HSDH, HSD]","[(PATH, ec00260, Glycine, serine and threonine...","[L-aspartate 4-semialdehyde [CPD:C00441], NADH...",[L-homoserine + NAD(P)+ = L-aspartate 4-semial...,[],"[L-homoserine [CPD:C00263], NAD+ [CPD:C00003],...",[L-homoserine:NAD(P)+ oxidoreductase]
3,"[Oxidoreductases;, Acting on the CH-OH group o...",[],[Also converts diacetyl into acetoin with NADH...,"[(ExplorEnz - The Enzyme Database, [1.1.1.4]),...",[],[],1.1.1.4,"[(SCE, [YAL060W, YAL061W]), (KLA, [KLLA0_F0050...",[],"[(R,R)-butanediol dehydrogenase, butyleneglyco...","[(PATH, ec00650, Butanoate metabolism)]","[(R)-acetoin [CPD:C00810], NADH [CPD:C00004], ...","[(R,R)-butane-2,3-diol + NAD+ = (R)-acetoin + ...",[],"[(R,R)-butane-2,3-diol [CPD:C03044], NAD+ [CPD...","[(R,R)-butane-2,3-diol:NAD+ oxidoreductase]"
4,"[Oxidoreductases;, Acting on the CH-OH group o...",[],[Transferred entry: acetoin dehydrogenase. Now...,[],[],[],1.1.1.5,[],[],[Transferred to 1.1.1.303 and 1.1.1.304],[],[],[],[],[],[]


In [5]:
# enzyme df search 
enzyme_df[enzyme_df.entry == '1.1.1.153']['reaction']

152    [(1) L-erythro-7,8-dihydrobiopterin + NADP+ = ...
Name: reaction, dtype: object

In [40]:
enzyme_df['reaction'][153]

['(S)-ureidoglycolate + NAD(P)+ = oxalureate + NAD(P)H + H+ [RN:R02935 R02936]']

In [48]:
promiscuous_df = enzyme_df[[True if len(rxn) > 1 else False for rxn in enzyme_df['reaction']]]
compact_promiscuous_df = promiscuous_df[['entry','reaction','product','substrate']]
compact_promiscuous_df.iloc[1,2]

['pyruvate [CPD:C00022]', 'CO2 [CPD:C00011]', 'NADH [CPD:C00004]']

In [42]:

# create a list of reactions that appear in promiscuous enzyme dataframe 
reaction_list = []
for index,row in compact_promiscuous_df.iterrows():
     for reaction in row[1]:
            if reaction.split("[RN:")[-1].startswith("R"):
                if not reaction.split("[RN:")[-1].startswith("RN"):
                     for i in reaction.split("[RN:")[-1][:-1].split(" "):
                            reaction_list.append(i)
            
reaction_list       


['R00623',
 'R00624',
 'R00214',
 'R00217',
 'R00216',
 'R00217',
 'R00267',
 'R01899',
 'R00268',
 'R10052',
 'R04426',
 'R01652',
 'R01370',
 'R03337',
 'R12256',
 'R02975',
 'R08208',
 'R03397',
 'R03399',
 'R03396',
 'R03398',
 'R10841',
 'R10842',
 'R03337',
 'R03339',
 'R03373',
 'R12252',
 'R10984',
 'R06126',
 'R06830',
 'R01872',
 'R06846',
 'R02413',
 'R06847',
 'R00709',
 'R01934',
 'R07143',
 'R07144',
 'R08919',
 'R06399',
 'R09388',
 'R08499',
 'R09926',
 'R09927',
 'R09928',
 'R09929',
 'R09989',
 'R09990',
 'R10310',
 'R10311',
 'R09756',
 'R10399',
 'R10400',
 'R07147',
 'R01096',
 'R10563',
 'R01041',
 'R10852',
 'R10851',
 'R03758',
 'R10986',
 'R10985',
 'R06830',
 'R11149',
 'R11147',
 'R11148',
 'R09055',
 'R11724',
 'R11726',
 'R10053',
 'R03184',
 'R00647',
 'R08212',
 'R01022',
 'R08211',
 'R05821',
 'R00055',
 'R05822',
 'R05695',
 'R05696',
 'R05697',
 'R10858',
 'R10853',
 'R10854',
 'R10855',
 'R10856',
 'R10857',
 'R07347',
 'R07348',
 'R11777',
 'R11802',

In [123]:
# run it when there is good internet connection
# append all the reactions that are reversible 
reversible_reaction = []
for reaction in reaction_list:
    reaction_file = REST.kegg_get(reaction).read()
    for i in reaction_file.rstrip().split("\n"):
        if i.startswith("EQUATION") and "<=>" in i:
            reversible_reaction.append(reaction)
            print (reaction)

R00623
R00624
R00214
R00217
R00216
R00217
R00267
R01899
R00268
R10052
R04426
R01652
R01370
R03337
R12256
R02975
R08208
R03397
R03399
R03396
R03398
R10841
R10842
R03337
R03339
R03373
R12252
R10984
R06126
R06830
R01872
R06846
R02413
R06847
R00709
R01934
R07143
R07144
R08919
R06399
R09388
R08499
R09926
R09927
R09928
R09929
R09989
R09990
R10310
R10311
R09756
R10399
R10400
R07147
R01096
R10563
R01041
R10852
R10851
R03758
R10986
R10985
R06830
R11149
R11147
R11148
R09055
R11724
R11726
R10053
R03184
R00647
R08212
R01022
R08211
R05821
R00055
R05822
R05695
R05696
R05697
R10858
R10853
R10854
R10855
R10856
R10857
R07347
R07348
R11777
R11802
R03102
R03103
R02317
R11231
R11232
R11679
R11351
R11352
R11913
R11914
R00977
R01414
R02609
R02893
R06419
R06420
R06784
R06785
R10798
R10799
R10800
R10801
R11426
R11427
R12105
R12106
R12107
R12108
R12109
R12110
R12112
R12113
R12144
R00640
R07679
R10329
R10327
R10328
R09652
R09653
R09654
R07511
R09656
R09658
R09053
R09060
R09690
R09719
R09744
R09745
R09746
R02488

R11855
R01544
R10773
R10774
R10775
R00213
R00220
R00590
R11100
R11099
R00221
R00996
R11101
R11098
R00697
R00737
R01083
R04559
R04558
R00256
R12152
R10089
R00256
R10088
R01001
R08632
R01283
R08633
R00654
R08635
R12188
R05761
R05762
R07634
R00782
R02408
R11023
R11864
R11865
R12062
R12059
R12060
R12061
R12066
R12063
R12064
R12065
R01031
R08584
R09033
R10883
R03073
R11630
R11631
R10818
R10817
R10280
R10281
R11721
R11722
R07320
R07321
R01518
R01516
R03275
R02852
R07503
R07502
R10623
R10624
R10847
R11083
R10767
R10541
R10543
R10768
R10544
R02794
R04580
R08905
R08906
R11608
R11922
R11537
R11923
R11924
R11538
R11925
R12056
R01917
R03822
R06529
R07302
R11143
R07770
R07771
R10942
R10943
R10944
R10941
R00573
R00256
R00571
R00149
R01231
R00256
R01230
R00578
R00256
R00483
R00575
R00256
R10948
R10949
R01395
R05815
R09598
R09599
R00381
R00382
R07640
R04274
R10537
R00381
R00382
R00381
R10822
R10823
R11325
R11885
R11886


In [127]:
# it seem like all the reactions are reversible 
len(reversible_reaction)

1302

In [49]:
rowindex = np.arange(0,len(compact_promiscuous_df))
rowindex

array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
        13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
        26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
        39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
        65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
        78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
        91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103,
       104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
       117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
       130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
       143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155,
       156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
       169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 18

In [50]:
compact_promiscuous_df_index=compact_promiscuous_df.set_index(rowindex)
compact_promiscuous_df_index


Unnamed: 0,entry,reaction,product,substrate
0,1.1.1.1,[(1) a primary alcohol + NAD+ = an aldehyde + ...,"[aldehyde [CPD:C00071], NADH [CPD:C00004], H+ ...","[primary alcohol [CPD:C00226], NAD+ [CPD:C0000..."
1,1.1.1.38,[(1) (S)-malate + NAD+ = pyruvate + CO2 + NADH...,"[pyruvate [CPD:C00022], CO2 [CPD:C00011], NADH...","[(S)-malate [CPD:C00149], NAD+ [CPD:C00003], o..."
2,1.1.1.40,[(1) (S)-malate + NADP+ = pyruvate + CO2 + NAD...,"[pyruvate [CPD:C00022], CO2 [CPD:C00011], NADP...","[(S)-malate [CPD:C00149], NADP+ [CPD:C00006], ..."
3,1.1.1.42,[isocitrate + NADP+ = 2-oxoglutarate + CO2 + N...,"[2-oxoglutarate [CPD:C00026], CO2 [CPD:C00011]...","[isocitrate [CPD:C00311], NADP+ [CPD:C00006], ..."
4,1.1.1.85,"[(2R,3S)-3-isopropylmalate + NAD+ = 4-methyl-2...","[4-methyl-2-oxopentanoate [CPD:C00233], CO2 [C...","[(2R,3S)-3-isopropylmalate [CPD:C04411], NAD+ ..."
5,1.1.1.110,[(1) (R)-3-(phenyl)lactate + NAD+ = 3-phenylpy...,"[3-phenylpyruvate, NADH [CPD:C00004], H+ [CPD:...","[(R)-3-(phenyl)lactate [CPD:C05607], NAD+ [CPD..."
6,1.1.1.153,"[(1) L-erythro-7,8-dihydrobiopterin + NADP+ = ...","[sepiapterin [CPD:C00835], NADPH [CPD:C00005],...","[L-erythro-7,8-dihydrobiopterin [CPD:C02953], ..."
7,1.1.1.187,[(1) GDP-alpha-D-rhamnose + NAD(P)+ = GDP-4-de...,"[GDP-4-dehydro-alpha-D-rhamnose [CPD:C01222], ...","[GDP-alpha-D-rhamnose [CPD:C03117], NAD+ [CPD:..."
8,1.1.1.203,[(1) beta-D-galacturonate + NAD+ = D-galactaro...,"[D-galactaro-1,5-lactone [CPD:C20889], NADH [C...","[beta-D-galacturonate, NAD+ [CPD:C00003], beta..."
9,1.1.1.237,[(1) (R)-3-(4-hydroxyphenyl)lactate + NAD(P)+ ...,"[3-(4-hydroxyphenyl)pyruvate [CPD:C01179], NAD...","[(R)-3-(4-hydroxyphenyl)lactate [CPD:C03964], ..."


In [51]:
for index,row in compact_promiscuous_df_index.iterrows():
    productlist = row['product']
    substratelist = row['substrate']
    for substrate in substratelist:
        productlist.append(substrate)
    compact_promiscuous_df_index.iloc[index,2] = productlist

In [56]:
substrate_to_product_promiscuous_df = compact_promiscuous_df_index
substrate_to_product_promiscuous_df

Unnamed: 0,entry,reaction,product,substrate
0,1.1.1.1,[(1) a primary alcohol + NAD+ = an aldehyde + ...,"[aldehyde [CPD:C00071], NADH [CPD:C00004], H+ ...","[primary alcohol [CPD:C00226], NAD+ [CPD:C0000..."
1,1.1.1.38,[(1) (S)-malate + NAD+ = pyruvate + CO2 + NADH...,"[pyruvate [CPD:C00022], CO2 [CPD:C00011], NADH...","[(S)-malate [CPD:C00149], NAD+ [CPD:C00003], o..."
2,1.1.1.40,[(1) (S)-malate + NADP+ = pyruvate + CO2 + NAD...,"[pyruvate [CPD:C00022], CO2 [CPD:C00011], NADP...","[(S)-malate [CPD:C00149], NADP+ [CPD:C00006], ..."
3,1.1.1.42,[isocitrate + NADP+ = 2-oxoglutarate + CO2 + N...,"[2-oxoglutarate [CPD:C00026], CO2 [CPD:C00011]...","[isocitrate [CPD:C00311], NADP+ [CPD:C00006], ..."
4,1.1.1.85,"[(2R,3S)-3-isopropylmalate + NAD+ = 4-methyl-2...","[4-methyl-2-oxopentanoate [CPD:C00233], CO2 [C...","[(2R,3S)-3-isopropylmalate [CPD:C04411], NAD+ ..."
5,1.1.1.110,[(1) (R)-3-(phenyl)lactate + NAD+ = 3-phenylpy...,"[3-phenylpyruvate, NADH [CPD:C00004], H+ [CPD:...","[(R)-3-(phenyl)lactate [CPD:C05607], NAD+ [CPD..."
6,1.1.1.153,"[(1) L-erythro-7,8-dihydrobiopterin + NADP+ = ...","[sepiapterin [CPD:C00835], NADPH [CPD:C00005],...","[L-erythro-7,8-dihydrobiopterin [CPD:C02953], ..."
7,1.1.1.187,[(1) GDP-alpha-D-rhamnose + NAD(P)+ = GDP-4-de...,"[GDP-4-dehydro-alpha-D-rhamnose [CPD:C01222], ...","[GDP-alpha-D-rhamnose [CPD:C03117], NAD+ [CPD:..."
8,1.1.1.203,[(1) beta-D-galacturonate + NAD+ = D-galactaro...,"[D-galactaro-1,5-lactone [CPD:C20889], NADH [C...","[beta-D-galacturonate, NAD+ [CPD:C00003], beta..."
9,1.1.1.237,[(1) (R)-3-(4-hydroxyphenyl)lactate + NAD(P)+ ...,"[3-(4-hydroxyphenyl)pyruvate [CPD:C01179], NAD...","[(R)-3-(4-hydroxyphenyl)lactate [CPD:C03964], ..."


In [57]:
substrate_to_product_promiscuous_df = substrate_to_product_promiscuous_df[['entry','reaction','product']]

In [59]:
# save substrate and product combined dataframe to csv 
# substrate_to_product_promiscuous_df.to_csv("../datasets/substrate_product_combined_promiscuous.csv")