In [1]:
import pandas as pd
import numpy as np
import os
from rdkit import Chem
from rdkit.Chem import Crippen
from rdkit.Chem import Descriptors
from bioservices import *
from data_preprocessing import *
import warnings
warnings.filterwarnings('ignore')

CURRENT_DIR = os.getcwd()

In [2]:
def get_first_element(x):
    try: return x[0]
    except: return np.nan

# load BRENDA data from pkl file
df_Brenda = pd.read_pickle("../../data/kcat_data/BRENDA_kcat.pkl")

# extract uniprot ID from UNIPROT_list and then drop rows without uniprot ID
df_Brenda["Uniprot ID"] = df_Brenda["UNIPROT_list"].apply(lambda x: get_first_element(x))
df_Brenda = df_Brenda.dropna(subset=["Uniprot ID"])

# drop unnecessary columns and some renaming
df_Brenda.drop(columns = ["index", "ID", "comment", "kcat", "kcat_new", "enzyme",
                         "new", "LITERATURE", "UNIPROT_list", "new enzyme"],
               inplace = True)
df_Brenda.rename(columns = {"correct kcat" : "kcat", "correct reaction ID" : "BRENDA reaction ID",
                           "substrate_ID_list" : "substrate_IDs", 
                           "product_ID_list" : "product_IDs"}, inplace = True)

df_Brenda["from BRENDA"] = 1
df_Brenda["from Uniprot"] = 0
df_Brenda["from Sabio"] = 0

# some stats
print("Number of data points: %s" % len(df_Brenda))
print("Number of UniProt IDs: %s" % len(set(df_Brenda["Uniprot ID"])))
print("Number of checked data points: %s" % len(df_Brenda.loc[df_Brenda["checked"]]))
print("Number of unchecked data points: %s" % len(df_Brenda.loc[~df_Brenda["checked"]]))

Number of data points: 8267
Number of UniProt IDs: 3149
Number of checked data points: 3611
Number of unchecked data points: 4656


In [3]:
# load Sabio data from pkl file
df_Sabio = pd.read_pickle("../../data/kcat_data/Sabio_kcat.pkl")

# drop unnecessary columns and some renaming
df_Sabio.drop(columns = ["unit", "complete", "KEGG ID"], inplace = True)
df_Sabio.rename(columns = {"products_IDs": "product_IDs"}, inplace = True)

df_Sabio["checked"] = False
df_Sabio["#UIDs"] = 1
df_Sabio["complete"] = True
 
df_Sabio["from BRENDA"] = 0
df_Sabio["from Uniprot"] = 0
df_Sabio["from Sabio"] = 1

# some stats
print("Number of data points: %s" % len(df_Sabio))
print("Number of UniProt IDs: %s" % len(set(df_Sabio["Uniprot ID"])))

Number of data points: 2830
Number of UniProt IDs: 289


In [4]:
df_Uniprot = pd.read_pickle("../../data/kcat_data/Uniprot_kcat.pkl")

# drop unnecessary columns and some renaming
df_Uniprot.drop(columns = ["unit", "reaction ID"], inplace = True)
df_Uniprot.rename(columns = {"substrate CHEBI IDs" : "Substrates", "product CHEBI IDs" : "Products", 
                            "substrate InChIs" : "substrate_IDs", "product InChIs" : "product_IDs",
                            "kcat [1/sec]" : "kcat"}, inplace = True)

df_Uniprot["checked"] = False
df_Uniprot["#UIDs"] = 1

df_Uniprot["from BRENDA"] = 0
df_Uniprot["from Uniprot"] = 1
df_Uniprot["from Sabio"] = 0

# some stats
print("Number of data points: %s" % len(df_Uniprot))
print("Number of UniProt IDs: %s" % len(set(df_Uniprot["Uniprot ID"])))

Number of data points: 1738
Number of UniProt IDs: 1054


In [5]:
# combine all data in df_Sabio, df_Brenda, df_Uniprot and then drop rows without kcat
df_kcat = pd.concat([df_Sabio, df_Brenda, df_Uniprot], ignore_index=True)
df_kcat.dropna(subset=["kcat"], inplace=True)
df_kcat.replace(np.nan, None, inplace=True)

# some stats
print("Number of data points: %s" % len(df_kcat))
print("Number of UniProt IDs: %s" % len(set(df_kcat["Uniprot ID"])))

df_kcat.to_pickle("../../data/kcat_data/kcat_data_merged.pkl")

Number of data points: 12806
Number of UniProt IDs: 4124


In [6]:
# Define a dictionary to specify transformation functions for each column
transforms = {
    "from BRENDA": "max",
    "from Uniprot": "max",
    "from Sabio": "max",
    "checked": "any"
}

# Apply the transformation to each group
for col, func in transforms.items():
    df_kcat[col] = df_kcat.groupby(["Uniprot ID", "kcat"])[col].transform(func)

# Drop duplicates based on "Uniprot ID" and "kcat"
df_kcat.drop_duplicates(subset=["Uniprot ID", "kcat"],inplace=True)

In [7]:
# Get unique "Uniprot ID" values
uniprot_IDs = df_kcat["Uniprot ID"].unique()

# Write to file
pd.Series(uniprot_IDs).to_csv("../../data/enzyme_data/UNIPROT_IDs.txt", index=False, header=False)

In [8]:
uniprot_df = pd.read_csv("../../data/enzyme_data/UNIPROT_results.tab", sep = "\t").drop(columns = ["Entry"])
df_kcat = df_kcat.merge(uniprot_df, how="left", on="Uniprot ID").dropna(subset=["Uniprot ID"])
print(f"Number of different amino acid sequences in the dataset: {df_kcat['Sequence'].nunique()}")

Number of different amino acid sequences in the dataset: 3947


In [9]:
kegg_con = KEGG()
chebi_con = ChEBI()

met_IDs = [id for sublist in (df_kcat["substrate_IDs"] + df_kcat["product_IDs"]) if sublist for id in sublist]
df_metabolites = pd.DataFrame(set(met_IDs), columns = ["metabolite ID"])
df_metabolites = df_metabolites[df_metabolites["metabolite ID"] != ""]
df_metabolites["InChI"] = None

In [10]:
def process_row(row):
    met = row["metabolite ID"]
    if met.startswith("InChI"): return met
    else:
        try:
            kegg_entry = kegg_con.parse(kegg_con.get(met))
            chebi_entry = chebi_con.getCompleteEntity('CHEBI:' + kegg_entry['DBLINKS']['ChEBI'])
            return chebi_entry.inchi
        except: pass

df_metabolites["InChI"] = df_metabolites.apply(process_row, axis=1)

In [11]:
df_metabolites

Unnamed: 0,metabolite ID,InChI
1,InChI=1S/C31H43N6O16P/c1-13-22-14(2)36(12-37(2...,InChI=1S/C31H43N6O16P/c1-13-22-14(2)36(12-37(2...
2,"InChI=1S/CH4NO5P/c2-1(3)7-8(4,5)6/h(H2,2,3)(H2...","InChI=1S/CH4NO5P/c2-1(3)7-8(4,5)6/h(H2,2,3)(H2..."
3,"InChI=1S/C5H6N2O3/c8-2-3-1-6-5(10)7-4(3)9/h1,8...","InChI=1S/C5H6N2O3/c8-2-3-1-6-5(10)7-4(3)9/h1,8..."
4,"InChI=1S/C5H13O8P/c6-1-3(7)5(9)4(8)2-13-14(10,...","InChI=1S/C5H13O8P/c6-1-3(7)5(9)4(8)2-13-14(10,..."
5,InChI=1S/C18H32O16/c19-1-5(21)9(23)10(24)6(22)...,InChI=1S/C18H32O16/c19-1-5(21)9(23)10(24)6(22)...
...,...,...
4918,InChI=1S/C39H66N7O17P3S/c1-4-5-6-7-8-9-10-11-1...,InChI=1S/C39H66N7O17P3S/c1-4-5-6-7-8-9-10-11-1...
4919,InChI=1S/C9H13NO4/c1-5(2)8(9(13)14)10-6(11)3-4...,InChI=1S/C9H13NO4/c1-5(2)8(9(13)14)10-6(11)3-4...
4920,InChI=1S/C28H33N7O7/c1-17-12-25(38)42-22-13-19...,InChI=1S/C28H33N7O7/c1-17-12-25(38)42-22-13-19...
4921,InChI=1S/C15H10O7.ClH/c16-7-3-9(17)8-5-12(20)1...,InChI=1S/C15H10O7.ClH/c16-7-3-9(17)8-5-12(20)1...


In [12]:
def process_row(row):
    if pd.isnull(row["InChI"]):
        try:
            path = f"../../data/metabolite_data/mol-files/{row['metabolite ID']}.mol"
            return Chem.MolToInchi(Chem.MolFromMolFile(path))
        except: return None
    else: return row["InChI"]

df_metabolites["InChI"] = df_metabolites.apply(process_row, axis=1)
df_metabolites.dropna(subset=["InChI"], inplace=True)
df_metabolites

[12:06:19] ERROR: Unknown element(s): *


[12:06:19] ERROR: Unknown element(s): *


[12:06:19] ERROR: Unknown element(s): *





[12:06:19] ERROR: Unknown element(s): *


[12:06:19] ERROR: Unknown element(s): *

[12:06:19] ERROR: Unknown element(s): *

[12:06:19] ERROR: Unknown element(s): *

[12:06:19] ERROR: Unknown element(s): *



[12:06:19] ERROR: Unknown element(s): *


[12:06:19] ERROR: Unsupported in this mode element '*'

[12:06:19] ERROR: Unknown element(s): *

[12:06:19] ERROR: Unknown element(s): *

[12:06:19] ERROR: Unknown element(s): *



[12:06:19] ERROR: Unknown element(s): *



[12:06:19] ERROR: Unknown element(s): *

[12:06:19] ERROR: Unknown element(s): *

[12:06:19] ERROR: Unknown element(s): *



[12:06:19] ERROR: Unknown element(s): *


[12:06:19] ERROR: Unknown element(s): *


[12:06:19] ERROR: Unknown element(s): *

[12:06:19] ERROR: Unknown element(s): *

[12:06:19] ERROR: Unknown element(s): *



[12:06:19] ERROR: Unknown element(s): *

[12:06:19] ERROR: Unkn

Unnamed: 0,metabolite ID,InChI
1,InChI=1S/C31H43N6O16P/c1-13-22-14(2)36(12-37(2...,InChI=1S/C31H43N6O16P/c1-13-22-14(2)36(12-37(2...
2,"InChI=1S/CH4NO5P/c2-1(3)7-8(4,5)6/h(H2,2,3)(H2...","InChI=1S/CH4NO5P/c2-1(3)7-8(4,5)6/h(H2,2,3)(H2..."
3,"InChI=1S/C5H6N2O3/c8-2-3-1-6-5(10)7-4(3)9/h1,8...","InChI=1S/C5H6N2O3/c8-2-3-1-6-5(10)7-4(3)9/h1,8..."
4,"InChI=1S/C5H13O8P/c6-1-3(7)5(9)4(8)2-13-14(10,...","InChI=1S/C5H13O8P/c6-1-3(7)5(9)4(8)2-13-14(10,..."
5,InChI=1S/C18H32O16/c19-1-5(21)9(23)10(24)6(22)...,InChI=1S/C18H32O16/c19-1-5(21)9(23)10(24)6(22)...
...,...,...
4918,InChI=1S/C39H66N7O17P3S/c1-4-5-6-7-8-9-10-11-1...,InChI=1S/C39H66N7O17P3S/c1-4-5-6-7-8-9-10-11-1...
4919,InChI=1S/C9H13NO4/c1-5(2)8(9(13)14)10-6(11)3-4...,InChI=1S/C9H13NO4/c1-5(2)8(9(13)14)10-6(11)3-4...
4920,InChI=1S/C28H33N7O7/c1-17-12-25(38)42-22-13-19...,InChI=1S/C28H33N7O7/c1-17-12-25(38)42-22-13-19...
4921,InChI=1S/C15H10O7.ClH/c16-7-3-9(17)8-5-12(20)1...,InChI=1S/C15H10O7.ClH/c16-7-3-9(17)8-5-12(20)1...


In [13]:
def process_row(row):
    try:
        sub_inchis = [list(df_metabolites["InChI"].loc[df_metabolites["metabolite ID"] == sub])[0] for sub in row["substrate_IDs"]]
        pro_inchis = [list(df_metabolites["InChI"].loc[df_metabolites["metabolite ID"] == pro])[0] for pro in row["product_IDs"]]
        return pd.Series([frozenset(sub_inchis), frozenset(pro_inchis)])
    except: return pd.Series([None, None])

df_kcat[["substrate_InChI_set", "product_InChI_set"]] = df_kcat.apply(process_row, axis=1)
df_kcat[["substrate_InChI_set", "product_InChI_set"]]

Unnamed: 0,substrate_InChI_set,product_InChI_set
0,(InChI=1S/C17H21N4O9P/c1-7-3-9-10(4-8(7)2)21(1...,(InChI=1S/C8H6O3/c9-7(8(10)11)6-4-2-1-3-5-6/h1...
1,(InChI=1S/C17H21N4O9P/c1-7-3-9-10(4-8(7)2)21(1...,(InChI=1S/C8H6O3/c9-7(8(10)11)6-4-2-1-3-5-6/h1...
2,(InChI=1S/C17H21N4O9P/c1-7-3-9-10(4-8(7)2)21(1...,(InChI=1S/C8H6O3/c9-7(8(10)11)6-4-2-1-3-5-6/h1...
3,(InChI=1S/C17H21N4O9P/c1-7-3-9-10(4-8(7)2)21(1...,(InChI=1S/C8H6O3/c9-7(8(10)11)6-4-2-1-3-5-6/h1...
4,(InChI=1S/C17H21N4O9P/c1-7-3-9-10(4-8(7)2)21(1...,(InChI=1S/C8H6O3/c9-7(8(10)11)6-4-2-1-3-5-6/h1...
...,...,...
11754,"(InChI=1S/H2O/h1H2, InChI=1S/C3H4O2/c1-3(5)2-4...","(InChI=1S/C3H6O3/c1-2(4)3(5)6/h2,4H,1H3,(H,5,6..."
11755,"(InChI=1S/C2H2O4/c3-1(4)2(5)6/h(H,3,4)(H,5,6)/...","(InChI=1S/C23H36N7O19P3S/c1-23(2,16(33)19(34)2..."
11756,"(InChI=1S/C2H2O4/c3-1(4)2(5)6/h(H,3,4)(H,5,6)/...","(InChI=1S/C23H36N7O19P3S/c1-23(2,16(33)19(34)2..."
11757,"(InChI=1S/C23H37FN7O17P3S/c1-23(2,18(35)21(36)...","(InChI=1S/C21H36N7O16P3S/c1-21(2,16(31)19(32)2..."


In [14]:
df_sequences = df_kcat[["Sequence"]].copy()\
                                    .drop_duplicates()\
                                    .dropna(subset=["Sequence"])\
                                    .reset_index(drop=True)
df_sequences["Sequence ID"] = [f"Sequence_{ind}" for ind, _ in enumerate(df_sequences["Sequence"])]
df_sequences

Unnamed: 0,Sequence,Sequence ID
0,MSQNLFNVEDYRKLRQKRLPKMVYDYLEGGAEDEYGVKHNRDVFQQ...,Sequence_0
1,MISLIAALAVDRVIGMENAMPWNLPADLAWFKRNTLNKPVIMGRHT...,Sequence_1
2,MAMQKIFAREILDSRGNPTVEVDLHTAKGRFRAAVPSGASTGIYEA...,Sequence_2
3,MKFFLLLFTIGFCWAQYSPNTQQGRTSIVHLFEWRWVDIALECERY...,Sequence_3
4,MNPLDLIAKRAYPYETEKRDKTYLALNENPFPFPEDLVDEVFRRLN...,Sequence_4
...,...,...
3942,MAGPLSGLRVVELAGIGPGPHAAMILGDLGADVVRIDRPSSVDGIS...,Sequence_3942
3943,MSSICKGDNSDLTEERKNATFDTDKMAAVIYGREEIASRRRQLTES...,Sequence_3943
3944,MGILSYLCYSLFYLSIFFIIRLLFQSRKFKNLPPGPTSLPIIGNLH...,Sequence_3944
3945,MAQKSALIILAAEGAEEMEVIITGDVLARGEIRVVYAGLDGAEPVK...,Sequence_3945


In [15]:
df_reactions = df_kcat[["substrate_InChI_set", "product_InChI_set"]]\
                        .dropna()\
                        .drop_duplicates()\
                        .reset_index(drop=True)
df_reactions["Reaction ID"] = [f"Reaction_{ind}" for ind in df_reactions.index]
df_reactions

Unnamed: 0,substrate_InChI_set,product_InChI_set,Reaction ID
0,(InChI=1S/C17H21N4O9P/c1-7-3-9-10(4-8(7)2)21(1...,(InChI=1S/C8H6O3/c9-7(8(10)11)6-4-2-1-3-5-6/h1...,Reaction_0
1,(InChI=1S/C17H23N4O9P/c1-7-3-9-10(4-8(7)2)21(1...,"(InChI=1S/H2O2/c1-2/h1-2H, InChI=1S/C17H21N4O9...",Reaction_1
2,(InChI=1S/C21H28N7O17P3/c22-17-12-19(25-7-24-1...,(InChI=1S/C21H30N7O17P3/c22-17-12-19(25-7-24-1...,Reaction_2
3,"(InChI=1S/H2O/h1H2, InChI=1S/C16H28N2O11/c1-5(...",(InChI=1S/C8H15NO6/c1-3(11)9-5-7(13)6(12)4(2-1...,Reaction_3
4,"(InChI=1S/C3H7O7P/c4-1-2(3(5)6)10-11(7,8)9/h2,...","(InChI=1S/H2O/h1H2, InChI=1S/C3H5O6P/c1-2(3(4)...",Reaction_4
...,...,...,...
4437,(InChI=1S/C34H58N7O21P3S/c1-18(58-33-21(43)13-...,"(InChI=1S/H2O2/c1-2/h1-2H, InChI=1S/C34H56N7O2...",Reaction_4437
4438,(InChI=1S/C11H19NO8/c1-4(10(16)17)19-9-7(12-5(...,(InChI=1S/C11H20NO11P/c1-4(10(16)17)21-9-7(12-...,Reaction_4438
4439,(InChI=1S/C8H15NO6/c1-3(11)9-5-7(13)6(12)4(2-1...,(InChI=1S/C8H16NO9P/c1-3(11)9-5-7(13)6(12)4(2-...,Reaction_4439
4440,(InChI=1S/C16H12O4/c1-19-12-5-2-10(3-6-12)14-9...,(InChI=1S/C16H12O5/c1-20-10-3-5-11(14(18)7-10)...,Reaction_4440


In [16]:
def calculate_mw_ratio(row):
    substrates = list(row["substrate_InChI_set"])
    products = list(row["product_InChI_set"])
    
    mw_subs = mw_mets(metabolites = substrates)
    mw_pros = mw_mets(metabolites = products)
    
    return mw_subs / mw_pros if mw_pros != 0 else np.inf

df_reactions["MW_frac"] = df_reactions.apply(calculate_mw_ratio, axis=1)












































In [17]:
df_kcat = df_kcat.merge(df_sequences, on = "Sequence", how = "left")
df_kcat = df_kcat.merge(df_reactions, on=['substrate_InChI_set', 'product_InChI_set'], how='left')

In [18]:
df_kcat["kcat"] = df_kcat["kcat"].astype(float)
df_kcat_new = df_kcat.groupby(["Reaction ID", "Sequence ID"]).agg({
    "kcat": tuple,
    "Uniprot ID": tuple,
    "from BRENDA": tuple,
    "from Uniprot": tuple,
    "from Sabio": tuple,
    "checked": tuple
}).reset_index()

df_kcat_new = df_kcat_new.merge(df_reactions[['Reaction ID', 'substrate_InChI_set', 'product_InChI_set', 'MW_frac']], on='Reaction ID', how='left')
df_kcat_new.rename(columns={'substrate_InChI_set': 'substrates', 'product_InChI_set': 'products'}, inplace=True)
df_kcat_new = df_kcat_new.merge(df_sequences[['Sequence ID', 'Sequence']], on='Sequence ID', how='left')

In [19]:
df_all_kcat = pd.read_pickle("../../data/kcat_data/kcat_data_merged.pkl")

df_all_kcat['kcat'] = df_all_kcat['kcat'].astype(float)
df_all_kcat['max_kcat_for_UID'] = df_all_kcat.groupby('Uniprot ID')['kcat'].transform(max)
df_all_kcat= df_all_kcat[["Uniprot ID", "max_kcat_for_UID"]].drop_duplicates()
df_all_kcat["test"] = df_all_kcat["Uniprot ID"].apply(lambda x: tuple(x.split(" ")))
df_all_kcat.drop(columns = ["Uniprot ID"], inplace = True)

In [20]:
df_kcat_new["test"] = df_kcat_new["Uniprot ID"].apply(lambda x: tuple(set(x)))
df_kcat_new = df_kcat_new.merge(df_all_kcat, on='test', how='left')
df_kcat_new.drop(columns = ["test"], inplace = True)
df_kcat_new["max_kcat_for_RID"] = df_kcat_new["kcat"].apply(lambda x: max(x))

In [71]:
#df_EC_kcat = df_kcat_new[['ECs',"kcat"]].copy().explode('ECs')
#df_EC_kcat["kcat"] = df_EC_kcat["kcat"].apply(lambda x: max(x))
#df_EC_kcat['kcat'] = df_EC_kcat.groupby('ECs')['kcat'].transform('max')
#df_EC_kcat = df_EC_kcat.drop_duplicates().sort_values(by='ECs').dropna().reset_index(drop=True)

Unnamed: 0,ECs,kcat
0,1.-.-.-,100.000
1,1.1.1.-,343000.000
2,1.1.1.1,1333.000
3,1.1.1.10,76.000
4,1.1.1.100,18.000
...,...,...
1821,6.3.5.5,4.790
1822,6.4.1.1,60.000
1823,6.4.1.2,36.800
1824,6.5.1.2,0.072


In [73]:
df_EC_kcat = pd.read_pickle("../../data/enzyme_data/df_EC_max_kcat.pkl")

In [74]:
df_sequences.to_pickle("../../data/enzyme_data/all_sequences_with_IDs.pkl")
df_reactions.to_pickle("../../data/reaction_data/all_reactions_with_IDs.pkl")

In [77]:
train_df = pd.read_pickle(join("..", "..", "data", "kcat_data", "splits", "train_df_kcat.pkl"))

In [81]:
train_df

Unnamed: 0,Reaction ID,Sequence ID,kcat_values,Uniprot IDs,from_BRENDA,from_Sabio,from_Uniprot,checked,Sequence,substrates,...,max_kcat_for_EC,structural_fp,difference_fp,ESM1b,ESM1b_ts,geomean_kcat,frac_of_max_UID,frac_of_max_RID,frac_of_max_EC,DRFP
0,Reaction_2353,Sequence_1357,"[11.4, 18, 19, 30.2]","[A4HSF7, A4HSF7, A4HSF7, A4HSF7]","[1, 1, 1, 1]","[0, 0, 0, 0]","[0, 0, 0, 0]","[False, False, False, False]",MSRAYDLVVLGAGSGGLEAGWNAAVTHKKKVAVVDVQATHGPPLFA...,{InChI=1S/C21H30N7O17P3/c22-17-12-19(25-7-24-1...,...,818.000,1100111100000001001000110110010001001111111100...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-0.018952318, 0.123712085, 0.090606414, 0.055...","[0.60406023, -0.6636518, -0.3568442, 0.9452454...",1.267734,1.000000,0.645299,0.036919,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,Reaction_2090,Sequence_1325,[0.021],[Q8G907],[1],[0],[0],[False],MDKLFSMIEVEVNSQCNRTCWYCPNSVSKRKETGEMDPALYKTLME...,{InChI=1S/C15H22N6O5S/c1-27(3-2-7(16)15(24)25)...,...,0.033,1100110100000000000000100010010001100001001100...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-0.045174923, 0.14551435, -0.08372648, 0.0664...","[1.1505941, -0.49905136, 0.013322916, 0.680336...",-1.677781,0.552632,1.000000,0.636364,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,Reaction_4070,Sequence_3333,"[127.0, 100.0, 15.0, 1.9]","[P05020, P05020, P05020, P05020]","[0, 0, 0, 0]","[0, 0, 0, 0]","[1, 1, 1, 1]","[False, False, False, False]",MTAPSQVLKIRRPDDWHLHLRDGDMLKTVVPYTSEIYGRAIVMPNL...,{InChI=1S/C5H6N2O4/c8-3-1-2(4(9)10)6-5(11)7-3/...,...,195.000,1100000000000000000000000000000001001011001000...,"[0.0, 0.0, 0.0, 0.0, 10.0, 0.0, 0.0, 0.0, 0.0,...","[0.120722964, 0.14700642, 0.103796266, 0.09974...","[0.6776583, 0.20057113, 0.38323998, 1.2769377,...",1.389662,0.701270,1.000000,0.651282,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,Reaction_2917,Sequence_496,[3.7],[P0A0C1],[1],[0],[0],[False],MNIVENEICIRTLIDDDFPLMLKWLTDERVLEFYGGRDKKYTLESL...,{InChI=1S/C10H16N5O14P3/c11-10-13-7-4(8(18)14-...,...,18.000,1100110100000001001000110110010001001001111000...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-0.13503967, 0.111267805, -0.003093031, 0.075...","[0.03524893, 0.29595685, -0.13567334, 0.440565...",0.568202,1.000000,0.569231,0.205556,"[0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, ..."
4,Reaction_1344,Sequence_1378,[12.4],[Q9CYK2],[1],[0],[0],[True],MAGSEDKRVVGTLHLLLLQATVLSLTAGNLSLVSAAWTQEKNHHQP...,{InChI=1S/C7H13N3O4/c8-4(1-2-5(9)11)7(14)10-3-...,...,220.000,1100000000000000000000000000000001001001001000...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.15608172, 0.38407168, 0.25993258, -0.108591...","[0.852272, -0.45679456, -0.13155718, 0.1722895...",1.093422,0.295238,1.000000,0.056364,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3369,Reaction_4380,Sequence_2106,[9.0],[P37821],[0],[0],[1],[False],MRMLSRNATFNSHGQDSSYFLGWQEYEKNPYHEVHNTNGIIQMGLA...,{InChI=1S/C15H22N6O5S/c1-27(3-2-7(16)15(24)25)...,...,18.000,1100110100000000000000100010010001100001001100...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.06506164, 0.33486807, -0.044696603, 0.06077...","[1.2487527, -0.9111954, -0.10162743, 0.0384985...",0.954243,1.000000,1.000000,0.500000,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3370,Reaction_133,Sequence_2563,[8.7],[Q91XV4],[0],[1],[0],[False],MDLGLAGRRALVTGAGKGIGRSTVLALQAAGAHVVAVSRTQADLDS...,{InChI=1S/C21H28N7O17P3/c22-17-12-19(25-7-24-1...,...,119.000,1100111100000001001000110110010001001101111100...,"[0.0, 0.0, 0.0, 0.0, 40.0, 0.0, 0.0, 0.0, 0.0,...","[0.10418188, -0.02103975, -0.12556212, 0.23702...","[0.5342785, -0.748276, -0.30547145, 0.41810054...",0.939519,0.150000,0.790909,0.073109,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3371,Reaction_1659,Sequence_1030,[35.0],[P44886],[1],[0],[0],[True],MSANFTDKNGRQSKGVLLLRTLAMPSDTNANGDIFGGWIMSQMDMG...,{InChI=1S/C25H40N7O18P3S/c1-13(33)8-16(35)54-7...,...,,1100110100000001001000110110010001101101111100...,"[0.0, 0.0, 0.0, 0.0, 0.0, -10.0, 0.0, 0.0, 0.0...","[-0.06449494, 0.22217515, 0.05601762, -0.00443...","[0.97031575, -0.4627978, -0.12502046, 0.412632...",1.544068,0.184211,1.000000,1.000000,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3372,Reaction_265,Sequence_2820,[43.4],[F6IXY6],[1],[0],[0],[True],MKVIVIGCTHAGTAAVNQILASNPETDVTIYERNDNVSFLSCGIAL...,{InChI=1S/C21H29N7O14P2/c22-17-12-19(25-7-24-1...,...,,1100111100000001001000110110010001001111111100...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.074337855, 0.23836288, -0.22196023, 0.11088...","[0.8693828, 0.4708751, 0.67840207, 0.74606836,...",1.637490,1.000000,1.000000,1.000000,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
