# Data Pipeline - Kinetic rate pipeline

## Required Python Libraries

In [None]:
import glob
import hashlib
import numpy as np
import os
import re
import pandas as pd
import pickle
import pybel
from SOAPpy import SOAPProxy
import string
import time
import urllib2

## Necessary User Input

BRENDA API username and password

In [None]:
# BRENDA account name; it becomes the first comma-separated field of the
# SOAP request parameters built in the download cells below.
username = 'joshlewis@gatech.edu' # enter your username
# BRENDA account password; the download cells place only its SHA-256 hex
# digest in the request, never the plain text.
password = '' # enter your password

## Code

### Download kcat values from BRENDA database

Load list of EC numbers to download data for

In [None]:
# Read one EC number per line (trailing newline removed). The context manager
# closes the file handle, which a bare open() inside the comprehension would
# leave open until garbage collection.
with open('_data_/input/ec_numbers.txt','r') as ec_file:
    ec = [line.split('\n')[0] for line in ec_file]

#### Human organism

Download and extract information from BRENDA database - restrict to human organism

In [None]:
# function used for parsing data
def find_between(s, first_string, second_string, ind):
    """Return the part of ``s`` enclosed by two marker strings.

    The search for ``first_string`` begins at offset ``ind``; the search for
    ``second_string`` begins right after the end of ``first_string``.
    Raises ValueError (from ``str.index``) when either marker is not found.
    """
    content_begin = s.index(first_string, ind) + len(first_string)
    content_end = s.index(second_string, content_begin)
    return s[content_begin:content_end]

In [None]:
# Download human turnover numbers (kcat) from BRENDA, one TSV file per EC
# number. Relies on `ec`, `username`, `password` and `find_between` defined
# in the cells above.

# BRENDA ligand IDs already looked up and their InChI keys (parallel lists,
# reused by the all-organisms download cell)
brendaids = []
inchis = []

# loop-invariant helpers: build them once instead of once per result entry
temp_pattern = re.compile(r'([0-9]{1,}\.{0,1}[0-9]{0,})C')   # number directly followed by 'C'
ph_pattern = re.compile(r'pH ([0-9]{1,}\.{0,1}[0-9]{0,})')   # number following 'pH '
printable_chars = set(string.printable)
password_hash = hashlib.sha256(password).hexdigest()

# iterate over list of EC numbers
for ec_number in ec:

    # log into BRENDA database
    parameters = username+","+password_hash+",ecNumber*"+ec_number+"#organism*Homo sapiens#"
    client = SOAPProxy("http://www.brenda-enzymes.info/soap/brenda_server.php")

    # get turnover number data; retry once a minute until the call succeeds.
    # Catch Exception rather than using a bare except so that a keyboard /
    # kernel interrupt can still stop the otherwise endless loop.
    while True:
        try:
            resultString = client.getTurnoverNumber(parameters)
            break
        except Exception:
            time.sleep(60)

    # nothing to write when BRENDA has no entries for this EC number
    if resultString == '':
        continue

    # split up result entries
    results = resultString.split('!')

    # initialize output file
    with open('_data_/processing/brenda/human/%s.tsv' % ec_number,'w') as f:
        f.write('VALUE\tSUBSTRATE\tBRENDA\tINCHI\tTEMP\tPH\tMUTATION\tNOTES\n')

        # iterate over result entries
        for s in results:

            # skip entries without a turnover number ('-999' placeholder)
            value = s.split('turnoverNumber*')[1].split('#turnoverNumberMaximum')[0]
            if value == '-999':
                continue

            # skip entries without a BRENDA ligand ID
            brendaid = s.split('ligandStructureId*')[1].split('#literature')[0]
            if brendaid == '':
                continue

            # extract substrate and notes (non-printable characters and the
            # HTML remnants of the degree sign are removed from the notes)
            substrate = s.split('substrate*')[1].split('#commentary')[0]
            printable_entry = ''.join(ch for ch in s if ch in printable_chars)
            notes = printable_entry.split('commentary*')[1].split('#organism')[0].replace('&Acirc;','').replace('&deg;','')

            # need to get InChI key for every new BRENDA substrate
            if brendaid not in brendaids:
                url = 'http://www.brenda-enzymes.org/ligand.php?brenda_group_id=%s' % brendaid

                # the request sometimes fails; repeat until no error
                while True:
                    try:
                        text = urllib2.urlopen(urllib2.Request(url, headers={'User-Agent' : "Magic Browser"})).read()
                        break
                    except Exception:
                        time.sleep(60)

                inchi = find_between(text,'- InchiKey: ',',',0)

                # record ID and key together, and only after the key was
                # parsed, so the parallel lists cannot get out of step if
                # find_between raises
                brendaids.append(brendaid)
                inchis.append(inchi)
            else:
                inchi = inchis[brendaids.index(brendaid)]

            # temperature
            search = temp_pattern.search(notes)
            temp = search.group(1) if search is not None else ''

            # pH
            search = ph_pattern.search(notes)
            ph = search.group(1) if search is not None else ''

            # whether enzyme is mutant ('X') or wild-type ('')
            notes_lower = notes.lower()
            if any(test_string in notes_lower for test_string in ('mutant','mutation','mutated')):
                mutation = 'X'
            else:
                mutation = ''

            # write to file
            f.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' % (value,substrate,brendaid,inchi,temp,ph,mutation,notes))

#### All organisms

Download and extract information from BRENDA database - get data for all organisms

In [None]:
# Download turnover numbers (kcat) for all organisms from BRENDA, one TSV
# file per EC number. Relies on `ec`, `username`, `password`, `find_between`
# and on the `brendaids` / `inchis` lookup lists started by the human
# download cell, which are extended here.

# loop-invariant helpers: build them once instead of once per result entry
temp_pattern = re.compile(r'([0-9]{1,}\.{0,1}[0-9]{0,})C')   # number directly followed by 'C'
ph_pattern = re.compile(r'pH ([0-9]{1,}\.{0,1}[0-9]{0,})')   # number following 'pH '
printable_chars = set(string.printable)
password_hash = hashlib.sha256(password).hexdigest()

# iterate over list of EC numbers
for ec_number in ec:

    # log into BRENDA database (no organism restriction)
    parameters = username+","+password_hash+",ecNumber*"+ec_number+"#"
    client = SOAPProxy("http://www.brenda-enzymes.info/soap/brenda_server.php")

    # get turnover number data; retry once a minute until the call succeeds.
    # Catch Exception rather than using a bare except so that a keyboard /
    # kernel interrupt can still stop the otherwise endless loop.
    while True:
        try:
            resultString = client.getTurnoverNumber(parameters)
            break
        except Exception:
            time.sleep(60)

    # nothing to write when BRENDA has no entries for this EC number
    if resultString == '':
        continue

    # split up result entries
    results = resultString.split('!')

    # initialize output file
    with open('_data_/processing/brenda/all_organisms/%s.tsv' % ec_number,'w') as f:
        f.write('VALUE\tSUBSTRATE\tBRENDA\tINCHI\tTEMP\tPH\tMUTATION\tNOTES\n')

        # iterate over result entries
        for s in results:

            # skip entries without a turnover number ('-999' placeholder)
            value = s.split('turnoverNumber*')[1].split('#turnoverNumberMaximum')[0]
            if value == '-999':
                continue

            # skip entries without a BRENDA ligand ID
            brendaid = s.split('ligandStructureId*')[1].split('#literature')[0]
            if brendaid == '':
                continue

            # extract substrate and notes (non-printable characters and the
            # HTML remnants of the degree sign are removed from the notes)
            substrate = s.split('substrate*')[1].split('#commentary')[0]
            printable_entry = ''.join(ch for ch in s if ch in printable_chars)
            notes = printable_entry.split('commentary*')[1].split('#organism')[0].replace('&Acirc;','').replace('&deg;','')

            # need to get InChI key for every new BRENDA substrate
            if brendaid not in brendaids:
                url = 'http://www.brenda-enzymes.org/ligand.php?brenda_group_id=%s' % brendaid

                # the request sometimes fails; repeat until no error
                while True:
                    try:
                        text = urllib2.urlopen(urllib2.Request(url, headers={'User-Agent' : "Magic Browser"})).read()
                        break
                    except Exception:
                        time.sleep(60)

                inchi = find_between(text,'- InchiKey: ',',',0)

                # record ID and key together, and only after the key was
                # parsed, so the parallel lists cannot get out of step if
                # find_between raises
                brendaids.append(brendaid)
                inchis.append(inchi)
            else:
                inchi = inchis[brendaids.index(brendaid)]

            # temperature
            search = temp_pattern.search(notes)
            temp = search.group(1) if search is not None else ''

            # pH
            search = ph_pattern.search(notes)
            ph = search.group(1) if search is not None else ''

            # whether enzyme is mutant ('X') or wild-type ('')
            notes_lower = notes.lower()
            if any(test_string in notes_lower for test_string in ('mutant','mutation','mutated')):
                mutation = 'X'
            else:
                mutation = ''

            # write to file
            f.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' % (value,substrate,brendaid,inchi,temp,ph,mutation,notes))

### Convert metabolite ID's to compare to Recon3D metabolites

Get list of all InChI's

In [None]:
# Gather every distinct value of the INCHI column across the processed
# BRENDA (human, all organisms) and Matsson tables, in first-seen order.
inchis = []
for table_glob in ('_data_/processing/brenda/human/*.tsv',
                   '_data_/processing/brenda/all_organisms/*.tsv',
                   '_data_/processing/matsson/*.tsv'):
    for fn in glob.glob(table_glob):
        df = pd.read_table(fn, sep='\t')
        # de-duplicate within the file first, then against what is already known
        for key in set(df['INCHI'].values.tolist()):
            if key in inchis:
                continue
            inchis.append(key)

Convert InChI keys to ChEBI, HMDB, InChI code, KEGG, and PubChem

In [None]:
def _fetch_url(url):
    """Return the response body of ``url``, retrying once a minute on error.

    Only ``Exception`` is caught, so a keyboard / kernel interrupt still
    stops the otherwise endless retry loop.
    """
    while True:
        try:
            return urllib2.urlopen(urllib2.Request(url, headers={'User-Agent' : "Magic Browser"})).read()
        except Exception:
            time.sleep(60)


def _first_cts_id(target, inchi):
    """Convert an InChI key with the CTS web service and return the first hit.

    ``target`` is the URL-encoded name of the target ID type as it appears in
    the CTS URL (e.g. 'ChEBI', 'PubChem%20CID'). Returns ``np.nan`` when the
    response contains no result list. Uses ``find_between`` defined earlier
    in this notebook.
    """
    url = 'http://cts.fiehnlab.ucdavis.edu/service/convert/InChIKey/%s/%s' % (target, inchi)
    text = _fetch_url(url)
    if '"result": \n    [\n      ' not in text:
        return np.nan
    # each list item is a quoted string; [1:-1] drops the surrounding quotes
    ids = [x[1:-1] for x in find_between(text,'"result": \n    [\n      ','\n    ]',0).split(',\n      ')]
    return ids[0]


with open('_data_/processing/status.txt','w') as f:

    # initialize lists of metabolite ID's (parallel to `inchis`)
    chebis = []
    hmdbs = []
    inchi_codes = []
    keggs = []
    pubchems = []

    # iterate over InChI keys; only the first ID listed by CTS is kept
    for inchi in inchis:
        chebis.append(_first_cts_id('ChEBI', inchi))
        hmdbs.append(_first_cts_id('Human%20Metabolome%20Database', inchi))
        inchi_codes.append(_first_cts_id('InChI%20Code', inchi))
        keggs.append(_first_cts_id('KEGG', inchi))
        pubchems.append(_first_cts_id('PubChem%20CID', inchi))

        # progress log: one line per InChI key that has been converted
        f.write('%s\n' % inchi)

Save conversion

In [None]:
# Write the six parallel ID lists back-to-back into a single pickle file.
# The dump order is the order in which the lists are read back when loading.
with open('_data_/processing/metids.pkl', 'wb') as output:
    for id_list in (inchis, chebis, hmdbs, inchi_codes, keggs, pubchems):
        pickle.dump(id_list, output, pickle.HIGHEST_PROTOCOL)

Load conversion

In [None]:
# Read the six parallel ID lists back in the order in which they were dumped.
# The file handle is not called `input`, so the builtin of that name stays
# usable in the rest of the notebook session.
# NOTE: unpickling can execute arbitrary code - only load a metids.pkl that
# was produced by this pipeline.
with open('_data_/processing/metids.pkl', 'rb') as pkl_file:
    inchis = pickle.load(pkl_file)
    chebis = pickle.load(pkl_file)
    hmdbs = pickle.load(pkl_file)
    inchi_codes = pickle.load(pkl_file)
    keggs = pickle.load(pkl_file)
    pubchems = pickle.load(pkl_file)

Manual edits - Thioredoxin

In [None]:
# Overwrite the converted ChEBI and KEGG IDs for one InChI key by hand;
# the position is looked up once and reused for both parallel lists.
manual_edit_position = inchis.index('ACFIFAYJTCAMRW-XKNYDFJKSA-N')
chebis[manual_edit_position] = 'CHEBI:18191'
keggs[manual_edit_position] = 'C00343'

### Implement kinetic rate pipeline

<u>Pipeline:</u><br>
For all steps: no values from mutated enzymes will be kept<br>
1: correct ec, correct substrate, human, T = 37, pH = correct<br>
2: correct ec, correct substrate, human, T = 25-40, pH within 1.0 of correct<br>
3: correct ec, correct substrate, human, T = 25-40 + unknown, pH within 1.0 of correct + unknown<br>
4: correct ec, correct substrate, all organisms, T = 37, pH = correct<br>
5: correct ec, correct substrate, all organisms, T = 25-40, pH within 1.0 of correct<br>
6: correct ec, correct substrate, all organisms, T = 25-40 + unknown, pH within 1.0 of correct + unknown<br>
7: correct ec, all substrates, human, T = 37, pH = correct<br>
8: correct ec, all substrates, human, T = 25-40, pH within 1.0 of correct<br>
9: correct ec, all substrates, human, T = 25-40 + unknown, pH within 1.0 of correct + unknown<br>
10: correct ec, all substrates, all organisms, T = 37, pH = correct<br>
11: correct ec, all substrates, all organisms, T = 25-40, pH within 1.0 of correct<br>
12: correct ec, all substrates, all organisms, T = 25-40 + unknown, pH within 1.0 of correct + unknown<br>
13: X.X.X.ANY, correct substrate, human, T = 37, pH = correct<br>
14: X.X.X.ANY, correct substrate, human, T = 25-40, pH within 1.0 of correct<br>
15: X.X.X.ANY, correct substrate, human, T = 25-40 + unknown, pH within 1.0 of correct + unknown<br>
16: X.X.X.ANY, correct substrate, all organisms, T = 37, pH = correct<br>
17: X.X.X.ANY, correct substrate, all organisms, T = 25-40, pH within 1.0 of correct<br>
18: X.X.X.ANY, correct substrate, all organisms, T = 25-40 + unknown, pH within 1.0 of correct + unknown<br>
19: X.X.X.ANY, all substrates, human, T = 37, pH = correct<br>
20: X.X.X.ANY, all substrates, human, T = 25-40, pH within 1.0 of correct<br>
21: X.X.X.ANY, all substrates, human, T = 25-40 + unknown, pH within 1.0 of correct + unknown<br>
22: X.X.X.ANY, all substrates, all organisms, T = 37, pH = correct<br>
23: X.X.X.ANY, all substrates, all organisms, T = 25-40, pH within 1.0 of correct<br>
24: X.X.X.ANY, all substrates, all organisms, T = 25-40 + unknown, pH within 1.0 of correct + unknown<br>
25: X.X.ANY.ANY, correct substrate, human, T = 37, pH = correct<br>
26: X.X.ANY.ANY, correct substrate, human, T = 25-40, pH within 1.0 of correct<br>
27: X.X.ANY.ANY, correct substrate, human, T = 25-40 + unknown, pH within 1.0 of correct + unknown<br>
28: X.X.ANY.ANY, correct substrate, all organisms, T = 37, pH = correct<br>
29: X.X.ANY.ANY, correct substrate, all organisms, T = 25-40, pH within 1.0 of correct<br>
30: X.X.ANY.ANY, correct substrate, all organisms, T = 25-40 + unknown, pH within 1.0 of correct + unknown<br>
31: X.X.ANY.ANY, all substrates, human, T = 37, pH = correct<br>
32: X.X.ANY.ANY, all substrates, human, T = 25-40, pH within 1.0 of correct<br>
33: X.X.ANY.ANY, all substrates, human, T = 25-40 + unknown, pH within 1.0 of correct + unknown<br>
34: X.X.ANY.ANY, all substrates, all organisms, T = 37, pH = correct<br>
35: X.X.ANY.ANY, all substrates, all organisms, T = 25-40, pH within 1.0 of correct<br>
36: X.X.ANY.ANY, all substrates, all organisms, T = 25-40 + unknown, pH within 1.0 of correct + unknown<br>
37: X.ANY.ANY.ANY, correct substrate, human, T = 37, pH = correct<br>
38: X.ANY.ANY.ANY, correct substrate, human, T = 25-40, pH within 1.0 of correct<br>
39: X.ANY.ANY.ANY, correct substrate, human, T = 25-40 + unknown, pH within 1.0 of correct + unknown<br>
40: X.ANY.ANY.ANY, correct substrate, all organisms, T = 37, pH = correct<br>
41: X.ANY.ANY.ANY, correct substrate, all organisms, T = 25-40, pH within 1.0 of correct<br>
42: X.ANY.ANY.ANY, correct substrate, all organisms, T = 25-40 + unknown, pH within 1.0 of correct + unknown<br>
43: X.ANY.ANY.ANY, all substrates, human, T = 37, pH = correct<br>
44: X.ANY.ANY.ANY, all substrates, human, T = 25-40, pH within 1.0 of correct<br>
45: X.ANY.ANY.ANY, all substrates, human, T = 25-40 + unknown, pH within 1.0 of correct + unknown<br>
46: X.ANY.ANY.ANY, all substrates, all organisms, T = 37, pH = correct<br>
47: X.ANY.ANY.ANY, all substrates, all organisms, T = 25-40, pH within 1.0 of correct<br>
48: X.ANY.ANY.ANY, all substrates, all organisms, T = 25-40 + unknown, pH within 1.0 of correct + unknown<br>

Load reaction and metabolite information files

In [None]:
# Tab-separated reaction table; the pipeline cell reads its 'FORMULA',
# 'EC NUMBER' and 'RXNID' columns.
df_reactions = pd.read_table('../recon/reactions.tsv',sep='\t')
# Tab-separated metabolite table; rows are located through the 'METID' column
# and supply the 'CHEBI', 'KEGG', 'PUBCHEM', 'INCHI STRING', 'HMDB' and
# 'SMILE' identifiers.
df_metabolites = pd.read_table('../recon/metabolites.tsv',sep='\t')

Recon3D compartment pH's

In [None]:
# pH value per compartment, keyed by single-letter compartment code.
# NOTE: this rebinds `ph`, which the BRENDA download cells use for the
# per-entry pH string parsed from the notes.
ph = {'c':7.2, 'e':7.4, 'g':6.35, 'l':5.5, 'm':8.0, 'n':7.2, 'r':7.2, 'x':7.0}

Get list of EC numbers with available turnover number data

In [None]:
# EC numbers that have a downloaded turnover-number table. Only '*.tsv'
# entries are considered, so stray directory entries (e.g. '.ipynb_checkpoints'
# or '.DS_Store') no longer end up as truncated, bogus EC numbers.
available_human = [os.path.splitext(x)[0]
                   for x in os.listdir('_data_/processing/brenda/human')
                   if x.endswith('.tsv')]
available_all = [os.path.splitext(x)[0]
                 for x in os.listdir('_data_/processing/brenda/all_organisms')
                 if x.endswith('.tsv')]

Initialize results dataframes

In [None]:
# Empty result tables for the forward and reverse direction of each
# reaction; both share the same three-column layout.
result_columns = ['REACTION','KCAT [1/hr]','STEP']
kcat_forward = pd.DataFrame(columns=result_columns)
kcat_reverse = pd.DataFrame(columns=result_columns)

Run pipeline

In [None]:
with open('_data_/processing/status.txt','w') as f:

    # iterate over reactions
    for i in range(df_reactions.shape[0]):
        print(i)
        
        # extract metabolites
        if '<=>' in df_reactions.loc[i]['FORMULA']:
            metabolites_forward =  [x for a,x in enumerate([x for x in df_reactions.loc[i]['FORMULA'].split(' <=> ')[0].split(' ') if x != '+']) if a%2 != 0]
            if len(metabolites_forward) > 0:
                stoich_forward =  [float(x) for a,x in enumerate([x for x in df_reactions.loc[i]['FORMULA'].split(' <=> ')[0].split(' ') if x != '+']) if a%2 == 0]
            metabolites_reverse = [x for a,x in enumerate([x for x in df_reactions.loc[i]['FORMULA'].split(' <=> ')[1].split(' ') if x != '+']) if a%2 != 0]
            if len(metabolites_reverse) > 0:
                stoich_reverse = [float(x) for a,x in enumerate([x for x in df_reactions.loc[i]['FORMULA'].split(' <=> ')[1].split(' ') if x != '+']) if a%2 == 0]
        elif '-->' in df_reactions.loc[i]['FORMULA']:
            metabolites_forward =  [x for a,x in enumerate([x for x in df_reactions.loc[i]['FORMULA'].split(' --> ')[0].split(' ') if x != '+']) if a%2 != 0]
            if len(metabolites_forward) > 0:
                stoich_forward =  [float(x) for a,x in enumerate([x for x in df_reactions.loc[i]['FORMULA'].split(' --> ')[0].split(' ') if x != '+']) if a%2 == 0]
            metabolites_reverse = []
            stoich_reverse = []
        elif 'X--X' in df_reactions.loc[i]['FORMULA']:
            metabolites_forward =  [x for a,x in enumerate([x for x in df_reactions.loc[i]['FORMULA'].split(' X--X ')[0].split(' ') if x != '+']) if a%2 != 0]
            if len(metabolites_forward) > 0:
                stoich_forward =  [float(x) for a,x in enumerate([x for x in df_reactions.loc[i]['FORMULA'].split(' X--X ')[0].split(' ') if x != '+']) if a%2 == 0]
            if len(df_reactions.loc[i]['FORMULA'].split(' X--X ')[1]) > 0:
                metabolites_reverse = [x for a,x in enumerate([x for x in df_reactions.loc[i]['FORMULA'].split(' X--X ')[1].split(' ') if x != '+']) if a%2 != 0]
                if len(metabolites_reverse) > 0:
                    stoich_reverse = [float(x) for a,x in enumerate([x for x in df_reactions.loc[i]['FORMULA'].split(' X--X ')[1].split(' ') if x != '+']) if a%2 == 0]
            else:
                metabolites_reverse = []
                stoich_reverse = []

        # if has EC number available
        if type(df_reactions.loc[i]['EC NUMBER']) == str:
            
            # Matsson data - import or export
            if df_reactions.loc[i]['EC NUMBER'].split(' | ')[0][:2] in ['I-','E-']:

                # get metabolite ID's and pH's
                met_chebi = []
                met_kegg = []
                met_pubchem = []
                met_inchi = []
                met_hmdb = []
                met_smiles = []
                for a in range(len(metabolites_forward)):

                    # chebi
                    if type(df_metabolites.loc[df_metabolites['METID'].tolist().index(metabolites_forward[a])]['CHEBI']) == str:
                        met_chebi.append(str(int(df_metabolites.loc[df_metabolites['METID'].tolist().index(metabolites_forward[a])]['CHEBI'])))
                    else:
                        met_chebi.append(np.nan)

                    # kegg
                    if type(df_metabolites.loc[df_metabolites['METID'].tolist().index(metabolites_forward[a])]['KEGG']) == str:
                        met_kegg.append(df_metabolites.loc[df_metabolites['METID'].tolist().index(metabolites_forward[a])]['KEGG'])
                    else:
                        met_kegg.append(np.nan)

                    # pubchem
                    if type(df_metabolites.loc[df_metabolites['METID'].tolist().index(metabolites_forward[a])]['PUBCHEM']) == str:
                        met_pubchem.append(df_metabolites.loc[df_metabolites['METID'].tolist().index(metabolites_forward[a])]['PUBCHEM'])
                    else:
                        met_pubchem.append(np.nan)

                    # inchi
                    if type(df_metabolites.loc[df_metabolites['METID'].tolist().index(metabolites_forward[a])]['INCHI STRING']) == str:
                        met_inchi.append(df_metabolites.loc[df_metabolites['METID'].tolist().index(metabolites_forward[a])]['INCHI STRING'])
                    else:
                        met_inchi.append(np.nan)

                    # hmdb
                    if type(df_metabolites.loc[df_metabolites['METID'].tolist().index(metabolites_forward[a])]['HMDB']) == str:
                        met_hmdb.append(df_metabolites.loc[df_metabolites['METID'].tolist().index(metabolites_forward[a])]['HMDB'])
                    else:
                        met_hmdb.append(np.nan)

                    # smiles
                    if type(df_metabolites.loc[df_metabolites['METID'].tolist().index(metabolites_forward[a])]['SMILE']) == str:
                        met_smiles.append(df_metabolites.loc[df_metabolites['METID'].tolist().index(metabolites_forward[a])]['SMILE'])
                    else:
                        met_smiles.append(np.nan)

                # load data
                if df_reactions.loc[i]['EC NUMBER'].split(' | ')[0][:2] == 'I-':
                    df_human = pd.read_table('_data_/processing/matsson/import.tsv')
                else:
                    df_human = pd.read_table('_data_/processing/matsson/export.tsv')
                
                # get ec numbers
                ecs = [x.split('-')[1] for x in df_reactions.loc[i]['EC NUMBER'].split(' | ')]
                
                # if any ec's match, use just those
                ecs_match = [x for x in ecs if x in df_human['TRANSPORTER'].values.tolist()]
                if len(ecs_match) > 0:
                    
                    # initialize pipeline values
                    values_ec_1 = []
                    values_ec_7 = []

                    # initialize tanimoto indices
                    tanimoto_ec_7 = []
                    
                    # iterate over matching ec numbers
                    for c in range(len(ecs_match)):
                        values_ec_1.append([])
                        values_ec_7.append([])
                        tanimoto_ec_7.append([])

                        # iterate over data
                        for b in range(df_human.shape[0]):
                            if df_human.at[b,'TRANSPORTER'] == ecs_match[c]:

                                # get substrate ID's
                                if type(chebis[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                    sub_chebi = chebis[inchis.index(df_human.loc[b]['INCHI'])].split(':')[1]
                                else:
                                    sub_chebi = np.nan

                                if type(hmdbs[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                    sub_hmdb = hmdbs[inchis.index(df_human.loc[b]['INCHI'])]
                                else:
                                    sub_hmdb = np.nan

                                if type(inchi_codes[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                    sub_inchi = inchi_codes[inchis.index(df_human.loc[b]['INCHI'])]
                                else:
                                    sub_inchi = np.nan

                                if type(keggs[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                    sub_kegg = keggs[inchis.index(df_human.loc[b]['INCHI'])]
                                else:
                                    sub_kegg = np.nan

                                if type(pubchems[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                    sub_pubchem = pubchems[inchis.index(df_human.loc[b]['INCHI'])]
                                else:
                                    sub_pubchem = np.nan

                                # 1: correct substrate
                                for a in range(len(metabolites_forward)):
                                    if ((met_chebi[a] == sub_chebi) or (met_hmdb[a] == sub_hmdb) or (met_inchi[a] == sub_inchi) or (met_kegg[a] == sub_kegg) or (met_pubchem[a] == sub_pubchem)):
                                        values_ec_1[c].append(df_human.loc[b]['VALUE'] / stoich_forward[a])
                                        break

                                # 7: all substrates
                                tanimoto = []
                                for a in range(len(metabolites_forward)):

                                    # if id's available, report tanimoto distance
                                    if (type(met_inchi[a]) == str) and (type(sub_inchi) == str):
                                        tanimoto.append(pybel.readstring('inchi',met_inchi[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                    elif (type(met_smiles[a]) == str) and (type(sub_inchi) == str):
                                        tanimoto.append(pybel.readstring('smiles',met_smiles[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                    else:
                                        tanimoto.append(0)

                                if len(tanimoto) > 0:
                                    values_ec_7[c].append(df_human.loc[b]['VALUE'])
                                    tanimoto_ec_7[c].append(max(tanimoto))

                    # aggregate data for all valid ec numbers
                    values1 = []
                    values7 = []
                    for c in range(len(ecs_match)):
                        if len(values_ec_1[c]) > 0:
                            values1.append(np.mean(values_ec_1[c]))
                        if len(values_ec_7[c]) > 0:
                            if max(tanimoto_ec_7[c]) == 0:
                                weights = np.ones(len(tanimoto_ec_7[c]))
                            else:
                                weights = [float(x)/sum(tanimoto_ec_7[c]) for x in tanimoto_ec_7[c]]
                            values7.append(np.dot(weights,values_ec_7[c]))

                    # determine which pipeline should be used
                    if len(values1) > 0:
                        kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values1)*60*60,'1Mec']
                    elif len(values7) > 0:
                        kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values7)*60*60,'7Mec']

                    # not reversible
                    kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.nan,np.nan]
                    
                # if no ec's match, use all
                else:
                    # Fallback: take every TRANSPORTER entry of df_human as a match.
                    # Duplicate entries in the column are kept, exactly as before, so a
                    # repeated transporter is processed once per occurrence.
                    ecs_match = df_human['TRANSPORTER'].values.tolist()

                    # initialize pipeline values
                    values_ec_1 = []
                    values_ec_7 = []

                    # initialize tanimoto indices
                    tanimoto_ec_7 = []

                    # iterate over matching ec numbers
                    for c in range(len(ecs_match)):
                        values_ec_1.append([])
                        values_ec_7.append([])
                        tanimoto_ec_7.append([])

                        # iterate over data
                        for b in range(df_human.shape[0]):
                            if df_human.at[b,'TRANSPORTER'] == ecs_match[c]:

                                # fetch the row and locate its substrate once, instead of
                                # repeating the linear inchis.index() search for every ID
                                sub_row = df_human.loc[b]
                                sub_index = inchis.index(sub_row['INCHI'])

                                # get substrate ID's; entries that are not plain strings
                                # are treated as missing (exact type check kept on purpose)
                                if type(chebis[sub_index]) == str:
                                    # keep only the part after the first ':' of the stored ID
                                    sub_chebi = chebis[sub_index].split(':')[1]
                                else:
                                    sub_chebi = np.nan
                                sub_hmdb = hmdbs[sub_index] if type(hmdbs[sub_index]) == str else np.nan
                                sub_inchi = inchi_codes[sub_index] if type(inchi_codes[sub_index]) == str else np.nan
                                sub_kegg = keggs[sub_index] if type(keggs[sub_index]) == str else np.nan
                                sub_pubchem = pubchems[sub_index] if type(pubchems[sub_index]) == str else np.nan

                                # 1: correct substrate
                                # the first forward metabolite sharing any ID with the
                                # substrate wins; the value is divided by its stoichiometry
                                for a in range(len(metabolites_forward)):
                                    if ((met_chebi[a] == sub_chebi) or (met_hmdb[a] == sub_hmdb) or (met_inchi[a] == sub_inchi) or (met_kegg[a] == sub_kegg) or (met_pubchem[a] == sub_pubchem)):
                                        values_ec_1[c].append(sub_row['VALUE'] / stoich_forward[a])
                                        break

                                # 7: all substrates
                                tanimoto = []
                                for a in range(len(metabolites_forward)):

                                    # if id's available, report tanimoto similarity of the
                                    # fingerprints (pybel's `|` operator); otherwise 0
                                    if (type(met_inchi[a]) == str) and (type(sub_inchi) == str):
                                        tanimoto.append(pybel.readstring('inchi',met_inchi[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                    elif (type(met_smiles[a]) == str) and (type(sub_inchi) == str):
                                        tanimoto.append(pybel.readstring('smiles',met_smiles[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                    else:
                                        tanimoto.append(0)

                                # keep the value together with its best similarity score
                                if len(tanimoto) > 0:
                                    values_ec_7[c].append(sub_row['VALUE'])
                                    tanimoto_ec_7[c].append(max(tanimoto))

                    # aggregate data for all valid ec numbers
                    values1 = []
                    values7 = []
                    for c in range(len(ecs_match)):
                        if len(values_ec_1[c]) > 0:
                            values1.append(np.mean(values_ec_1[c]))
                        if len(values_ec_7[c]) > 0:
                            if max(tanimoto_ec_7[c]) == 0:
                                # No similarity information at all: fall back to a plain
                                # mean. Unit weights (np.ones) made np.dot return the SUM
                                # of the values, inconsistent with the normalised weights
                                # used below, so the weights are now uniform and sum to 1.
                                weights = np.ones(len(tanimoto_ec_7[c])) / len(tanimoto_ec_7[c])
                            else:
                                # similarity-weighted mean; the total is computed once
                                tanimoto_total = sum(tanimoto_ec_7[c])
                                weights = [float(x)/tanimoto_total for x in tanimoto_ec_7[c]]
                            values7.append(np.dot(weights,values_ec_7[c]))

                    # determine which pipeline should be used
                    # (values are multiplied by 60*60 before being stored)
                    if len(values1) > 0:
                        kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values1)*60*60,'1Mec']
                    elif len(values7) > 0:
                        kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values7)*60*60,'7Mec']

                    # not reversible
                    kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.nan,np.nan]
            
            # else, do pipeline
            else:

                # # # FORWARD DIRECTION # # #

                # get metabolite ID's and pH's for every forward metabolite
                met_chebi = []
                met_kegg = []
                met_pubchem = []
                met_inchi = []
                met_hmdb = []
                met_smiles = []
                met_ph = []

                # Build the METID list once. The original rebuilt it and repeated the
                # linear search twelve times per metabolite.
                metabolite_ids = df_metabolites['METID'].tolist()

                # (source column, destination list) for the IDs that are copied verbatim
                copied_columns = [
                    ('KEGG', met_kegg),
                    ('PUBCHEM', met_pubchem),
                    ('INCHI STRING', met_inchi),
                    ('HMDB', met_hmdb),
                    ('SMILE', met_smiles),
                ]

                for a in range(len(metabolites_forward)):

                    # Row describing this metabolite. list.index() raises ValueError if
                    # the METID is not listed. As in the original lookup, the list
                    # position is used as the .loc label.
                    # NOTE(review): this assumes df_metabolites has its default
                    # 0..n-1 index - confirm.
                    met_row = df_metabolites.loc[metabolite_ids.index(metabolites_forward[a])]

                    # chebi: string entries are normalised through int(); anything that
                    # is not a plain string is recorded as missing
                    if type(met_row['CHEBI']) == str:
                        met_chebi.append(str(int(met_row['CHEBI'])))
                    else:
                        met_chebi.append(np.nan)

                    # kegg, pubchem, inchi, hmdb, smiles: keep strings, mark the rest missing
                    for column, destination in copied_columns:
                        if type(met_row[column]) == str:
                            destination.append(met_row[column])
                        else:
                            destination.append(np.nan)

                    # ph: looked up in `ph` by the second-to-last character of the metabolite ID
                    met_ph.append(ph[metabolites_forward[a][-2]])

                # # # TRY PIPELINE 1-12 # # #

                # one outer list per pipeline (1-12); each receives one inner list
                # per usable ec number found below
                values_ec_1, values_ec_2, values_ec_3 = [], [], []
                values_ec_4, values_ec_5, values_ec_6 = [], [], []
                values_ec_7, values_ec_8, values_ec_9 = [], [], []
                values_ec_10, values_ec_11, values_ec_12 = [], [], []

                # tanimoto indices exist only for the "all substrates" pipelines (7-12)
                tanimoto_ec_7, tanimoto_ec_8, tanimoto_ec_9 = [], [], []
                tanimoto_ec_10, tanimoto_ec_11, tanimoto_ec_12 = [], [], []

                # every list that must grow in step with `ecs`
                per_ec_lists = (
                    values_ec_1, values_ec_2, values_ec_3, values_ec_4,
                    values_ec_5, values_ec_6, values_ec_7, values_ec_8,
                    values_ec_9, values_ec_10, values_ec_11, values_ec_12,
                    tanimoto_ec_7, tanimoto_ec_8, tanimoto_ec_9,
                    tanimoto_ec_10, tanimoto_ec_11, tanimoto_ec_12,
                )

                # get all reaction ec numbers with all 4 numbers available
                # (a '-' in any of the first four dot-separated fields disqualifies it)
                ecs = []
                for ec in df_reactions.loc[i]['EC NUMBER'].split(' | '):
                    ec_fields = ec.split('.')
                    if (ec_fields[0] != '-') and (ec_fields[1] != '-') and (ec_fields[2] != '-') and (ec_fields[3] != '-'):
                        ecs.append(ec)
                        for per_ec_list in per_ec_lists:
                            per_ec_list.append([])

                # iterate over ec numbers
                for c,ec in enumerate(ecs):

                    # The human and all-organism data are processed identically; only the
                    # file and the destination pipelines differ. Each entry is:
                    #   (available ec numbers, data file template,
                    #    "correct substrate" outputs for the three condition tiers,
                    #    "all substrates" (values, tanimoto) outputs for the three tiers)
                    # human data         -> pipelines 1-3 and 7-9
                    # all organisms data -> pipelines 4-6 and 10-12
                    sources = [
                        (available_human, '_data_/processing/brenda/human/%s.tsv',
                         [values_ec_1[c], values_ec_2[c], values_ec_3[c]],
                         [(values_ec_7[c], tanimoto_ec_7[c]),
                          (values_ec_8[c], tanimoto_ec_8[c]),
                          (values_ec_9[c], tanimoto_ec_9[c])]),
                        (available_all, '_data_/processing/brenda/all_organisms/%s.tsv',
                         [values_ec_4[c], values_ec_5[c], values_ec_6[c]],
                         [(values_ec_10[c], tanimoto_ec_10[c]),
                          (values_ec_11[c], tanimoto_ec_11[c]),
                          (values_ec_12[c], tanimoto_ec_12[c])]),
                    ]

                    for available, path_template, exact_targets, similar_targets in sources:

                        # determine if data available for this ec number
                        if ec not in available:
                            continue

                        # load data (the name df_human is kept for both sources, as before)
                        df_human = pd.read_table(path_template % ec)

                        # iterate over data
                        for b in range(df_human.shape[0]):

                            # fetch the row and locate its substrate once, instead of
                            # repeating the linear inchis.index() search for every ID
                            sub_row = df_human.loc[b]
                            sub_index = inchis.index(sub_row['INCHI'])

                            # get substrate ID's; entries that are not plain strings are
                            # treated as missing (exact type check kept on purpose)
                            if type(chebis[sub_index]) == str:
                                # keep only the part after the first ':' of the stored ID
                                sub_chebi = chebis[sub_index].split(':')[1]
                            else:
                                sub_chebi = np.nan
                            sub_hmdb = hmdbs[sub_index] if type(hmdbs[sub_index]) == str else np.nan
                            sub_inchi = inchi_codes[sub_index] if type(inchi_codes[sub_index]) == str else np.nan
                            sub_kegg = keggs[sub_index] if type(keggs[sub_index]) == str else np.nan
                            sub_pubchem = pubchems[sub_index] if type(pubchems[sub_index]) == str else np.nan

                            # get temperature, ph, mutation
                            sub_temp = round(sub_row['TEMP'],1)
                            sub_ph = round(sub_row['PH'],1)
                            mutation = type(sub_row['MUTATION']) == str

                            # Evaluate, per forward metabolite, the three condition tiers
                            # and whether the substrate is the same compound. Rows with a
                            # MUTATION string never satisfy any tier.
                            #   tier 0: T = 37, pH = correct
                            #   tier 1: T = 25-40, pH within 1.0 of correct
                            #   tier 2: T = 25-40 + unknown, pH within 1.0 of correct + unknown
                            temp_in_range = (sub_temp >= 25) and (sub_temp <= 40)
                            tier_ok = []
                            same_substrate = []
                            for a in range(len(metabolites_forward)):
                                ph_close = abs(sub_ph-met_ph[a]) <= 1
                                tier_ok.append((
                                    (not mutation) and (sub_temp == 37) and (sub_ph == met_ph[a]),
                                    (not mutation) and temp_in_range and ph_close,
                                    (not mutation) and (temp_in_range or np.isnan(sub_temp)) and (ph_close or np.isnan(sub_ph)),
                                ))
                                # NaN never compares equal, so missing IDs cannot match
                                same_substrate.append((met_chebi[a] == sub_chebi) or (met_hmdb[a] == sub_hmdb) or (met_inchi[a] == sub_inchi) or (met_kegg[a] == sub_kegg) or (met_pubchem[a] == sub_pubchem))

                            # 1-3 (human) / 4-6 (all organisms): correct ec, correct substrate
                            # the first matching metabolite wins; the value is divided by
                            # its stoichiometry
                            for tier, exact_target in enumerate(exact_targets):
                                for a in range(len(metabolites_forward)):
                                    if tier_ok[a][tier] and same_substrate[a]:
                                        exact_target.append(sub_row['VALUE'] / stoich_forward[a])
                                        break

                            # 7-9 (human) / 10-12 (all organisms): correct ec, all substrates
                            # Similarities are computed lazily and cached per metabolite,
                            # so each fingerprint pair is evaluated at most once per row
                            # instead of once per tier.
                            similarity = {}
                            for tier, (similar_values, similar_scores) in enumerate(similar_targets):
                                tanimoto = []
                                for a in range(len(metabolites_forward)):
                                    if not tier_ok[a][tier]:
                                        continue

                                    # if id's available, report tanimoto similarity of the
                                    # fingerprints (pybel's `|` operator); otherwise 0
                                    if a not in similarity:
                                        if (type(met_inchi[a]) == str) and (type(sub_inchi) == str):
                                            similarity[a] = pybel.readstring('inchi',met_inchi[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp()
                                        elif (type(met_smiles[a]) == str) and (type(sub_inchi) == str):
                                            similarity[a] = pybel.readstring('smiles',met_smiles[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp()
                                        else:
                                            similarity[a] = 0
                                    tanimoto.append(similarity[a])

                                # keep the value together with its best similarity score
                                if len(tanimoto) > 0:
                                    similar_values.append(sub_row['VALUE'])
                                    similar_scores.append(max(tanimoto))

                # aggregate data for all valid ec numbers: one list per pipeline, holding
                # one aggregated value for every ec number that produced data
                values1 = []
                values2 = []
                values3 = []
                values4 = []
                values5 = []
                values6 = []
                values7 = []
                values8 = []
                values9 = []
                values10 = []
                values11 = []
                values12 = []

                # pipelines 1-6 ("correct substrate"): plain mean per ec number
                exact_pipelines = [
                    (values1, values_ec_1), (values2, values_ec_2), (values3, values_ec_3),
                    (values4, values_ec_4), (values5, values_ec_5), (values6, values_ec_6),
                ]

                # pipelines 7-12 ("all substrates"): tanimoto-weighted mean per ec number
                similarity_pipelines = [
                    (values7, values_ec_7, tanimoto_ec_7),
                    (values8, values_ec_8, tanimoto_ec_8),
                    (values9, values_ec_9, tanimoto_ec_9),
                    (values10, values_ec_10, tanimoto_ec_10),
                    (values11, values_ec_11, tanimoto_ec_11),
                    (values12, values_ec_12, tanimoto_ec_12),
                ]

                for c in range(len(ecs)):
                    for aggregated, per_ec_values in exact_pipelines:
                        if len(per_ec_values[c]) > 0:
                            aggregated.append(np.mean(per_ec_values[c]))

                    for aggregated, per_ec_values, per_ec_tanimoto in similarity_pipelines:
                        if len(per_ec_values[c]) > 0:
                            if max(per_ec_tanimoto[c]) == 0:
                                # No similarity information at all: fall back to a plain
                                # mean. Unit weights (np.ones) made np.dot return the SUM
                                # of the values, inconsistent with the normalised weights
                                # used below, so the weights are now uniform and sum to 1.
                                weights = np.ones(len(per_ec_tanimoto[c])) / len(per_ec_tanimoto[c])
                            else:
                                # normalise so the weights sum to 1; total computed once
                                tanimoto_total = sum(per_ec_tanimoto[c])
                                weights = [float(x)/tanimoto_total for x in per_ec_tanimoto[c]]
                            aggregated.append(np.dot(weights,per_ec_values[c]))

                # determine which pipeline should be used
                if len(values1) > 0:
                    kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values1)*60*60,1]
                elif len(values2) > 0:
                    kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values2)*60*60,2]
                elif len(values3) > 0:
                    kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values3)*60*60,3]
                elif len(values4) > 0:
                    kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values4)*60*60,4]
                elif len(values5) > 0:
                    kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values5)*60*60,5]
                elif len(values6) > 0:
                    kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values6)*60*60,6]
                elif len(values7) > 0:
                    kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values7)*60*60,7]
                elif len(values8) > 0:
                    kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values8)*60*60,8]
                elif len(values9) > 0:
                    kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values9)*60*60,9]
                elif len(values10) > 0:
                    kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values10)*60*60,10]
                elif len(values11) > 0:
                    kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values11)*60*60,11]
                elif len(values12) > 0:
                    kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values12)*60*60,12]
                else:

                    # # # TRY PIPELINE 13-24 # # #

                    # one outer list per pipeline (13-24); each receives one inner list
                    # per distinct three-field ec class found below
                    values_ec_13, values_ec_14, values_ec_15 = [], [], []
                    values_ec_16, values_ec_17, values_ec_18 = [], [], []
                    values_ec_19, values_ec_20, values_ec_21 = [], [], []
                    values_ec_22, values_ec_23, values_ec_24 = [], [], []

                    # tanimoto indices exist only for pipelines 19-24
                    tanimoto_ec_19, tanimoto_ec_20, tanimoto_ec_21 = [], [], []
                    tanimoto_ec_22, tanimoto_ec_23, tanimoto_ec_24 = [], [], []

                    # every list that must grow in step with ecs1/ecs2/ecs3
                    per_class_lists = (
                        values_ec_13, values_ec_14, values_ec_15, values_ec_16,
                        values_ec_17, values_ec_18, values_ec_19, values_ec_20,
                        values_ec_21, values_ec_22, values_ec_23, values_ec_24,
                        tanimoto_ec_19, tanimoto_ec_20, tanimoto_ec_21,
                        tanimoto_ec_22, tanimoto_ec_23, tanimoto_ec_24,
                    )

                    # get all unique reaction ec numbers with first 3 numbers available;
                    # ecs1/ecs2/ecs3 hold the first/second/third field of each class
                    ecs1 = []
                    ecs2 = []
                    ecs3 = []
                    for ec in df_reactions.loc[i]['EC NUMBER'].split(' | '):
                        ec_fields = ec.split('.')
                        if (ec_fields[0] != '-') and (ec_fields[1] != '-') and (ec_fields[2] != '-'):

                            # has this three-field class been recorded already?
                            found = (ec_fields[0], ec_fields[1], ec_fields[2]) in zip(ecs1, ecs2, ecs3)
                            if not found:
                                ecs1.append(ec_fields[0])
                                ecs2.append(ec_fields[1])
                                ecs3.append(ec_fields[2])
                                for per_class_list in per_class_lists:
                                    per_class_list.append([])

                    # iterate over ec number classes
                    # (for each class c, scan the human files first and then the all-organism files;
                    #  every BRENDA row is binned into each pipeline tier 13-24 whose conditions it meets)
                    for c in range(len(ecs1)):

                        # iterate over all available matching human ec numbers
                        # (only the first three EC fields are compared, so any fourth field matches)
                        for ec in available_human:
                            if (ec.split('.')[0] == ecs1[c]) and (ec.split('.')[1] == ecs2[c]) and (ec.split('.')[2] == ecs3[c]):

                                # load human data
                                df_human = pd.read_table('_data_/processing/brenda/human/%s.tsv' % ec)

                                # iterate over human data
                                for b in range(df_human.shape[0]):

                                    # get substrate ID's
                                    # (looked up by the position of the row's INCHI value in `inchis`;
                                    #  any identifier that is not a string is treated as missing -> NaN)
                                    if type(chebis[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                        sub_chebi = chebis[inchis.index(df_human.loc[b]['INCHI'])].split(':')[1]
                                    else:
                                        sub_chebi = np.nan

                                    if type(hmdbs[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                        sub_hmdb = hmdbs[inchis.index(df_human.loc[b]['INCHI'])]
                                    else:
                                        sub_hmdb = np.nan

                                    if type(inchi_codes[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                        sub_inchi = inchi_codes[inchis.index(df_human.loc[b]['INCHI'])]
                                    else:
                                        sub_inchi = np.nan

                                    if type(keggs[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                        sub_kegg = keggs[inchis.index(df_human.loc[b]['INCHI'])]
                                    else:
                                        sub_kegg = np.nan

                                    if type(pubchems[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                        sub_pubchem = pubchems[inchis.index(df_human.loc[b]['INCHI'])]
                                    else:
                                        sub_pubchem = np.nan

                                    # get temperature, ph, mutation
                                    # (a row counts as a mutant whenever its MUTATION cell holds a string)
                                    sub_temp = round(df_human.loc[b]['TEMP'],1)
                                    sub_ph = round(df_human.loc[b]['PH'],1)
                                    if type(df_human.loc[b]['MUTATION']) == str:
                                        mutation = True
                                    else:
                                        mutation = False

                                    # 13: X.X.X.ANY, correct substrate, human, T = 37, pH = correct
                                    # (the row is used once, for the first metabolite it matches,
                                    #  and its value is divided by that metabolite's stoich_forward entry)
                                    for a in range(len(metabolites_forward)):
                                        if (sub_temp == 37) and (sub_ph == met_ph[a]) and (not mutation) and ((met_chebi[a] == sub_chebi) or (met_hmdb[a] == sub_hmdb) or (met_inchi[a] == sub_inchi) or (met_kegg[a] == sub_kegg) or (met_pubchem[a] == sub_pubchem)):
                                            values_ec_13[c].append(df_human.loc[b]['VALUE'] / stoich_forward[a])
                                            break

                                    # 14: X.X.X.ANY, correct substrate, human, T = 25-40, pH within 1.0 of correct
                                    for a in range(len(metabolites_forward)):
                                        if ((sub_temp >= 25) and (sub_temp <= 40)) and (abs(sub_ph-met_ph[a]) <= 1) and (not mutation) and ((met_chebi[a] == sub_chebi) or (met_hmdb[a] == sub_hmdb) or (met_inchi[a] == sub_inchi) or (met_kegg[a] == sub_kegg) or (met_pubchem[a] == sub_pubchem)):
                                            values_ec_14[c].append(df_human.loc[b]['VALUE'] / stoich_forward[a])
                                            break 

                                    # 15: X.X.X.ANY, correct substrate, human, T = 25-40 + unknown, pH within 1.0 of correct + unknown
                                    # ("unknown" means the rounded TEMP / PH value is NaN)
                                    for a in range(len(metabolites_forward)):
                                        if (((sub_temp >= 25) and (sub_temp <= 40)) or np.isnan(sub_temp)) and ((abs(sub_ph-met_ph[a]) <= 1) or np.isnan(sub_ph)) and (not mutation) and ((met_chebi[a] == sub_chebi) or (met_hmdb[a] == sub_hmdb) or (met_inchi[a] == sub_inchi) or (met_kegg[a] == sub_kegg) or (met_pubchem[a] == sub_pubchem)):
                                            values_ec_15[c].append(df_human.loc[b]['VALUE'] / stoich_forward[a])
                                            break

                                    # 19: X.X.X.ANY, all substrates, human, T = 37, pH = correct
                                    # (no substrate match required; the row is scored by structural similarity instead)
                                    tanimoto = []
                                    for a in range(len(metabolites_forward)):
                                        if (sub_temp == 37) and (sub_ph == met_ph[a]) and (not mutation):

                                            # if structures are available, record the Tanimoto coefficient
                                            # (pybel's `|` operator on two fingerprints); otherwise record 0
                                            if (type(met_inchi[a]) == str) and (type(sub_inchi) == str):
                                                tanimoto.append(pybel.readstring('inchi',met_inchi[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                            elif (type(met_smiles[a]) == str) and (type(sub_inchi) == str):
                                                tanimoto.append(pybel.readstring('smiles',met_smiles[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                            else:
                                                tanimoto.append(0)

                                    # keep the unscaled value together with the best score over all metabolites
                                    if len(tanimoto) > 0:
                                        values_ec_19[c].append(df_human.loc[b]['VALUE'])
                                        tanimoto_ec_19[c].append(max(tanimoto))

                                    # 20: X.X.X.ANY, all substrates, human, T = 25-40, pH within 1.0 of correct
                                    tanimoto = []
                                    for a in range(len(metabolites_forward)):
                                        if ((sub_temp >= 25) and (sub_temp <= 40)) and (abs(sub_ph-met_ph[a]) <= 1) and (not mutation):

                                            # if structures are available, record the Tanimoto coefficient; otherwise 0
                                            if (type(met_inchi[a]) == str) and (type(sub_inchi) == str):
                                                tanimoto.append(pybel.readstring('inchi',met_inchi[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                            elif (type(met_smiles[a]) == str) and (type(sub_inchi) == str):
                                                tanimoto.append(pybel.readstring('smiles',met_smiles[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                            else:
                                                tanimoto.append(0)

                                    if len(tanimoto) > 0:
                                        values_ec_20[c].append(df_human.loc[b]['VALUE'])
                                        tanimoto_ec_20[c].append(max(tanimoto))

                                    # 21: X.X.X.ANY, all substrates, human, T = 25-40 + unknown, pH within 1.0 of correct + unknown
                                    tanimoto = []
                                    for a in range(len(metabolites_forward)):
                                        if (((sub_temp >= 25) and (sub_temp <= 40)) or np.isnan(sub_temp)) and ((abs(sub_ph-met_ph[a]) <= 1) or np.isnan(sub_ph)) and (not mutation):

                                            # if structures are available, record the Tanimoto coefficient; otherwise 0
                                            if (type(met_inchi[a]) == str) and (type(sub_inchi) == str):
                                                tanimoto.append(pybel.readstring('inchi',met_inchi[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                            elif (type(met_smiles[a]) == str) and (type(sub_inchi) == str):
                                                tanimoto.append(pybel.readstring('smiles',met_smiles[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                            else:
                                                tanimoto.append(0)

                                    if len(tanimoto) > 0:
                                        values_ec_21[c].append(df_human.loc[b]['VALUE'])
                                        tanimoto_ec_21[c].append(max(tanimoto))    

                        # iterate over all available matching all organism ec numbers
                        # (same X.X.X.ANY matching as above, but reading the all_organisms files)
                        for ec in available_all:
                            if (ec.split('.')[0] == ecs1[c]) and (ec.split('.')[1] == ecs2[c]) and (ec.split('.')[2] == ecs3[c]):

                                # load all data
                                # NOTE: the name df_human is reused here although it now holds all-organism rows
                                df_human = pd.read_table('_data_/processing/brenda/all_organisms/%s.tsv' % ec)

                                # iterate over all data
                                for b in range(df_human.shape[0]):

                                    # get substrate ID's
                                    # (looked up by the position of the row's INCHI value in `inchis`;
                                    #  any identifier that is not a string is treated as missing -> NaN)
                                    if type(chebis[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                        sub_chebi = chebis[inchis.index(df_human.loc[b]['INCHI'])].split(':')[1]
                                    else:
                                        sub_chebi = np.nan

                                    if type(hmdbs[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                        sub_hmdb = hmdbs[inchis.index(df_human.loc[b]['INCHI'])]
                                    else:
                                        sub_hmdb = np.nan

                                    if type(inchi_codes[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                        sub_inchi = inchi_codes[inchis.index(df_human.loc[b]['INCHI'])]
                                    else:
                                        sub_inchi = np.nan

                                    if type(keggs[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                        sub_kegg = keggs[inchis.index(df_human.loc[b]['INCHI'])]
                                    else:
                                        sub_kegg = np.nan

                                    if type(pubchems[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                        sub_pubchem = pubchems[inchis.index(df_human.loc[b]['INCHI'])]
                                    else:
                                        sub_pubchem = np.nan

                                    # get temperature, ph, mutation
                                    sub_temp = round(df_human.loc[b]['TEMP'],1)
                                    sub_ph = round(df_human.loc[b]['PH'],1)
                                    if type(df_human.loc[b]['MUTATION']) == str:
                                        mutation = True
                                    else:
                                        mutation = False

                                    # 16: X.X.X.ANY, correct substrate, all organisms, T = 37, pH = correct
                                    for a in range(len(metabolites_forward)):
                                        if (sub_temp == 37) and (sub_ph == met_ph[a]) and (not mutation) and ((met_chebi[a] == sub_chebi) or (met_hmdb[a] == sub_hmdb) or (met_inchi[a] == sub_inchi) or (met_kegg[a] == sub_kegg) or (met_pubchem[a] == sub_pubchem)):
                                            values_ec_16[c].append(df_human.loc[b]['VALUE'] / stoich_forward[a])
                                            break

                                    # 17: X.X.X.ANY, correct substrate, all organisms, T = 25-40, pH within 1.0 of correct
                                    for a in range(len(metabolites_forward)):
                                        if ((sub_temp >= 25) and (sub_temp <= 40)) and (abs(sub_ph-met_ph[a]) <= 1) and (not mutation) and ((met_chebi[a] == sub_chebi) or (met_hmdb[a] == sub_hmdb) or (met_inchi[a] == sub_inchi) or (met_kegg[a] == sub_kegg) or (met_pubchem[a] == sub_pubchem)):
                                            values_ec_17[c].append(df_human.loc[b]['VALUE'] / stoich_forward[a])
                                            break 

                                    # 18: X.X.X.ANY, correct substrate, all organisms, T = 25-40 + unknown, pH within 1.0 of correct + unknown
                                    for a in range(len(metabolites_forward)):
                                        if (((sub_temp >= 25) and (sub_temp <= 40)) or np.isnan(sub_temp)) and ((abs(sub_ph-met_ph[a]) <= 1) or np.isnan(sub_ph)) and (not mutation) and ((met_chebi[a] == sub_chebi) or (met_hmdb[a] == sub_hmdb) or (met_inchi[a] == sub_inchi) or (met_kegg[a] == sub_kegg) or (met_pubchem[a] == sub_pubchem)):
                                            values_ec_18[c].append(df_human.loc[b]['VALUE'] / stoich_forward[a])
                                            break

                                    # 22: X.X.X.ANY, all substrates, all organisms, T = 37, pH = correct
                                    tanimoto = []
                                    for a in range(len(metabolites_forward)):
                                        if (sub_temp == 37) and (sub_ph == met_ph[a]) and (not mutation):

                                            # if structures are available, record the Tanimoto coefficient; otherwise 0
                                            if (type(met_inchi[a]) == str) and (type(sub_inchi) == str):
                                                tanimoto.append(pybel.readstring('inchi',met_inchi[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                            elif (type(met_smiles[a]) == str) and (type(sub_inchi) == str):
                                                tanimoto.append(pybel.readstring('smiles',met_smiles[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                            else:
                                                tanimoto.append(0)

                                    if len(tanimoto) > 0:
                                        values_ec_22[c].append(df_human.loc[b]['VALUE'])
                                        tanimoto_ec_22[c].append(max(tanimoto))

                                    # 23: X.X.X.ANY, all substrates, all organisms, T = 25-40, pH within 1.0 of correct
                                    tanimoto = []
                                    for a in range(len(metabolites_forward)):
                                        if ((sub_temp >= 25) and (sub_temp <= 40)) and (abs(sub_ph-met_ph[a]) <= 1) and (not mutation):

                                            # if structures are available, record the Tanimoto coefficient; otherwise 0
                                            if (type(met_inchi[a]) == str) and (type(sub_inchi) == str):
                                                tanimoto.append(pybel.readstring('inchi',met_inchi[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                            elif (type(met_smiles[a]) == str) and (type(sub_inchi) == str):
                                                tanimoto.append(pybel.readstring('smiles',met_smiles[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                            else:
                                                tanimoto.append(0)

                                    if len(tanimoto) > 0:
                                        values_ec_23[c].append(df_human.loc[b]['VALUE'])
                                        tanimoto_ec_23[c].append(max(tanimoto))

                                    # 24: X.X.X.ANY, all substrates, all organisms, T = 25-40 + unknown, pH within 1.0 of correct + unknown
                                    tanimoto = []
                                    for a in range(len(metabolites_forward)):
                                        if (((sub_temp >= 25) and (sub_temp <= 40)) or np.isnan(sub_temp)) and ((abs(sub_ph-met_ph[a]) <= 1) or np.isnan(sub_ph)) and (not mutation):

                                            # if structures are available, record the Tanimoto coefficient; otherwise 0
                                            if (type(met_inchi[a]) == str) and (type(sub_inchi) == str):
                                                tanimoto.append(pybel.readstring('inchi',met_inchi[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                            elif (type(met_smiles[a]) == str) and (type(sub_inchi) == str):
                                                tanimoto.append(pybel.readstring('smiles',met_smiles[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                            else:
                                                tanimoto.append(0)

                                    if len(tanimoto) > 0:
                                        values_ec_24[c].append(df_human.loc[b]['VALUE'])
                                        tanimoto_ec_24[c].append(max(tanimoto))                     

                    # aggregate data for all valid ec numbers
                    # (one aggregated number per EC class that produced at least one value in a tier)
                    values13 = []
                    values14 = []
                    values15 = []
                    values16 = []
                    values17 = []
                    values18 = []
                    values19 = []
                    values20 = []
                    values21 = []
                    values22 = []
                    values23 = []
                    values24 = []

                    # tiers 13-18 (substrate matched): plain arithmetic mean per EC class
                    mean_tiers = (
                        (values13, values_ec_13),
                        (values14, values_ec_14),
                        (values15, values_ec_15),
                        (values16, values_ec_16),
                        (values17, values_ec_17),
                        (values18, values_ec_18),
                    )

                    # tiers 19-24 (any substrate): mean weighted by the tanimoto score of each value
                    weighted_tiers = (
                        (values19, values_ec_19, tanimoto_ec_19),
                        (values20, values_ec_20, tanimoto_ec_20),
                        (values21, values_ec_21, tanimoto_ec_21),
                        (values22, values_ec_22, tanimoto_ec_22),
                        (values23, values_ec_23, tanimoto_ec_23),
                        (values24, values_ec_24, tanimoto_ec_24),
                    )

                    for c in range(len(ecs1)):

                        for tier_output, tier_values in mean_tiers:
                            if len(tier_values[c]) > 0:
                                tier_output.append(np.mean(tier_values[c]))

                        for tier_output, tier_values, tier_scores in weighted_tiers:
                            if len(tier_values[c]) > 0:
                                if max(tier_scores[c]) == 0:
                                    # no similarity information at all: fall back to uniform weights.
                                    # They are normalised to sum to 1 like the weights in the other
                                    # branch, so the result is the mean (unnormalised ones would make
                                    # np.dot return the SUM of the values instead).
                                    weights = np.ones(len(tier_scores[c])) / len(tier_scores[c])
                                else:
                                    # normalise the scores so the weights sum to 1; the total is
                                    # computed once instead of once per element
                                    score_total = float(sum(tier_scores[c]))
                                    weights = [x / score_total for x in tier_scores[c]]
                                tier_output.append(np.dot(weights, tier_values[c]))

                    # determine which pipeline should be used
                    # Tiers are tried in priority order 13 -> 24 and the first one with any aggregated
                    # value wins. The winning tier appends one row to kcat_forward, written at label
                    # kcat_forward.shape[0]: [reaction id, mean over EC classes * 60 * 60, tier number].
                    # NOTE(review): the 60*60 factor presumably converts per-second values to per-hour
                    # - confirm the intended units.
                    # NOTE(review): writing to .loc[shape[0]] only appends if that label is not already
                    # in the index (i.e. a default 0..n-1 index) - confirm.
                    # If every tier is empty, the else branch falls through to pipelines 25-36.
                    if len(values13) > 0:
                        kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values13)*60*60,13]
                    elif len(values14) > 0:
                        kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values14)*60*60,14]
                    elif len(values15) > 0:
                        kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values15)*60*60,15]
                    elif len(values16) > 0:
                        kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values16)*60*60,16]
                    elif len(values17) > 0:
                        kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values17)*60*60,17]
                    elif len(values18) > 0:
                        kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values18)*60*60,18]
                    elif len(values19) > 0:
                        kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values19)*60*60,19]
                    elif len(values20) > 0:
                        kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values20)*60*60,20]
                    elif len(values21) > 0:
                        kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values21)*60*60,21]
                    elif len(values22) > 0:
                        kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values22)*60*60,22]
                    elif len(values23) > 0:
                        kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values23)*60*60,23]
                    elif len(values24) > 0:
                        kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values24)*60*60,24]
                    else:

                        # # # TRY PIPELINE 25-36 # # #

                        # fresh per-class accumulators for pipelines 25-36 (one inner list per EC class)
                        values_ec_25, values_ec_26, values_ec_27 = [], [], []
                        values_ec_28, values_ec_29, values_ec_30 = [], [], []
                        values_ec_31, values_ec_32, values_ec_33 = [], [], []
                        values_ec_34, values_ec_35, values_ec_36 = [], [], []

                        # matching tanimoto scores for the "all substrates" pipelines 31-36
                        tanimoto_ec_31, tanimoto_ec_32, tanimoto_ec_33 = [], [], []
                        tanimoto_ec_34, tanimoto_ec_35, tanimoto_ec_36 = [], [], []

                        # collect the distinct EC classes (first two fields) listed for this reaction;
                        # entries carrying a '-' placeholder in either of those fields are ignored
                        ecs1, ecs2 = [], []
                        for ec in df_reactions.loc[i]['EC NUMBER'].split(' | '):
                            ec_fields = ec.split('.')
                            if (ec_fields[0] == '-') or (ec_fields[1] == '-'):
                                continue

                            # a class that was already registered must not get a second slot
                            if (ec_fields[0], ec_fields[1]) in zip(ecs1, ecs2):
                                continue

                            ecs1.append(ec_fields[0])
                            ecs2.append(ec_fields[1])

                            # open an empty per-class slot in every accumulator so that
                            # index c of each accumulator lines up with ecs1[c]/ecs2[c]
                            for class_slots in (values_ec_25, values_ec_26, values_ec_27,
                                                values_ec_28, values_ec_29, values_ec_30,
                                                values_ec_31, values_ec_32, values_ec_33,
                                                values_ec_34, values_ec_35, values_ec_36,
                                                tanimoto_ec_31, tanimoto_ec_32, tanimoto_ec_33,
                                                tanimoto_ec_34, tanimoto_ec_35, tanimoto_ec_36):
                                class_slots.append([])

                        # iterate over ec number classes
                        for c in range(len(ecs1)):

                            # iterate over all available matching human ec numbers
                            for ec in available_human:
                                if (ec.split('.')[0] == ecs1[c]) and (ec.split('.')[1] == ecs2[c]):

                                    # load human data
                                    df_human = pd.read_table('_data_/processing/brenda/human/%s.tsv' % ec)

                                    # iterate over human data
                                    for b in range(df_human.shape[0]):

                                        # get substrate ID's
                                        if type(chebis[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                            sub_chebi = chebis[inchis.index(df_human.loc[b]['INCHI'])].split(':')[1]
                                        else:
                                            sub_chebi = np.nan

                                        if type(hmdbs[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                            sub_hmdb = hmdbs[inchis.index(df_human.loc[b]['INCHI'])]
                                        else:
                                            sub_hmdb = np.nan

                                        if type(inchi_codes[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                            sub_inchi = inchi_codes[inchis.index(df_human.loc[b]['INCHI'])]
                                        else:
                                            sub_inchi = np.nan

                                        if type(keggs[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                            sub_kegg = keggs[inchis.index(df_human.loc[b]['INCHI'])]
                                        else:
                                            sub_kegg = np.nan

                                        if type(pubchems[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                            sub_pubchem = pubchems[inchis.index(df_human.loc[b]['INCHI'])]
                                        else:
                                            sub_pubchem = np.nan

                                        # get temperature, ph, mutation
                                        sub_temp = round(df_human.loc[b]['TEMP'],1)
                                        sub_ph = round(df_human.loc[b]['PH'],1)
                                        if type(df_human.loc[b]['MUTATION']) == str:
                                            mutation = True
                                        else:
                                            mutation = False

                                        # 25: X.X.ANY.ANY, correct substrate, human, T = 37, pH = correct
                                        for a in range(len(metabolites_forward)):
                                            if (sub_temp == 37) and (sub_ph == met_ph[a]) and (not mutation) and ((met_chebi[a] == sub_chebi) or (met_hmdb[a] == sub_hmdb) or (met_inchi[a] == sub_inchi) or (met_kegg[a] == sub_kegg) or (met_pubchem[a] == sub_pubchem)):
                                                values_ec_25[c].append(df_human.loc[b]['VALUE'] / stoich_forward[a])
                                                break

                                        # 26: X.X.ANY.ANY, correct substrate, human, T = 25-40, pH within 1.0 of correct
                                        for a in range(len(metabolites_forward)):
                                            if ((sub_temp >= 25) and (sub_temp <= 40)) and (abs(sub_ph-met_ph[a]) <= 1) and (not mutation) and ((met_chebi[a] == sub_chebi) or (met_hmdb[a] == sub_hmdb) or (met_inchi[a] == sub_inchi) or (met_kegg[a] == sub_kegg) or (met_pubchem[a] == sub_pubchem)):
                                                values_ec_26[c].append(df_human.loc[b]['VALUE'] / stoich_forward[a])
                                                break 

                                        # 27: X.X.ANY.ANY, correct substrate, human, T = 25-40 + unknown, pH within 1.0 of correct + unknown
                                        for a in range(len(metabolites_forward)):
                                            if (((sub_temp >= 25) and (sub_temp <= 40)) or np.isnan(sub_temp)) and ((abs(sub_ph-met_ph[a]) <= 1) or np.isnan(sub_ph)) and (not mutation) and ((met_chebi[a] == sub_chebi) or (met_hmdb[a] == sub_hmdb) or (met_inchi[a] == sub_inchi) or (met_kegg[a] == sub_kegg) or (met_pubchem[a] == sub_pubchem)):
                                                values_ec_27[c].append(df_human.loc[b]['VALUE'] / stoich_forward[a])
                                                break

                                        # 31-33: X.X.ANY.ANY, all substrates, human
                                        #   31: T = 37, pH = correct
                                        #   32: T = 25-40, pH within 1.0 of correct
                                        #   33: T = 25-40 + unknown, pH within 1.0 of correct + unknown
                                        # each tier keeps the measurement if its conditions are acceptable for at least
                                        # one forward metabolite, together with the best tanimoto score found
                                        condition_tiers = [
                                            (values_ec_31[c], tanimoto_ec_31[c],
                                             lambda k: (sub_temp == 37) and (sub_ph == met_ph[k])),
                                            (values_ec_32[c], tanimoto_ec_32[c],
                                             lambda k: ((sub_temp >= 25) and (sub_temp <= 40)) and (abs(sub_ph-met_ph[k]) <= 1)),
                                            (values_ec_33[c], tanimoto_ec_33[c],
                                             lambda k: (((sub_temp >= 25) and (sub_temp <= 40)) or np.isnan(sub_temp)) and ((abs(sub_ph-met_ph[k]) <= 1) or np.isnan(sub_ph))),
                                        ]

                                        # a score can only be computed when the measured substrate has an InChI
                                        sub_has_inchi = (type(sub_inchi) == str)

                                        for tier_values, tier_scores, conditions_match in condition_tiers:
                                            tanimoto = []
                                            for a in range(len(metabolites_forward)):
                                                if not (conditions_match(a) and (not mutation)):
                                                    continue

                                                # if id's available, report tanimoto distance (InChI preferred, SMILES as fallback)
                                                if sub_has_inchi and (type(met_inchi[a]) == str):
                                                    tanimoto.append(pybel.readstring('inchi',met_inchi[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                elif sub_has_inchi and (type(met_smiles[a]) == str):
                                                    tanimoto.append(pybel.readstring('smiles',met_smiles[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                else:
                                                    tanimoto.append(0)

                                            # at least one metabolite accepted the conditions: keep the raw value and best score
                                            if len(tanimoto) > 0:
                                                tier_values.append(df_human.loc[b]['VALUE'])
                                                tier_scores.append(max(tanimoto))

                            # iterate over all available matching all organism ec numbers
                            # (only the first two EC fields have to match: X.X.ANY.ANY)
                            for ec in available_all:
                                ec_fields = ec.split('.')
                                if (ec_fields[0] == ecs1[c]) and (ec_fields[1] == ecs2[c]):

                                    # load all data
                                    # NOTE: despite its name, df_human holds the all-organism table here
                                    df_human = pd.read_table('_data_/processing/brenda/all_organisms/%s.tsv' % ec)

                                    # iterate over all data
                                    for b in range(df_human.shape[0]):

                                        # fetch the row and the substrate's position in the lookup lists once;
                                        # inchis.index is a linear search and was previously repeated for every ID
                                        row = df_human.loc[b]
                                        sub_index = inchis.index(row['INCHI'])

                                        # get substrate ID's (np.nan whenever the lookup list holds no string)
                                        if type(chebis[sub_index]) == str:
                                            # keep only the part after the first ':' of the stored ChEBI entry
                                            sub_chebi = chebis[sub_index].split(':')[1]
                                        else:
                                            sub_chebi = np.nan

                                        if type(hmdbs[sub_index]) == str:
                                            sub_hmdb = hmdbs[sub_index]
                                        else:
                                            sub_hmdb = np.nan

                                        if type(inchi_codes[sub_index]) == str:
                                            sub_inchi = inchi_codes[sub_index]
                                        else:
                                            sub_inchi = np.nan

                                        if type(keggs[sub_index]) == str:
                                            sub_kegg = keggs[sub_index]
                                        else:
                                            sub_kegg = np.nan

                                        if type(pubchems[sub_index]) == str:
                                            sub_pubchem = pubchems[sub_index]
                                        else:
                                            sub_pubchem = np.nan

                                        # get temperature, ph, mutation
                                        sub_temp = round(row['TEMP'],1)
                                        sub_ph = round(row['PH'],1)
                                        # a string in the MUTATION column marks a mutant enzyme
                                        if type(row['MUTATION']) == str:
                                            mutation = True
                                        else:
                                            mutation = False

                                        # 28: X.X.ANY.ANY, correct substrate, all organisms, T = 37, pH = correct
                                        for a in range(len(metabolites_forward)):
                                            if (sub_temp == 37) and (sub_ph == met_ph[a]) and (not mutation) and ((met_chebi[a] == sub_chebi) or (met_hmdb[a] == sub_hmdb) or (met_inchi[a] == sub_inchi) or (met_kegg[a] == sub_kegg) or (met_pubchem[a] == sub_pubchem)):
                                                values_ec_28[c].append(row['VALUE'] / stoich_forward[a])
                                                break

                                        # 29: X.X.ANY.ANY, correct substrate, all organisms, T = 25-40, pH within 1.0 of correct
                                        for a in range(len(metabolites_forward)):
                                            if ((sub_temp >= 25) and (sub_temp <= 40)) and (abs(sub_ph-met_ph[a]) <= 1) and (not mutation) and ((met_chebi[a] == sub_chebi) or (met_hmdb[a] == sub_hmdb) or (met_inchi[a] == sub_inchi) or (met_kegg[a] == sub_kegg) or (met_pubchem[a] == sub_pubchem)):
                                                values_ec_29[c].append(row['VALUE'] / stoich_forward[a])
                                                break

                                        # 30: X.X.ANY.ANY, correct substrate, all organisms, T = 25-40 + unknown, pH within 1.0 of correct + unknown
                                        for a in range(len(metabolites_forward)):
                                            if (((sub_temp >= 25) and (sub_temp <= 40)) or np.isnan(sub_temp)) and ((abs(sub_ph-met_ph[a]) <= 1) or np.isnan(sub_ph)) and (not mutation) and ((met_chebi[a] == sub_chebi) or (met_hmdb[a] == sub_hmdb) or (met_inchi[a] == sub_inchi) or (met_kegg[a] == sub_kegg) or (met_pubchem[a] == sub_pubchem)):
                                                values_ec_30[c].append(row['VALUE'] / stoich_forward[a])
                                                break

                                        # 34: X.X.ANY.ANY, all substrates, all organisms, T = 37, pH = correct
                                        tanimoto = []
                                        for a in range(len(metabolites_forward)):
                                            if (sub_temp == 37) and (sub_ph == met_ph[a]) and (not mutation):

                                                # if id's available, report tanimoto distance
                                                if (type(met_inchi[a]) == str) and (type(sub_inchi) == str):
                                                    tanimoto.append(pybel.readstring('inchi',met_inchi[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                elif (type(met_smiles[a]) == str) and (type(sub_inchi) == str):
                                                    tanimoto.append(pybel.readstring('smiles',met_smiles[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                else:
                                                    tanimoto.append(0)

                                        # keep the raw value with the best score over all accepted metabolites
                                        if len(tanimoto) > 0:
                                            values_ec_34[c].append(row['VALUE'])
                                            tanimoto_ec_34[c].append(max(tanimoto))

                                        # 35: X.X.ANY.ANY, all substrates, all organisms, T = 25-40, pH within 1.0 of correct
                                        tanimoto = []
                                        for a in range(len(metabolites_forward)):
                                            if ((sub_temp >= 25) and (sub_temp <= 40)) and (abs(sub_ph-met_ph[a]) <= 1) and (not mutation):

                                                # if id's available, report tanimoto distance
                                                if (type(met_inchi[a]) == str) and (type(sub_inchi) == str):
                                                    tanimoto.append(pybel.readstring('inchi',met_inchi[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                elif (type(met_smiles[a]) == str) and (type(sub_inchi) == str):
                                                    tanimoto.append(pybel.readstring('smiles',met_smiles[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                else:
                                                    tanimoto.append(0)

                                        if len(tanimoto) > 0:
                                            values_ec_35[c].append(row['VALUE'])
                                            tanimoto_ec_35[c].append(max(tanimoto))

                                        # 36: X.X.ANY.ANY, all substrates, all organisms, T = 25-40 + unknown, pH within 1.0 of correct + unknown
                                        tanimoto = []
                                        for a in range(len(metabolites_forward)):
                                            if (((sub_temp >= 25) and (sub_temp <= 40)) or np.isnan(sub_temp)) and ((abs(sub_ph-met_ph[a]) <= 1) or np.isnan(sub_ph)) and (not mutation):

                                                # if id's available, report tanimoto distance
                                                if (type(met_inchi[a]) == str) and (type(sub_inchi) == str):
                                                    tanimoto.append(pybel.readstring('inchi',met_inchi[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                elif (type(met_smiles[a]) == str) and (type(sub_inchi) == str):
                                                    tanimoto.append(pybel.readstring('smiles',met_smiles[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                else:
                                                    tanimoto.append(0)

                                        if len(tanimoto) > 0:
                                            values_ec_36[c].append(row['VALUE'])
                                            tanimoto_ec_36[c].append(max(tanimoto))

                        # aggregate data for all valid ec numbers: one representative value per EC class
                        # and pipeline; classes that collected nothing for a pipeline contribute nothing
                        values25 = []
                        values26 = []
                        values27 = []
                        values28 = []
                        values29 = []
                        values30 = []
                        values31 = []
                        values32 = []
                        values33 = []
                        values34 = []
                        values35 = []
                        values36 = []

                        # pipelines 25-30 (correct substrate): plain mean of the collected values
                        averaged = [
                            (values_ec_25, values25),
                            (values_ec_26, values26),
                            (values_ec_27, values27),
                            (values_ec_28, values28),
                            (values_ec_29, values29),
                            (values_ec_30, values30),
                        ]

                        # pipelines 31-36 (all substrates): mean weighted by tanimoto score
                        similarity_weighted = [
                            (values_ec_31, tanimoto_ec_31, values31),
                            (values_ec_32, tanimoto_ec_32, values32),
                            (values_ec_33, tanimoto_ec_33, values33),
                            (values_ec_34, tanimoto_ec_34, values34),
                            (values_ec_35, tanimoto_ec_35, values35),
                            (values_ec_36, tanimoto_ec_36, values36),
                        ]

                        for c in range(len(ecs1)):

                            for per_class, aggregated in averaged:
                                if len(per_class[c]) > 0:
                                    aggregated.append(np.mean(per_class[c]))

                            for per_class, similarity, aggregated in similarity_weighted:
                                if len(per_class[c]) > 0:
                                    if max(similarity[c]) == 0:
                                        # no usable similarity information: fall back to equal weights.
                                        # They must sum to 1 like the similarity-based weights below,
                                        # otherwise the dot product is the SUM of the values, not their mean.
                                        # NOTE(review): if other pipeline tiers in this file use un-normalised
                                        # np.ones weights for this fallback, they need the same correction.
                                        weights = np.ones(len(similarity[c])) / len(similarity[c])
                                    else:
                                        # normalise the scores so the weights sum to 1
                                        weights = [float(x)/sum(similarity[c]) for x in similarity[c]]
                                    aggregated.append(np.dot(weights,per_class[c]))

                        # determine which pipeline should be used
                        if len(values25) > 0:
                            kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values25)*60*60,25]
                        elif len(values26) > 0:
                            kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values26)*60*60,26]
                        elif len(values27) > 0:
                            kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values27)*60*60,27]
                        elif len(values28) > 0:
                            kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values28)*60*60,28]
                        elif len(values29) > 0:
                            kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values29)*60*60,29]
                        elif len(values30) > 0:
                            kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values30)*60*60,30]
                        elif len(values31) > 0:
                            kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values31)*60*60,31]
                        elif len(values32) > 0:
                            kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values32)*60*60,32]
                        elif len(values33) > 0:
                            kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values33)*60*60,33]
                        elif len(values34) > 0:
                            kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values34)*60*60,34]
                        elif len(values35) > 0:
                            kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values35)*60*60,35]
                        elif len(values36) > 0:
                            kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values36)*60*60,36]
                        else:

                            # # # TRY PIPELINE 37-48 # # #

                            # initialize pipeline values and tanimoto indices: one list per pipeline,
                            # each of which receives one empty bucket per EC class found below
                            values_ec_37, values_ec_38, values_ec_39, values_ec_40 = [], [], [], []
                            values_ec_41, values_ec_42, values_ec_43, values_ec_44 = [], [], [], []
                            values_ec_45, values_ec_46, values_ec_47, values_ec_48 = [], [], [], []
                            tanimoto_ec_43, tanimoto_ec_44, tanimoto_ec_45 = [], [], []
                            tanimoto_ec_46, tanimoto_ec_47, tanimoto_ec_48 = [], [], []

                            # get all unique reaction ec numbers with first 1 number available
                            ecs1 = []
                            for ec in df_reactions.loc[i]['EC NUMBER'].split(' | '):
                                ec_class = ec.split('.')[0]

                                # skip undefined classes ('-') and classes already recorded
                                if (ec_class == '-') or (ec_class in ecs1):
                                    continue

                                ecs1.append(ec_class)

                                # open a fresh bucket for this class in every pipeline list
                                for per_class in (values_ec_37, values_ec_38, values_ec_39, values_ec_40,
                                                  values_ec_41, values_ec_42, values_ec_43, values_ec_44,
                                                  values_ec_45, values_ec_46, values_ec_47, values_ec_48,
                                                  tanimoto_ec_43, tanimoto_ec_44, tanimoto_ec_45,
                                                  tanimoto_ec_46, tanimoto_ec_47, tanimoto_ec_48):
                                    per_class.append([])

                            # iterate over ec number classes
                            for c in range(len(ecs1)):

                                # iterate over all available matching human ec numbers
                                # (only the first EC field has to match: X.ANY.ANY.ANY)
                                for ec in available_human:
                                    if (ec.split('.')[0] == ecs1[c]):

                                        # load human data
                                        df_human = pd.read_table('_data_/processing/brenda/human/%s.tsv' % ec)

                                        # iterate over human data
                                        for b in range(df_human.shape[0]):

                                            # fetch the row and the substrate's position in the lookup lists once;
                                            # inchis.index is a linear search and was previously repeated for every ID
                                            row = df_human.loc[b]
                                            sub_index = inchis.index(row['INCHI'])

                                            # get substrate ID's (np.nan whenever the lookup list holds no string)
                                            if type(chebis[sub_index]) == str:
                                                # keep only the part after the first ':' of the stored ChEBI entry
                                                sub_chebi = chebis[sub_index].split(':')[1]
                                            else:
                                                sub_chebi = np.nan

                                            if type(hmdbs[sub_index]) == str:
                                                sub_hmdb = hmdbs[sub_index]
                                            else:
                                                sub_hmdb = np.nan

                                            if type(inchi_codes[sub_index]) == str:
                                                sub_inchi = inchi_codes[sub_index]
                                            else:
                                                sub_inchi = np.nan

                                            if type(keggs[sub_index]) == str:
                                                sub_kegg = keggs[sub_index]
                                            else:
                                                sub_kegg = np.nan

                                            if type(pubchems[sub_index]) == str:
                                                sub_pubchem = pubchems[sub_index]
                                            else:
                                                sub_pubchem = np.nan

                                            # get temperature, ph, mutation
                                            sub_temp = round(row['TEMP'],1)
                                            sub_ph = round(row['PH'],1)
                                            # a string in the MUTATION column marks a mutant enzyme
                                            if type(row['MUTATION']) == str:
                                                mutation = True
                                            else:
                                                mutation = False

                                            # 37: X.ANY.ANY.ANY, correct substrate, human, T = 37, pH = correct
                                            for a in range(len(metabolites_forward)):
                                                if (sub_temp == 37) and (sub_ph == met_ph[a]) and (not mutation) and ((met_chebi[a] == sub_chebi) or (met_hmdb[a] == sub_hmdb) or (met_inchi[a] == sub_inchi) or (met_kegg[a] == sub_kegg) or (met_pubchem[a] == sub_pubchem)):
                                                    values_ec_37[c].append(row['VALUE'] / stoich_forward[a])
                                                    break

                                            # 38: X.ANY.ANY.ANY, correct substrate, human, T = 25-40, pH within 1.0 of correct
                                            for a in range(len(metabolites_forward)):
                                                if ((sub_temp >= 25) and (sub_temp <= 40)) and (abs(sub_ph-met_ph[a]) <= 1) and (not mutation) and ((met_chebi[a] == sub_chebi) or (met_hmdb[a] == sub_hmdb) or (met_inchi[a] == sub_inchi) or (met_kegg[a] == sub_kegg) or (met_pubchem[a] == sub_pubchem)):
                                                    values_ec_38[c].append(row['VALUE'] / stoich_forward[a])
                                                    break

                                            # 39: X.ANY.ANY.ANY, correct substrate, human, T = 25-40 + unknown, pH within 1.0 of correct + unknown
                                            for a in range(len(metabolites_forward)):
                                                if (((sub_temp >= 25) and (sub_temp <= 40)) or np.isnan(sub_temp)) and ((abs(sub_ph-met_ph[a]) <= 1) or np.isnan(sub_ph)) and (not mutation) and ((met_chebi[a] == sub_chebi) or (met_hmdb[a] == sub_hmdb) or (met_inchi[a] == sub_inchi) or (met_kegg[a] == sub_kegg) or (met_pubchem[a] == sub_pubchem)):
                                                    values_ec_39[c].append(row['VALUE'] / stoich_forward[a])
                                                    break

                                            # 43: X.ANY.ANY.ANY, all substrates, human, T = 37, pH = correct
                                            tanimoto = []
                                            for a in range(len(metabolites_forward)):
                                                if (sub_temp == 37) and (sub_ph == met_ph[a]) and (not mutation):

                                                    # if id's available, report tanimoto distance
                                                    if (type(met_inchi[a]) == str) and (type(sub_inchi) == str):
                                                        tanimoto.append(pybel.readstring('inchi',met_inchi[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                    elif (type(met_smiles[a]) == str) and (type(sub_inchi) == str):
                                                        tanimoto.append(pybel.readstring('smiles',met_smiles[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                    else:
                                                        tanimoto.append(0)

                                            # keep the raw value with the best score over all accepted metabolites
                                            if len(tanimoto) > 0:
                                                values_ec_43[c].append(row['VALUE'])
                                                tanimoto_ec_43[c].append(max(tanimoto))

                                            # 44: X.ANY.ANY.ANY, all substrates, human, T = 25-40, pH within 1.0 of correct
                                            tanimoto = []
                                            for a in range(len(metabolites_forward)):
                                                if ((sub_temp >= 25) and (sub_temp <= 40)) and (abs(sub_ph-met_ph[a]) <= 1) and (not mutation):

                                                    # if id's available, report tanimoto distance
                                                    if (type(met_inchi[a]) == str) and (type(sub_inchi) == str):
                                                        tanimoto.append(pybel.readstring('inchi',met_inchi[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                    elif (type(met_smiles[a]) == str) and (type(sub_inchi) == str):
                                                        tanimoto.append(pybel.readstring('smiles',met_smiles[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                    else:
                                                        tanimoto.append(0)

                                            if len(tanimoto) > 0:
                                                values_ec_44[c].append(row['VALUE'])
                                                tanimoto_ec_44[c].append(max(tanimoto))

                                            # 45: X.ANY.ANY.ANY, all substrates, human, T = 25-40 + unknown, pH within 1.0 of correct + unknown
                                            tanimoto = []
                                            for a in range(len(metabolites_forward)):
                                                if (((sub_temp >= 25) and (sub_temp <= 40)) or np.isnan(sub_temp)) and ((abs(sub_ph-met_ph[a]) <= 1) or np.isnan(sub_ph)) and (not mutation):

                                                    # if id's available, report tanimoto distance
                                                    if (type(met_inchi[a]) == str) and (type(sub_inchi) == str):
                                                        tanimoto.append(pybel.readstring('inchi',met_inchi[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                    elif (type(met_smiles[a]) == str) and (type(sub_inchi) == str):
                                                        tanimoto.append(pybel.readstring('smiles',met_smiles[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                    else:
                                                        tanimoto.append(0)

                                            if len(tanimoto) > 0:
                                                values_ec_45[c].append(row['VALUE'])
                                                tanimoto_ec_45[c].append(max(tanimoto))

                                # Pipelines 40-42 and 46-48: score entries from the all-organism BRENDA
                                # tables whose EC number shares its first field with ecs1[c].
                                for ec in available_all:
                                    if ec.split('.')[0] != ecs1[c]:
                                        continue

                                    # load all data (df_human keeps its historical name, but here it
                                    # holds the all-organism table for this EC number)
                                    df_human = pd.read_table('_data_/processing/brenda/all_organisms/%s.tsv' % ec)
                                    n_met = len(metabolites_forward)

                                    # iterate over all data
                                    for b in range(df_human.shape[0]):
                                        brenda_row = df_human.loc[b]

                                        # position of this entry's substrate in the parallel ID lists
                                        sub_pos = inchis.index(brenda_row['INCHI'])

                                        # get substrate ID's (np.nan whenever the stored value is not a string)
                                        sub_chebi = chebis[sub_pos].split(':')[1] if type(chebis[sub_pos]) is str else np.nan
                                        sub_hmdb = hmdbs[sub_pos] if type(hmdbs[sub_pos]) is str else np.nan
                                        sub_inchi = inchi_codes[sub_pos] if type(inchi_codes[sub_pos]) is str else np.nan
                                        sub_kegg = keggs[sub_pos] if type(keggs[sub_pos]) is str else np.nan
                                        sub_pubchem = pubchems[sub_pos] if type(pubchems[sub_pos]) is str else np.nan

                                        # get temperature, ph, mutation (a string in MUTATION marks a mutant)
                                        sub_temp = round(brenda_row['TEMP'],1)
                                        sub_ph = round(brenda_row['PH'],1)
                                        mutation = type(brenda_row['MUTATION']) is str

                                        # per-metabolite condition flags for the three strictness tiers:
                                        #   strict  - T = 37, pH = correct
                                        #   relaxed - T = 25-40, pH within 1.0 of correct
                                        #   lenient - as relaxed, but unknown (NaN) T or pH is also accepted
                                        temp_in_range = (sub_temp >= 25) and (sub_temp <= 40)
                                        temp_in_range_or_unknown = temp_in_range or np.isnan(sub_temp)
                                        strict_ok = [
                                            (sub_temp == 37) and (sub_ph == met_ph[a]) and (not mutation)
                                            for a in range(n_met)
                                        ]
                                        relaxed_ok = [
                                            temp_in_range and (abs(sub_ph-met_ph[a]) <= 1) and (not mutation)
                                            for a in range(n_met)
                                        ]
                                        lenient_ok = [
                                            temp_in_range_or_unknown and ((abs(sub_ph-met_ph[a]) <= 1) or np.isnan(sub_ph)) and (not mutation)
                                            for a in range(n_met)
                                        ]

                                        # a metabolite matches the substrate when any identifier agrees
                                        same_substrate = [
                                            (met_chebi[a] == sub_chebi) or (met_hmdb[a] == sub_hmdb) or (met_inchi[a] == sub_inchi) or (met_kegg[a] == sub_kegg) or (met_pubchem[a] == sub_pubchem)
                                            for a in range(n_met)
                                        ]

                                        # 40 / 41 / 42: correct ec, correct substrate, all organisms
                                        # (strict / relaxed / lenient conditions); only the first matching
                                        # metabolite contributes, scaled by its stoichiometry
                                        for tier_ok, tier_values in (
                                                (strict_ok, values_ec_40),
                                                (relaxed_ok, values_ec_41),
                                                (lenient_ok, values_ec_42)):
                                            for a in range(n_met):
                                                if tier_ok[a] and same_substrate[a]:
                                                    tier_values[c].append(brenda_row['VALUE'] / stoich_forward[a])
                                                    break

                                        # 46 / 47 / 48: correct ec, all substrates, all organisms
                                        # (strict / relaxed / lenient conditions); the entry is kept together
                                        # with its best fingerprint score against any qualifying metabolite
                                        for tier_ok, tier_values, tier_scores in (
                                                (strict_ok, values_ec_46, tanimoto_ec_46),
                                                (relaxed_ok, values_ec_47, tanimoto_ec_47),
                                                (lenient_ok, values_ec_48, tanimoto_ec_48)):
                                            tanimoto = []
                                            for a in range(n_met):
                                                if not tier_ok[a]:
                                                    continue

                                                # if id's available, report the pybel fingerprint score;
                                                # otherwise record 0
                                                if type(sub_inchi) is not str:
                                                    tanimoto.append(0)
                                                elif type(met_inchi[a]) is str:
                                                    tanimoto.append(pybel.readstring('inchi',met_inchi[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                elif type(met_smiles[a]) is str:
                                                    tanimoto.append(pybel.readstring('smiles',met_smiles[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                else:
                                                    tanimoto.append(0)

                                            if len(tanimoto) > 0:
                                                tier_values[c].append(brenda_row['VALUE'])
                                                tier_scores[c].append(max(tanimoto))

                            # aggregate data for all valid ec numbers: each ec number with at least
                            # one collected value contributes a single number to its pipeline's list
                            values37, values38, values39 = [], [], []
                            values40, values41, values42 = [], [], []
                            values43, values44, values45 = [], [], []
                            values46, values47, values48 = [], [], []

                            # pipelines 37-42 carry no similarity scores -> plain mean per ec number
                            unweighted_tiers = (
                                (values_ec_37, values37),
                                (values_ec_38, values38),
                                (values_ec_39, values39),
                                (values_ec_40, values40),
                                (values_ec_41, values41),
                                (values_ec_42, values42),
                            )

                            # pipelines 43-48 carry one tanimoto score per value -> weighted mean
                            weighted_tiers = (
                                (values_ec_43, tanimoto_ec_43, values43),
                                (values_ec_44, tanimoto_ec_44, values44),
                                (values_ec_45, tanimoto_ec_45, values45),
                                (values_ec_46, tanimoto_ec_46, values46),
                                (values_ec_47, tanimoto_ec_47, values47),
                                (values_ec_48, tanimoto_ec_48, values48),
                            )

                            for c in range(len(ecs1)):
                                for tier_per_ec, tier_pooled in unweighted_tiers:
                                    if len(tier_per_ec[c]) > 0:
                                        tier_pooled.append(np.mean(tier_per_ec[c]))

                                for tier_per_ec, tier_scores, tier_pooled in weighted_tiers:
                                    if len(tier_per_ec[c]) > 0:
                                        if max(tier_scores[c]) == 0:
                                            # No usable similarity information: fall back to an
                                            # unweighted mean. The weights must sum to 1 like the
                                            # normalised weights below; plain np.ones(n) would make
                                            # np.dot return the SUM of the values instead.
                                            weights = np.ones(len(tier_scores[c])) / len(tier_scores[c])
                                        else:
                                            # normalise the scores so the weights sum to 1
                                            # (total computed once instead of once per element)
                                            score_total = float(sum(tier_scores[c]))
                                            weights = [x / score_total for x in tier_scores[c]]
                                        tier_pooled.append(np.dot(weights,tier_per_ec[c]))

                            # determine which pipeline should be used: the lowest-numbered pipeline
                            # that produced any value wins; the stored value is the mean over ec
                            # numbers multiplied by 60*60 (presumably per-second -> per-hour; confirm)
                            pipeline_candidates = (
                                (37, values37),
                                (38, values38),
                                (39, values39),
                                (40, values40),
                                (41, values41),
                                (42, values42),
                                (43, values43),
                                (44, values44),
                                (45, values45),
                                (46, values46),
                                (47, values47),
                                (48, values48),
                            )
                            for pipeline_id, pipeline_values in pipeline_candidates:
                                if len(pipeline_values) > 0:
                                    kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(pipeline_values)*60*60,pipeline_id]
                                    break
                            else:
                                # no pipeline 37-48 produced a value: record the reaction with NaNs
                                kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.nan,np.nan]

                # # # REVERSE DIRECTION # # #

                if len(metabolites_reverse) > 0:

                    # get metabolite ID's and pH's: one entry per reverse-direction metabolite
                    # in each list, np.nan where the table value is not a string
                    met_chebi = []
                    met_kegg = []
                    met_pubchem = []
                    met_inchi = []
                    met_hmdb = []
                    met_smiles = []
                    met_ph = []

                    # METID column as a list, used to find each metabolite's row label
                    metid_list = df_metabolites['METID'].tolist()
                    for a, metid in enumerate(metabolites_reverse):
                        met_row = df_metabolites.loc[metid_list.index(metid)]

                        # chebi is normalised through int() to a bare number string
                        # NOTE(review): only string-typed CHEBI cells are kept; a numeric
                        # cell is recorded as np.nan -- confirm this is intended
                        met_field = met_row['CHEBI']
                        met_chebi.append(str(int(met_field)) if type(met_field) is str else np.nan)

                        # kegg, pubchem, inchi, hmdb, smiles are copied through unchanged
                        for met_column, met_target in (
                                ('KEGG', met_kegg),
                                ('PUBCHEM', met_pubchem),
                                ('INCHI STRING', met_inchi),
                                ('HMDB', met_hmdb),
                                ('SMILE', met_smiles)):
                            met_field = met_row[met_column]
                            met_target.append(met_field if type(met_field) is str else np.nan)

                        # ph is looked up by the second-to-last character of the metabolite ID
                        met_ph.append(ph[metid[-2]])

                    # # # TRY PIPELINE 1-12 # # #

                    # get all reaction ec numbers with all 4 numbers available
                    # (any field given as '-' disqualifies the ec number)
                    ecs = []
                    for ec in df_reactions.loc[i]['EC NUMBER'].split(' | '):
                        ec_fields = ec.split('.')
                        if (ec_fields[0] != '-') and (ec_fields[1] != '-') and (ec_fields[2] != '-') and (ec_fields[3] != '-'):
                            ecs.append(ec)

                    # initialize pipeline values: one independent empty list per kept ec number
                    values_ec_1 = [[] for _ in ecs]
                    values_ec_2 = [[] for _ in ecs]
                    values_ec_3 = [[] for _ in ecs]
                    values_ec_4 = [[] for _ in ecs]
                    values_ec_5 = [[] for _ in ecs]
                    values_ec_6 = [[] for _ in ecs]
                    values_ec_7 = [[] for _ in ecs]
                    values_ec_8 = [[] for _ in ecs]
                    values_ec_9 = [[] for _ in ecs]
                    values_ec_10 = [[] for _ in ecs]
                    values_ec_11 = [[] for _ in ecs]
                    values_ec_12 = [[] for _ in ecs]

                    # initialize tanimoto indices (only pipelines 7-12 record them)
                    tanimoto_ec_7 = [[] for _ in ecs]
                    tanimoto_ec_8 = [[] for _ in ecs]
                    tanimoto_ec_9 = [[] for _ in ecs]
                    tanimoto_ec_10 = [[] for _ in ecs]
                    tanimoto_ec_11 = [[] for _ in ecs]
                    tanimoto_ec_12 = [[] for _ in ecs]

                    # iterate over ec numbers
                    for c,ec in enumerate(ecs):

                        # Pipelines 1-3 and 7-9: score entries from the human BRENDA table
                        # for this exact EC number, if such a table is available.
                        if ec in available_human:

                            # load human data
                            df_human = pd.read_table('_data_/processing/brenda/human/%s.tsv' % ec)
                            n_met = len(metabolites_reverse)

                            # iterate over human data
                            for b in range(df_human.shape[0]):
                                brenda_row = df_human.loc[b]

                                # position of this entry's substrate in the parallel ID lists
                                sub_pos = inchis.index(brenda_row['INCHI'])

                                # get substrate ID's (np.nan whenever the stored value is not a string)
                                sub_chebi = chebis[sub_pos].split(':')[1] if type(chebis[sub_pos]) is str else np.nan
                                sub_hmdb = hmdbs[sub_pos] if type(hmdbs[sub_pos]) is str else np.nan
                                sub_inchi = inchi_codes[sub_pos] if type(inchi_codes[sub_pos]) is str else np.nan
                                sub_kegg = keggs[sub_pos] if type(keggs[sub_pos]) is str else np.nan
                                sub_pubchem = pubchems[sub_pos] if type(pubchems[sub_pos]) is str else np.nan

                                # get temperature, ph, mutation (a string in MUTATION marks a mutant)
                                sub_temp = round(brenda_row['TEMP'],1)
                                sub_ph = round(brenda_row['PH'],1)
                                mutation = type(brenda_row['MUTATION']) is str

                                # per-metabolite condition flags for the three strictness tiers:
                                #   strict  - T = 37, pH = correct
                                #   relaxed - T = 25-40, pH within 1.0 of correct
                                #   lenient - as relaxed, but unknown (NaN) T or pH is also accepted
                                temp_in_range = (sub_temp >= 25) and (sub_temp <= 40)
                                temp_in_range_or_unknown = temp_in_range or np.isnan(sub_temp)
                                strict_ok = [
                                    (sub_temp == 37) and (sub_ph == met_ph[a]) and (not mutation)
                                    for a in range(n_met)
                                ]
                                relaxed_ok = [
                                    temp_in_range and (abs(sub_ph-met_ph[a]) <= 1) and (not mutation)
                                    for a in range(n_met)
                                ]
                                lenient_ok = [
                                    temp_in_range_or_unknown and ((abs(sub_ph-met_ph[a]) <= 1) or np.isnan(sub_ph)) and (not mutation)
                                    for a in range(n_met)
                                ]

                                # a metabolite matches the substrate when any identifier agrees
                                same_substrate = [
                                    (met_chebi[a] == sub_chebi) or (met_hmdb[a] == sub_hmdb) or (met_inchi[a] == sub_inchi) or (met_kegg[a] == sub_kegg) or (met_pubchem[a] == sub_pubchem)
                                    for a in range(n_met)
                                ]

                                # 1 / 2 / 3: correct ec, correct substrate, human
                                # (strict / relaxed / lenient conditions); only the first matching
                                # metabolite contributes, scaled by its stoichiometry
                                for tier_ok, tier_values in (
                                        (strict_ok, values_ec_1),
                                        (relaxed_ok, values_ec_2),
                                        (lenient_ok, values_ec_3)):
                                    for a in range(n_met):
                                        if tier_ok[a] and same_substrate[a]:
                                            tier_values[c].append(brenda_row['VALUE'] / stoich_reverse[a])
                                            break

                                # 7 / 8 / 9: correct ec, all substrates, human
                                # (strict / relaxed / lenient conditions); the entry is kept together
                                # with its best fingerprint score against any qualifying metabolite
                                for tier_ok, tier_values, tier_scores in (
                                        (strict_ok, values_ec_7, tanimoto_ec_7),
                                        (relaxed_ok, values_ec_8, tanimoto_ec_8),
                                        (lenient_ok, values_ec_9, tanimoto_ec_9)):
                                    tanimoto = []
                                    for a in range(n_met):
                                        if not tier_ok[a]:
                                            continue

                                        # if id's available, report the pybel fingerprint score;
                                        # otherwise record 0
                                        if type(sub_inchi) is not str:
                                            tanimoto.append(0)
                                        elif type(met_inchi[a]) is str:
                                            tanimoto.append(pybel.readstring('inchi',met_inchi[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                        elif type(met_smiles[a]) is str:
                                            tanimoto.append(pybel.readstring('smiles',met_smiles[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                        else:
                                            tanimoto.append(0)

                                    if len(tanimoto) > 0:
                                        tier_values[c].append(brenda_row['VALUE'])
                                        tier_scores[c].append(max(tanimoto))

                        # determine if all data available
                        if ec in available_all:

                            # load all data
                            df_human = pd.read_table('_data_/processing/brenda/all_organisms/%s.tsv' % ec)

                            # iterate over all data
                            for b in range(df_human.shape[0]):

                                # get substrate ID's
                                if type(chebis[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                    sub_chebi = chebis[inchis.index(df_human.loc[b]['INCHI'])].split(':')[1]
                                else:
                                    sub_chebi = np.nan

                                if type(hmdbs[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                    sub_hmdb = hmdbs[inchis.index(df_human.loc[b]['INCHI'])]
                                else:
                                    sub_hmdb = np.nan

                                if type(inchi_codes[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                    sub_inchi = inchi_codes[inchis.index(df_human.loc[b]['INCHI'])]
                                else:
                                    sub_inchi = np.nan

                                if type(keggs[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                    sub_kegg = keggs[inchis.index(df_human.loc[b]['INCHI'])]
                                else:
                                    sub_kegg = np.nan

                                if type(pubchems[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                    sub_pubchem = pubchems[inchis.index(df_human.loc[b]['INCHI'])]
                                else:
                                    sub_pubchem = np.nan

                                # get temperature, ph, mutation
                                sub_temp = round(df_human.loc[b]['TEMP'],1)
                                sub_ph = round(df_human.loc[b]['PH'],1)
                                if type(df_human.loc[b]['MUTATION']) == str:
                                    mutation = True
                                else:
                                    mutation = False

                                # 4: correct ec, correct substrate, all organisms, T = 37, pH = correct
                                for a in range(len(metabolites_reverse)):
                                    if (sub_temp == 37) and (sub_ph == met_ph[a]) and (not mutation) and ((met_chebi[a] == sub_chebi) or (met_hmdb[a] == sub_hmdb) or (met_inchi[a] == sub_inchi) or (met_kegg[a] == sub_kegg) or (met_pubchem[a] == sub_pubchem)):
                                        values_ec_4[c].append(df_human.loc[b]['VALUE'] / stoich_reverse[a])
                                        break

                                # 5: correct ec, correct substrate, all organisms, T = 25-40, pH within 1.0 of correct
                                for a in range(len(metabolites_reverse)):
                                    if ((sub_temp >= 25) and (sub_temp <= 40)) and (abs(sub_ph-met_ph[a]) <= 1) and (not mutation) and ((met_chebi[a] == sub_chebi) or (met_hmdb[a] == sub_hmdb) or (met_inchi[a] == sub_inchi) or (met_kegg[a] == sub_kegg) or (met_pubchem[a] == sub_pubchem)):
                                        values_ec_5[c].append(df_human.loc[b]['VALUE'] / stoich_reverse[a])
                                        break 

                                # 6: correct ec, correct substrate, all organisms, T = 25-40 + unknown, pH within 1.0 of correct + unknown
                                for a in range(len(metabolites_reverse)):
                                    if (((sub_temp >= 25) and (sub_temp <= 40)) or np.isnan(sub_temp)) and ((abs(sub_ph-met_ph[a]) <= 1) or np.isnan(sub_ph)) and (not mutation) and ((met_chebi[a] == sub_chebi) or (met_hmdb[a] == sub_hmdb) or (met_inchi[a] == sub_inchi) or (met_kegg[a] == sub_kegg) or (met_pubchem[a] == sub_pubchem)):
                                        values_ec_6[c].append(df_human.loc[b]['VALUE'] / stoich_reverse[a])
                                        break

                                # 10: correct ec, all substrates, all organisms, T = 37, pH = correct
                                tanimoto = []
                                for a in range(len(metabolites_reverse)):
                                    if (sub_temp == 37) and (sub_ph == met_ph[a]) and (not mutation):

                                        # if id's available, report tanimoto distance
                                        if (type(met_inchi[a]) == str) and (type(sub_inchi) == str):
                                            tanimoto.append(pybel.readstring('inchi',met_inchi[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                        elif (type(met_smiles[a]) == str) and (type(sub_inchi) == str):
                                            tanimoto.append(pybel.readstring('smiles',met_smiles[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                        else:
                                            tanimoto.append(0)

                                if len(tanimoto) > 0:
                                    values_ec_10[c].append(df_human.loc[b]['VALUE'])
                                    tanimoto_ec_10[c].append(max(tanimoto))

                               # 11: correct ec, all substrates, all orgnanisms, T = 25-40, pH within 1.0 of correct
                                tanimoto = []
                                for a in range(len(metabolites_reverse)):
                                    if ((sub_temp >= 25) and (sub_temp <= 40)) and (abs(sub_ph-met_ph[a]) <= 1) and (not mutation):

                                        # if id's available, report tanimoto distance
                                        if (type(met_inchi[a]) == str) and (type(sub_inchi) == str):
                                            tanimoto.append(pybel.readstring('inchi',met_inchi[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                        elif (type(met_smiles[a]) == str) and (type(sub_inchi) == str):
                                            tanimoto.append(pybel.readstring('smiles',met_smiles[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                        else:
                                            tanimoto.append(0)

                                if len(tanimoto) > 0:
                                    values_ec_11[c].append(df_human.loc[b]['VALUE'])
                                    tanimoto_ec_11[c].append(max(tanimoto))

                                # 12: correct ec, all substrates, all orgnanisms, T = 25-40 + unknown, pH within 1.0 of correct + unknown
                                tanimoto = []
                                for a in range(len(metabolites_reverse)):
                                    if (((sub_temp >= 25) and (sub_temp <= 40)) or np.isnan(sub_temp)) and ((abs(sub_ph-met_ph[a]) <= 1) or np.isnan(sub_ph)) and (not mutation):

                                        # if id's available, report tanimoto distance
                                        if (type(met_inchi[a]) == str) and (type(sub_inchi) == str):
                                            tanimoto.append(pybel.readstring('inchi',met_inchi[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                        elif (type(met_smiles[a]) == str) and (type(sub_inchi) == str):
                                            tanimoto.append(pybel.readstring('smiles',met_smiles[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                        else:
                                            tanimoto.append(0)

                                if len(tanimoto) > 0:
                                    values_ec_12[c].append(df_human.loc[b]['VALUE'])
                                    tanimoto_ec_12[c].append(max(tanimoto))                  

                    # aggregate data for all valid ec numbers
                    # valuesN collects one number per EC number that produced data for tier N
                    values1, values2, values3 = [], [], []
                    values4, values5, values6 = [], [], []
                    values7, values8, values9 = [], [], []
                    values10, values11, values12 = [], [], []

                    # tiers 1-6: each EC number contributes the plain mean of its values
                    mean_tiers = (
                        (values1, values_ec_1),
                        (values2, values_ec_2),
                        (values3, values_ec_3),
                        (values4, values_ec_4),
                        (values5, values_ec_5),
                        (values6, values_ec_6),
                    )
                    # tiers 7-12: each EC number contributes a tanimoto-weighted mean
                    weighted_tiers = (
                        (values7, values_ec_7, tanimoto_ec_7),
                        (values8, values_ec_8, tanimoto_ec_8),
                        (values9, values_ec_9, tanimoto_ec_9),
                        (values10, values_ec_10, tanimoto_ec_10),
                        (values11, values_ec_11, tanimoto_ec_11),
                        (values12, values_ec_12, tanimoto_ec_12),
                    )

                    for c in range(len(ecs)):
                        for tier_values, per_ec in mean_tiers:
                            if len(per_ec[c]) > 0:
                                tier_values.append(np.mean(per_ec[c]))

                        for tier_values, per_ec, per_ec_tanimoto in weighted_tiers:
                            if len(per_ec[c]) > 0:
                                if max(per_ec_tanimoto[c]) == 0:
                                    # No similarity information at all: weight every value
                                    # equally. The previous fallback used un-normalised
                                    # np.ones(...) weights, so np.dot returned the SUM of
                                    # the values rather than their average.
                                    tier_values.append(np.mean(per_ec[c]))
                                else:
                                    # weights are the scores normalised to sum to 1
                                    similarity_sum = float(sum(per_ec_tanimoto[c]))
                                    weights = [x / similarity_sum for x in per_ec_tanimoto[c]]
                                    tier_values.append(np.dot(weights, per_ec[c]))

                    # determine which pipeline should be used
                    # The lowest-numbered tier with at least one per-EC value wins. The row
                    # appended to kcat_reverse is [RXNID of the current reaction, mean of that
                    # tier's per-EC values times 60*60, tier number].
                    # NOTE(review): the 60*60 factor presumably converts per-second turnover
                    # numbers to per-hour -- confirm against the units of the stored values.
                    if len(values1) > 0:
                        kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values1)*60*60,1]
                    elif len(values2) > 0:
                        kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values2)*60*60,2]
                    elif len(values3) > 0:
                        kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values3)*60*60,3]
                    elif len(values4) > 0:
                        kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values4)*60*60,4]
                    elif len(values5) > 0:
                        kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values5)*60*60,5]
                    elif len(values6) > 0:
                        kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values6)*60*60,6]
                    elif len(values7) > 0:
                        kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values7)*60*60,7]
                    elif len(values8) > 0:
                        kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values8)*60*60,8]
                    elif len(values9) > 0:
                        kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values9)*60*60,9]
                    elif len(values10) > 0:
                        kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values10)*60*60,10]
                    elif len(values11) > 0:
                        kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values11)*60*60,11]
                    elif len(values12) > 0:
                        kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values12)*60*60,12]
                    else:

                        # # # TRY PIPELINE 13-24 # # #

                        # initialize pipeline values
                        # (each list receives one inner list per EC class collected below)
                        values_ec_13, values_ec_14, values_ec_15 = [], [], []
                        values_ec_16, values_ec_17, values_ec_18 = [], [], []
                        values_ec_19, values_ec_20, values_ec_21 = [], [], []
                        values_ec_22, values_ec_23, values_ec_24 = [], [], []

                        # initialize tanimoto indices
                        tanimoto_ec_19, tanimoto_ec_20, tanimoto_ec_21 = [], [], []
                        tanimoto_ec_22, tanimoto_ec_23, tanimoto_ec_24 = [], [], []

                        # get all unique reaction ec numbers with first 3 numbers available
                        # ecs1/ecs2/ecs3 are parallel lists holding the 1st/2nd/3rd EC field
                        # of every distinct class; the 4th field is ignored.
                        # NOTE(review): `ec` is also the name of the EC-number list built at the
                        # top of the file; this loop rebinds it -- confirm nothing later needs it.
                        ecs1 = []
                        ecs2 = []
                        ecs3 = []
                        for ec in df_reactions.loc[i]['EC NUMBER'].split(' | '):
                            ec_fields = ec.split('.')

                            # skip EC numbers with a '-' placeholder in any of the first 3 fields
                            if ec_fields[0] == '-' or ec_fields[1] == '-' or ec_fields[2] == '-':
                                continue

                            # has this X.X.X class been recorded already?
                            found = False
                            for j in range(len(ecs1)):
                                if (ecs1[j], ecs2[j], ecs3[j]) == (ec_fields[0], ec_fields[1], ec_fields[2]):
                                    found = True
                            if found:
                                continue

                            ecs1.append(ec_fields[0])
                            ecs2.append(ec_fields[1])
                            ecs3.append(ec_fields[2])

                            # open an empty slot for the new class in every accumulator
                            for class_slots in (values_ec_13, values_ec_14, values_ec_15,
                                                values_ec_16, values_ec_17, values_ec_18,
                                                values_ec_19, values_ec_20, values_ec_21,
                                                values_ec_22, values_ec_23, values_ec_24,
                                                tanimoto_ec_19, tanimoto_ec_20, tanimoto_ec_21,
                                                tanimoto_ec_22, tanimoto_ec_23, tanimoto_ec_24):
                                class_slots.append([])

                        # iterate over ec number classes
                        for c in range(len(ecs1)):

                            # iterate over all available matching human ec numbers
                            for ec in available_human:

                                # keep only EC numbers inside the current X.X.X class
                                ec_fields = ec.split('.')
                                if not (ec_fields[0] == ecs1[c] and ec_fields[1] == ecs2[c] and ec_fields[2] == ecs3[c]):
                                    continue

                                # load human data
                                df_human = pd.read_table('_data_/processing/brenda/human/%s.tsv' % ec)

                                # iterate over human data
                                for b in range(df_human.shape[0]):

                                    # Fetch the row and locate its substrate in the id tables once;
                                    # the lookup used to be repeated (row materialisation plus a
                                    # linear list search) for every identifier below.
                                    row = df_human.loc[b]
                                    sub_index = inchis.index(row['INCHI'])

                                    # get substrate ID's (NaN when missing, so a missing id never
                                    # compares equal to anything in the matching below)
                                    if type(chebis[sub_index]) is str:
                                        # keep the part after the first ':'
                                        sub_chebi = chebis[sub_index].split(':')[1]
                                    else:
                                        sub_chebi = np.nan
                                    sub_hmdb = hmdbs[sub_index] if type(hmdbs[sub_index]) is str else np.nan
                                    sub_inchi = inchi_codes[sub_index] if type(inchi_codes[sub_index]) is str else np.nan
                                    sub_kegg = keggs[sub_index] if type(keggs[sub_index]) is str else np.nan
                                    sub_pubchem = pubchems[sub_index] if type(pubchems[sub_index]) is str else np.nan

                                    # get temperature, ph, mutation
                                    sub_temp = round(row['TEMP'], 1)
                                    sub_ph = round(row['PH'], 1)
                                    # any string in the MUTATION column marks the row as a mutant
                                    mutation = type(row['MUTATION']) is str

                                    # 13: X.X.X.ANY, correct substrate, human, T = 37, pH = correct
                                    # (stops at the first metabolite sharing any identifier)
                                    for a in range(len(metabolites_reverse)):
                                        if not (sub_temp == 37 and sub_ph == met_ph[a] and not mutation):
                                            continue
                                        if (met_chebi[a] == sub_chebi
                                                or met_hmdb[a] == sub_hmdb
                                                or met_inchi[a] == sub_inchi
                                                or met_kegg[a] == sub_kegg
                                                or met_pubchem[a] == sub_pubchem):
                                            values_ec_13[c].append(row['VALUE'] / stoich_reverse[a])
                                            break

                                    # 14: X.X.X.ANY, correct substrate, human, T = 25-40, pH within 1.0 of correct
                                    for a in range(len(metabolites_reverse)):
                                        if not (25 <= sub_temp <= 40 and abs(sub_ph - met_ph[a]) <= 1 and not mutation):
                                            continue
                                        if (met_chebi[a] == sub_chebi
                                                or met_hmdb[a] == sub_hmdb
                                                or met_inchi[a] == sub_inchi
                                                or met_kegg[a] == sub_kegg
                                                or met_pubchem[a] == sub_pubchem):
                                            values_ec_14[c].append(row['VALUE'] / stoich_reverse[a])
                                            break

                                    # 15: X.X.X.ANY, correct substrate, human, T = 25-40 + unknown, pH within 1.0 of correct + unknown
                                    for a in range(len(metabolites_reverse)):
                                        if not ((25 <= sub_temp <= 40 or np.isnan(sub_temp))
                                                and (abs(sub_ph - met_ph[a]) <= 1 or np.isnan(sub_ph))
                                                and not mutation):
                                            continue
                                        if (met_chebi[a] == sub_chebi
                                                or met_hmdb[a] == sub_hmdb
                                                or met_inchi[a] == sub_inchi
                                                or met_kegg[a] == sub_kegg
                                                or met_pubchem[a] == sub_pubchem):
                                            values_ec_15[c].append(row['VALUE'] / stoich_reverse[a])
                                            break

                                    # 19: X.X.X.ANY, all substrates, human, T = 37, pH = correct
                                    # One score per metabolite whose conditions match: pybel's `|` on
                                    # two fingerprints (Tanimoto coefficient), or 0 without structures.
                                    tanimoto = []
                                    for a in range(len(metabolites_reverse)):
                                        if not (sub_temp == 37 and sub_ph == met_ph[a] and not mutation):
                                            continue
                                        if type(met_inchi[a]) is str and type(sub_inchi) is str:
                                            met_fp = pybel.readstring('inchi', met_inchi[a]).calcfp()
                                            sub_fp = pybel.readstring('inchi', sub_inchi).calcfp()
                                            tanimoto.append(met_fp | sub_fp)
                                        elif type(met_smiles[a]) is str and type(sub_inchi) is str:
                                            met_fp = pybel.readstring('smiles', met_smiles[a]).calcfp()
                                            sub_fp = pybel.readstring('inchi', sub_inchi).calcfp()
                                            tanimoto.append(met_fp | sub_fp)
                                        else:
                                            tanimoto.append(0)

                                    # keep the row with its best score if any metabolite qualified
                                    if tanimoto:
                                        values_ec_19[c].append(row['VALUE'])
                                        tanimoto_ec_19[c].append(max(tanimoto))

                                    # 20: X.X.X.ANY, all substrates, human, T = 25-40, pH within 1.0 of correct
                                    tanimoto = []
                                    for a in range(len(metabolites_reverse)):
                                        if not (25 <= sub_temp <= 40 and abs(sub_ph - met_ph[a]) <= 1 and not mutation):
                                            continue
                                        if type(met_inchi[a]) is str and type(sub_inchi) is str:
                                            met_fp = pybel.readstring('inchi', met_inchi[a]).calcfp()
                                            sub_fp = pybel.readstring('inchi', sub_inchi).calcfp()
                                            tanimoto.append(met_fp | sub_fp)
                                        elif type(met_smiles[a]) is str and type(sub_inchi) is str:
                                            met_fp = pybel.readstring('smiles', met_smiles[a]).calcfp()
                                            sub_fp = pybel.readstring('inchi', sub_inchi).calcfp()
                                            tanimoto.append(met_fp | sub_fp)
                                        else:
                                            tanimoto.append(0)

                                    if tanimoto:
                                        values_ec_20[c].append(row['VALUE'])
                                        tanimoto_ec_20[c].append(max(tanimoto))

                                    # 21: X.X.X.ANY, all substrates, human, T = 25-40 + unknown, pH within 1.0 of correct + unknown
                                    tanimoto = []
                                    for a in range(len(metabolites_reverse)):
                                        if not ((25 <= sub_temp <= 40 or np.isnan(sub_temp))
                                                and (abs(sub_ph - met_ph[a]) <= 1 or np.isnan(sub_ph))
                                                and not mutation):
                                            continue
                                        if type(met_inchi[a]) is str and type(sub_inchi) is str:
                                            met_fp = pybel.readstring('inchi', met_inchi[a]).calcfp()
                                            sub_fp = pybel.readstring('inchi', sub_inchi).calcfp()
                                            tanimoto.append(met_fp | sub_fp)
                                        elif type(met_smiles[a]) is str and type(sub_inchi) is str:
                                            met_fp = pybel.readstring('smiles', met_smiles[a]).calcfp()
                                            sub_fp = pybel.readstring('inchi', sub_inchi).calcfp()
                                            tanimoto.append(met_fp | sub_fp)
                                        else:
                                            tanimoto.append(0)

                                    if tanimoto:
                                        values_ec_21[c].append(row['VALUE'])
                                        tanimoto_ec_21[c].append(max(tanimoto))

                            # iterate over all available matching all organism ec numbers
                            for ec in available_all:

                                # keep only EC numbers inside the current X.X.X class
                                ec_fields = ec.split('.')
                                if not (ec_fields[0] == ecs1[c] and ec_fields[1] == ecs2[c] and ec_fields[2] == ecs3[c]):
                                    continue

                                # load all data
                                # (the frame keeps the name df_human although it holds the
                                # all-organisms table here)
                                df_human = pd.read_table('_data_/processing/brenda/all_organisms/%s.tsv' % ec)

                                # iterate over all data
                                for b in range(df_human.shape[0]):

                                    # Fetch the row and locate its substrate in the id tables once;
                                    # the lookup used to be repeated (row materialisation plus a
                                    # linear list search) for every identifier below.
                                    row = df_human.loc[b]
                                    sub_index = inchis.index(row['INCHI'])

                                    # get substrate ID's (NaN when missing, so a missing id never
                                    # compares equal to anything in the matching below)
                                    if type(chebis[sub_index]) is str:
                                        # keep the part after the first ':'
                                        sub_chebi = chebis[sub_index].split(':')[1]
                                    else:
                                        sub_chebi = np.nan
                                    sub_hmdb = hmdbs[sub_index] if type(hmdbs[sub_index]) is str else np.nan
                                    sub_inchi = inchi_codes[sub_index] if type(inchi_codes[sub_index]) is str else np.nan
                                    sub_kegg = keggs[sub_index] if type(keggs[sub_index]) is str else np.nan
                                    sub_pubchem = pubchems[sub_index] if type(pubchems[sub_index]) is str else np.nan

                                    # get temperature, ph, mutation
                                    sub_temp = round(row['TEMP'], 1)
                                    sub_ph = round(row['PH'], 1)
                                    # any string in the MUTATION column marks the row as a mutant
                                    mutation = type(row['MUTATION']) is str

                                    # 16: X.X.X.ANY, correct substrate, all organisms, T = 37, pH = correct
                                    # (stops at the first metabolite sharing any identifier)
                                    for a in range(len(metabolites_reverse)):
                                        if not (sub_temp == 37 and sub_ph == met_ph[a] and not mutation):
                                            continue
                                        if (met_chebi[a] == sub_chebi
                                                or met_hmdb[a] == sub_hmdb
                                                or met_inchi[a] == sub_inchi
                                                or met_kegg[a] == sub_kegg
                                                or met_pubchem[a] == sub_pubchem):
                                            values_ec_16[c].append(row['VALUE'] / stoich_reverse[a])
                                            break

                                    # 17: X.X.X.ANY, correct substrate, all organisms, T = 25-40, pH within 1.0 of correct
                                    for a in range(len(metabolites_reverse)):
                                        if not (25 <= sub_temp <= 40 and abs(sub_ph - met_ph[a]) <= 1 and not mutation):
                                            continue
                                        if (met_chebi[a] == sub_chebi
                                                or met_hmdb[a] == sub_hmdb
                                                or met_inchi[a] == sub_inchi
                                                or met_kegg[a] == sub_kegg
                                                or met_pubchem[a] == sub_pubchem):
                                            values_ec_17[c].append(row['VALUE'] / stoich_reverse[a])
                                            break

                                    # 18: X.X.X.ANY, correct substrate, all organisms, T = 25-40 + unknown, pH within 1.0 of correct + unknown
                                    for a in range(len(metabolites_reverse)):
                                        if not ((25 <= sub_temp <= 40 or np.isnan(sub_temp))
                                                and (abs(sub_ph - met_ph[a]) <= 1 or np.isnan(sub_ph))
                                                and not mutation):
                                            continue
                                        if (met_chebi[a] == sub_chebi
                                                or met_hmdb[a] == sub_hmdb
                                                or met_inchi[a] == sub_inchi
                                                or met_kegg[a] == sub_kegg
                                                or met_pubchem[a] == sub_pubchem):
                                            values_ec_18[c].append(row['VALUE'] / stoich_reverse[a])
                                            break

                                    # 22: X.X.X.ANY, all substrates, all organisms, T = 37, pH = correct
                                    # One score per metabolite whose conditions match: pybel's `|` on
                                    # two fingerprints (Tanimoto coefficient), or 0 without structures.
                                    tanimoto = []
                                    for a in range(len(metabolites_reverse)):
                                        if not (sub_temp == 37 and sub_ph == met_ph[a] and not mutation):
                                            continue
                                        if type(met_inchi[a]) is str and type(sub_inchi) is str:
                                            met_fp = pybel.readstring('inchi', met_inchi[a]).calcfp()
                                            sub_fp = pybel.readstring('inchi', sub_inchi).calcfp()
                                            tanimoto.append(met_fp | sub_fp)
                                        elif type(met_smiles[a]) is str and type(sub_inchi) is str:
                                            met_fp = pybel.readstring('smiles', met_smiles[a]).calcfp()
                                            sub_fp = pybel.readstring('inchi', sub_inchi).calcfp()
                                            tanimoto.append(met_fp | sub_fp)
                                        else:
                                            tanimoto.append(0)

                                    # keep the row with its best score if any metabolite qualified
                                    if tanimoto:
                                        values_ec_22[c].append(row['VALUE'])
                                        tanimoto_ec_22[c].append(max(tanimoto))

                                    # 23: X.X.X.ANY, all substrates, all organisms, T = 25-40, pH within 1.0 of correct
                                    tanimoto = []
                                    for a in range(len(metabolites_reverse)):
                                        if not (25 <= sub_temp <= 40 and abs(sub_ph - met_ph[a]) <= 1 and not mutation):
                                            continue
                                        if type(met_inchi[a]) is str and type(sub_inchi) is str:
                                            met_fp = pybel.readstring('inchi', met_inchi[a]).calcfp()
                                            sub_fp = pybel.readstring('inchi', sub_inchi).calcfp()
                                            tanimoto.append(met_fp | sub_fp)
                                        elif type(met_smiles[a]) is str and type(sub_inchi) is str:
                                            met_fp = pybel.readstring('smiles', met_smiles[a]).calcfp()
                                            sub_fp = pybel.readstring('inchi', sub_inchi).calcfp()
                                            tanimoto.append(met_fp | sub_fp)
                                        else:
                                            tanimoto.append(0)

                                    if tanimoto:
                                        values_ec_23[c].append(row['VALUE'])
                                        tanimoto_ec_23[c].append(max(tanimoto))

                                    # 24: X.X.X.ANY, all substrates, all organisms, T = 25-40 + unknown, pH within 1.0 of correct + unknown
                                    tanimoto = []
                                    for a in range(len(metabolites_reverse)):
                                        if not ((25 <= sub_temp <= 40 or np.isnan(sub_temp))
                                                and (abs(sub_ph - met_ph[a]) <= 1 or np.isnan(sub_ph))
                                                and not mutation):
                                            continue
                                        if type(met_inchi[a]) is str and type(sub_inchi) is str:
                                            met_fp = pybel.readstring('inchi', met_inchi[a]).calcfp()
                                            sub_fp = pybel.readstring('inchi', sub_inchi).calcfp()
                                            tanimoto.append(met_fp | sub_fp)
                                        elif type(met_smiles[a]) is str and type(sub_inchi) is str:
                                            met_fp = pybel.readstring('smiles', met_smiles[a]).calcfp()
                                            sub_fp = pybel.readstring('inchi', sub_inchi).calcfp()
                                            tanimoto.append(met_fp | sub_fp)
                                        else:
                                            tanimoto.append(0)

                                    if tanimoto:
                                        values_ec_24[c].append(row['VALUE'])
                                        tanimoto_ec_24[c].append(max(tanimoto))

                        # aggregate data for all valid ec numbers
                        # values_ec_NN[c] / tanimoto_ec_NN[c] hold the entries collected for ec class c;
                        # valuesNN receives one aggregated number per ec class that has any data
                        values13 = []
                        values14 = []
                        values15 = []
                        values16 = []
                        values17 = []
                        values18 = []
                        values19 = []
                        values20 = []
                        values21 = []
                        values22 = []
                        values23 = []
                        values24 = []

                        # pipelines 13-18: plain mean of the collected values
                        mean_pipelines = [(values_ec_13, values13), (values_ec_14, values14),
                                          (values_ec_15, values15), (values_ec_16, values16),
                                          (values_ec_17, values17), (values_ec_18, values18)]

                        # pipelines 19-24: mean weighted by the scores stored in tanimoto_ec_NN
                        weighted_pipelines = [(values_ec_19, tanimoto_ec_19, values19),
                                              (values_ec_20, tanimoto_ec_20, values20),
                                              (values_ec_21, tanimoto_ec_21, values21),
                                              (values_ec_22, tanimoto_ec_22, values22),
                                              (values_ec_23, tanimoto_ec_23, values23),
                                              (values_ec_24, tanimoto_ec_24, values24)]

                        for c in range(len(ecs1)):
                            for ec_values, pipeline_values in mean_pipelines:
                                if len(ec_values[c]) > 0:
                                    pipeline_values.append(np.mean(ec_values[c]))

                            for ec_values, ec_scores, pipeline_values in weighted_pipelines:
                                if len(ec_values[c]) > 0:
                                    if max(ec_scores[c]) == 0:
                                        # no similarity information at all: fall back to equal weights.
                                        # The weights must sum to 1 (as in the branch below), otherwise
                                        # np.dot returns the SUM of the values instead of their mean.
                                        weights = np.ones(len(ec_scores[c])) / len(ec_scores[c])
                                    else:
                                        # normalize the scores so the weights sum to 1
                                        score_total = float(sum(ec_scores[c]))
                                        weights = [x / score_total for x in ec_scores[c]]
                                    pipeline_values.append(np.dot(weights, ec_values[c]))

                        # determine which pipeline should be used
                        # The chain picks the first (lowest-numbered) pipeline that produced any
                        # aggregated value and writes one row to kcat_reverse:
                        #   [RXNID of reaction i, mean over the per-ec-class values * 60 * 60, pipeline number]
                        # If none of pipelines 13-24 produced data, control falls through to the
                        # trailing else branch that follows this chain.
                        # NOTE(review): the *60*60 factor presumably converts per-second values to
                        # per-hour -- confirm the units expected downstream.
                        # NOTE(review): .loc[kcat_reverse.shape[0]] only appends a new row when the
                        # index is the default 0..n-1 -- confirm how kcat_reverse is created.
                        if len(values13) > 0:
                            kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values13)*60*60,13]
                        elif len(values14) > 0:
                            kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values14)*60*60,14]
                        elif len(values15) > 0:
                            kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values15)*60*60,15]
                        elif len(values16) > 0:
                            kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values16)*60*60,16]
                        elif len(values17) > 0:
                            kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values17)*60*60,17]
                        elif len(values18) > 0:
                            kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values18)*60*60,18]
                        elif len(values19) > 0:
                            kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values19)*60*60,19]
                        elif len(values20) > 0:
                            kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values20)*60*60,20]
                        elif len(values21) > 0:
                            kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values21)*60*60,21]
                        elif len(values22) > 0:
                            kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values22)*60*60,22]
                        elif len(values23) > 0:
                            kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values23)*60*60,23]
                        elif len(values24) > 0:
                            kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values24)*60*60,24]
                        else:

                            # # # TRY PIPELINE 25-36 # # #

                            # per-pipeline accumulators; each gets one inner list per unique ec class
                            values_ec_25, values_ec_26, values_ec_27, values_ec_28 = [], [], [], []
                            values_ec_29, values_ec_30, values_ec_31, values_ec_32 = [], [], [], []
                            values_ec_33, values_ec_34, values_ec_35, values_ec_36 = [], [], [], []

                            # tanimoto scores, kept only for the "all substrates" pipelines 31-36
                            tanimoto_ec_31, tanimoto_ec_32, tanimoto_ec_33 = [], [], []
                            tanimoto_ec_34, tanimoto_ec_35, tanimoto_ec_36 = [], [], []

                            # every accumulator that must grow in lock-step with ecs1 / ecs2
                            per_ec_accumulators = (values_ec_25, values_ec_26, values_ec_27, values_ec_28,
                                                   values_ec_29, values_ec_30, values_ec_31, values_ec_32,
                                                   values_ec_33, values_ec_34, values_ec_35, values_ec_36,
                                                   tanimoto_ec_31, tanimoto_ec_32, tanimoto_ec_33,
                                                   tanimoto_ec_34, tanimoto_ec_35, tanimoto_ec_36)

                            # collect the unique (first, second) ec fields of this reaction;
                            # ecs1[k] and ecs2[k] together describe ec class k
                            ecs1 = []
                            ecs2 = []
                            for ec in df_reactions.loc[i]['EC NUMBER'].split(' | '):
                                ec_fields = ec.split('.')

                                # both leading fields must be known ('-' marks an unknown field)
                                if (ec_fields[0] == '-') or (ec_fields[1] == '-'):
                                    continue

                                # register the class only the first time it is seen
                                found = (ec_fields[0], ec_fields[1]) in zip(ecs1, ecs2)
                                if not found:
                                    ecs1.append(ec_fields[0])
                                    ecs2.append(ec_fields[1])
                                    for accumulator in per_ec_accumulators:
                                        accumulator.append([])

                            # iterate over ec number classes
                            # For each unique class ecs1[c].ecs2[c], scan every BRENDA table whose ec number
                            # shares those first two fields and file each qualifying row's VALUE under
                            # values_ec_25..36[c] (human tables feed 25-27 and 31-33, all-organism tables
                            # feed 28-30 and 34-36).
                            for c in range(len(ecs1)):

                                # iterate over all available matching human ec numbers
                                for ec in available_human:
                                    if (ec.split('.')[0] == ecs1[c]) and (ec.split('.')[1] == ecs2[c]):

                                        # load human data
                                        df_human = pd.read_table('_data_/processing/brenda/human/%s.tsv' % ec)

                                        # iterate over human data
                                        for b in range(df_human.shape[0]):

                                            # get substrate ID's
                                            # The row's INCHI entry is located in `inchis` and the same position is
                                            # read from the parallel id lists; a non-string entry becomes NaN.
                                            # list.index raises ValueError if the entry is not in `inchis`.
                                            # NOTE(review): split(':')[1] presumably drops a "CHEBI:" style prefix -- confirm
                                            if type(chebis[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                                sub_chebi = chebis[inchis.index(df_human.loc[b]['INCHI'])].split(':')[1]
                                            else:
                                                sub_chebi = np.nan

                                            if type(hmdbs[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                                sub_hmdb = hmdbs[inchis.index(df_human.loc[b]['INCHI'])]
                                            else:
                                                sub_hmdb = np.nan

                                            if type(inchi_codes[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                                sub_inchi = inchi_codes[inchis.index(df_human.loc[b]['INCHI'])]
                                            else:
                                                sub_inchi = np.nan

                                            if type(keggs[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                                sub_kegg = keggs[inchis.index(df_human.loc[b]['INCHI'])]
                                            else:
                                                sub_kegg = np.nan

                                            if type(pubchems[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                                sub_pubchem = pubchems[inchis.index(df_human.loc[b]['INCHI'])]
                                            else:
                                                sub_pubchem = np.nan

                                            # get temperature, ph, mutation
                                            # temperature and pH are rounded to one decimal; a row counts as a
                                            # mutant whenever its MUTATION cell holds a string
                                            sub_temp = round(df_human.loc[b]['TEMP'],1)
                                            sub_ph = round(df_human.loc[b]['PH'],1)
                                            if type(df_human.loc[b]['MUTATION']) == str:
                                                mutation = True
                                            else:
                                                mutation = False

                                            # 25: X.X.ANY.ANY, correct substrate, human, T = 37, pH = correct
                                            # (a NaN substrate id never compares equal, so missing ids cannot match;
                                            #  the break makes each row contribute at most once, scaled by the
                                            #  stoichiometry of the first matching metabolite)
                                            for a in range(len(metabolites_reverse)):
                                                if (sub_temp == 37) and (sub_ph == met_ph[a]) and (not mutation) and ((met_chebi[a] == sub_chebi) or (met_hmdb[a] == sub_hmdb) or (met_inchi[a] == sub_inchi) or (met_kegg[a] == sub_kegg) or (met_pubchem[a] == sub_pubchem)):
                                                    values_ec_25[c].append(df_human.loc[b]['VALUE'] / stoich_reverse[a])
                                                    break

                                            # 26: X.X.ANY.ANY, correct substrate, human, T = 25-40, pH within 1.0 of correct
                                            # (comparisons against NaN are False, so rows with unknown T or pH are excluded here)
                                            for a in range(len(metabolites_reverse)):
                                                if ((sub_temp >= 25) and (sub_temp <= 40)) and (abs(sub_ph-met_ph[a]) <= 1) and (not mutation) and ((met_chebi[a] == sub_chebi) or (met_hmdb[a] == sub_hmdb) or (met_inchi[a] == sub_inchi) or (met_kegg[a] == sub_kegg) or (met_pubchem[a] == sub_pubchem)):
                                                    values_ec_26[c].append(df_human.loc[b]['VALUE'] / stoich_reverse[a])
                                                    break

                                            # 27: X.X.ANY.ANY, correct substrate, human, T = 25-40 + unknown, pH within 1.0 of correct + unknown
                                            # (np.isnan explicitly admits rows whose T or pH is missing)
                                            for a in range(len(metabolites_reverse)):
                                                if (((sub_temp >= 25) and (sub_temp <= 40)) or np.isnan(sub_temp)) and ((abs(sub_ph-met_ph[a]) <= 1) or np.isnan(sub_ph)) and (not mutation) and ((met_chebi[a] == sub_chebi) or (met_hmdb[a] == sub_hmdb) or (met_inchi[a] == sub_inchi) or (met_kegg[a] == sub_kegg) or (met_pubchem[a] == sub_pubchem)):
                                                    values_ec_27[c].append(df_human.loc[b]['VALUE'] / stoich_reverse[a])
                                                    break

                                            # 31: X.X.ANY.ANY, all substrates, human, T = 37, pH = correct
                                            # (no substrate match required; the raw VALUE is stored together with
                                            #  the row's best similarity to any qualifying reaction metabolite)
                                            tanimoto = []
                                            for a in range(len(metabolites_reverse)):
                                                if (sub_temp == 37) and (sub_ph == met_ph[a]) and (not mutation):

                                                    # if id's available, report tanimoto similarity
                                                    # (the | operator on two pybel fingerprints gives their Tanimoto
                                                    #  coefficient); otherwise record 0
                                                    if (type(met_inchi[a]) == str) and (type(sub_inchi) == str):
                                                        tanimoto.append(pybel.readstring('inchi',met_inchi[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                    elif (type(met_smiles[a]) == str) and (type(sub_inchi) == str):
                                                        tanimoto.append(pybel.readstring('smiles',met_smiles[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                    else:
                                                        tanimoto.append(0)

                                            if len(tanimoto) > 0:
                                                values_ec_31[c].append(df_human.loc[b]['VALUE'])
                                                tanimoto_ec_31[c].append(max(tanimoto))

                                            # 32: X.X.ANY.ANY, all substrates, human, T = 25-40, pH within 1.0 of correct
                                            tanimoto = []
                                            for a in range(len(metabolites_reverse)):
                                                if ((sub_temp >= 25) and (sub_temp <= 40)) and (abs(sub_ph-met_ph[a]) <= 1) and (not mutation):

                                                    # if id's available, report tanimoto similarity
                                                    if (type(met_inchi[a]) == str) and (type(sub_inchi) == str):
                                                        tanimoto.append(pybel.readstring('inchi',met_inchi[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                    elif (type(met_smiles[a]) == str) and (type(sub_inchi) == str):
                                                        tanimoto.append(pybel.readstring('smiles',met_smiles[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                    else:
                                                        tanimoto.append(0)

                                            if len(tanimoto) > 0:
                                                values_ec_32[c].append(df_human.loc[b]['VALUE'])
                                                tanimoto_ec_32[c].append(max(tanimoto))

                                            # 33: X.X.ANY.ANY, all substrates, human, T = 25-40 + unknown, pH within 1.0 of correct + unknown
                                            tanimoto = []
                                            for a in range(len(metabolites_reverse)):
                                                if (((sub_temp >= 25) and (sub_temp <= 40)) or np.isnan(sub_temp)) and ((abs(sub_ph-met_ph[a]) <= 1) or np.isnan(sub_ph)) and (not mutation):

                                                    # if id's available, report tanimoto similarity
                                                    if (type(met_inchi[a]) == str) and (type(sub_inchi) == str):
                                                        tanimoto.append(pybel.readstring('inchi',met_inchi[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                    elif (type(met_smiles[a]) == str) and (type(sub_inchi) == str):
                                                        tanimoto.append(pybel.readstring('smiles',met_smiles[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                    else:
                                                        tanimoto.append(0)

                                            if len(tanimoto) > 0:
                                                values_ec_33[c].append(df_human.loc[b]['VALUE'])
                                                tanimoto_ec_33[c].append(max(tanimoto))

                                # iterate over all available matching all organism ec numbers
                                for ec in available_all:
                                    if (ec.split('.')[0] == ecs1[c]) and (ec.split('.')[1] == ecs2[c]):

                                        # load all data
                                        # (the name df_human is reused here, but it holds the all-organism table)
                                        df_human = pd.read_table('_data_/processing/brenda/all_organisms/%s.tsv' % ec)

                                        # iterate over all data
                                        for b in range(df_human.shape[0]):

                                            # get substrate ID's
                                            # (same lookup as for the human tables: position of the row's INCHI
                                            #  entry in `inchis`, non-string ids become NaN)
                                            if type(chebis[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                                sub_chebi = chebis[inchis.index(df_human.loc[b]['INCHI'])].split(':')[1]
                                            else:
                                                sub_chebi = np.nan

                                            if type(hmdbs[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                                sub_hmdb = hmdbs[inchis.index(df_human.loc[b]['INCHI'])]
                                            else:
                                                sub_hmdb = np.nan

                                            if type(inchi_codes[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                                sub_inchi = inchi_codes[inchis.index(df_human.loc[b]['INCHI'])]
                                            else:
                                                sub_inchi = np.nan

                                            if type(keggs[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                                sub_kegg = keggs[inchis.index(df_human.loc[b]['INCHI'])]
                                            else:
                                                sub_kegg = np.nan

                                            if type(pubchems[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                                sub_pubchem = pubchems[inchis.index(df_human.loc[b]['INCHI'])]
                                            else:
                                                sub_pubchem = np.nan

                                            # get temperature, ph, mutation
                                            sub_temp = round(df_human.loc[b]['TEMP'],1)
                                            sub_ph = round(df_human.loc[b]['PH'],1)
                                            if type(df_human.loc[b]['MUTATION']) == str:
                                                mutation = True
                                            else:
                                                mutation = False

                                            # 28: X.X.ANY.ANY, correct substrate, all organisms, T = 37, pH = correct
                                            for a in range(len(metabolites_reverse)):
                                                if (sub_temp == 37) and (sub_ph == met_ph[a]) and (not mutation) and ((met_chebi[a] == sub_chebi) or (met_hmdb[a] == sub_hmdb) or (met_inchi[a] == sub_inchi) or (met_kegg[a] == sub_kegg) or (met_pubchem[a] == sub_pubchem)):
                                                    values_ec_28[c].append(df_human.loc[b]['VALUE'] / stoich_reverse[a])
                                                    break

                                            # 29: X.X.ANY.ANY, correct substrate, all organisms, T = 25-40, pH within 1.0 of correct
                                            for a in range(len(metabolites_reverse)):
                                                if ((sub_temp >= 25) and (sub_temp <= 40)) and (abs(sub_ph-met_ph[a]) <= 1) and (not mutation) and ((met_chebi[a] == sub_chebi) or (met_hmdb[a] == sub_hmdb) or (met_inchi[a] == sub_inchi) or (met_kegg[a] == sub_kegg) or (met_pubchem[a] == sub_pubchem)):
                                                    values_ec_29[c].append(df_human.loc[b]['VALUE'] / stoich_reverse[a])
                                                    break

                                            # 30: X.X.ANY.ANY, correct substrate, all organisms, T = 25-40 + unknown, pH within 1.0 of correct + unknown
                                            for a in range(len(metabolites_reverse)):
                                                if (((sub_temp >= 25) and (sub_temp <= 40)) or np.isnan(sub_temp)) and ((abs(sub_ph-met_ph[a]) <= 1) or np.isnan(sub_ph)) and (not mutation) and ((met_chebi[a] == sub_chebi) or (met_hmdb[a] == sub_hmdb) or (met_inchi[a] == sub_inchi) or (met_kegg[a] == sub_kegg) or (met_pubchem[a] == sub_pubchem)):
                                                    values_ec_30[c].append(df_human.loc[b]['VALUE'] / stoich_reverse[a])
                                                    break

                                            # 34: X.X.ANY.ANY, all substrates, all organisms, T = 37, pH = correct
                                            tanimoto = []
                                            for a in range(len(metabolites_reverse)):
                                                if (sub_temp == 37) and (sub_ph == met_ph[a]) and (not mutation):

                                                    # if id's available, report tanimoto similarity
                                                    if (type(met_inchi[a]) == str) and (type(sub_inchi) == str):
                                                        tanimoto.append(pybel.readstring('inchi',met_inchi[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                    elif (type(met_smiles[a]) == str) and (type(sub_inchi) == str):
                                                        tanimoto.append(pybel.readstring('smiles',met_smiles[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                    else:
                                                        tanimoto.append(0)

                                            if len(tanimoto) > 0:
                                                values_ec_34[c].append(df_human.loc[b]['VALUE'])
                                                tanimoto_ec_34[c].append(max(tanimoto))

                                            # 35: X.X.ANY.ANY, all substrates, all organisms, T = 25-40, pH within 1.0 of correct
                                            tanimoto = []
                                            for a in range(len(metabolites_reverse)):
                                                if ((sub_temp >= 25) and (sub_temp <= 40)) and (abs(sub_ph-met_ph[a]) <= 1) and (not mutation):

                                                    # if id's available, report tanimoto similarity
                                                    if (type(met_inchi[a]) == str) and (type(sub_inchi) == str):
                                                        tanimoto.append(pybel.readstring('inchi',met_inchi[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                    elif (type(met_smiles[a]) == str) and (type(sub_inchi) == str):
                                                        tanimoto.append(pybel.readstring('smiles',met_smiles[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                    else:
                                                        tanimoto.append(0)

                                            if len(tanimoto) > 0:
                                                values_ec_35[c].append(df_human.loc[b]['VALUE'])
                                                tanimoto_ec_35[c].append(max(tanimoto))

                                            # 36: X.X.ANY.ANY, all substrates, all organisms, T = 25-40 + unknown, pH within 1.0 of correct + unknown
                                            tanimoto = []
                                            for a in range(len(metabolites_reverse)):
                                                if (((sub_temp >= 25) and (sub_temp <= 40)) or np.isnan(sub_temp)) and ((abs(sub_ph-met_ph[a]) <= 1) or np.isnan(sub_ph)) and (not mutation):

                                                    # if id's available, report tanimoto similarity
                                                    if (type(met_inchi[a]) == str) and (type(sub_inchi) == str):
                                                        tanimoto.append(pybel.readstring('inchi',met_inchi[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                    elif (type(met_smiles[a]) == str) and (type(sub_inchi) == str):
                                                        tanimoto.append(pybel.readstring('smiles',met_smiles[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                    else:
                                                        tanimoto.append(0)

                                            if len(tanimoto) > 0:
                                                values_ec_36[c].append(df_human.loc[b]['VALUE'])
                                                tanimoto_ec_36[c].append(max(tanimoto))

                            # aggregate data for all valid ec numbers
                            # values_ec_NN[c] / tanimoto_ec_NN[c] hold the entries collected for ec class c;
                            # valuesNN receives one aggregated number per ec class that has any data
                            values25 = []
                            values26 = []
                            values27 = []
                            values28 = []
                            values29 = []
                            values30 = []
                            values31 = []
                            values32 = []
                            values33 = []
                            values34 = []
                            values35 = []
                            values36 = []

                            # pipelines 25-30 (substrate matched): plain mean of the collected values
                            mean_pipelines = [(values_ec_25, values25), (values_ec_26, values26),
                                              (values_ec_27, values27), (values_ec_28, values28),
                                              (values_ec_29, values29), (values_ec_30, values30)]

                            # pipelines 31-36 (all substrates): mean weighted by each entry's best tanimoto score
                            weighted_pipelines = [(values_ec_31, tanimoto_ec_31, values31),
                                                  (values_ec_32, tanimoto_ec_32, values32),
                                                  (values_ec_33, tanimoto_ec_33, values33),
                                                  (values_ec_34, tanimoto_ec_34, values34),
                                                  (values_ec_35, tanimoto_ec_35, values35),
                                                  (values_ec_36, tanimoto_ec_36, values36)]

                            for c in range(len(ecs1)):
                                for ec_values, pipeline_values in mean_pipelines:
                                    if len(ec_values[c]) > 0:
                                        pipeline_values.append(np.mean(ec_values[c]))

                                for ec_values, ec_scores, pipeline_values in weighted_pipelines:
                                    if len(ec_values[c]) > 0:
                                        if max(ec_scores[c]) == 0:
                                            # no similarity information at all: fall back to equal weights.
                                            # The weights must sum to 1 (as in the branch below), otherwise
                                            # np.dot returns the SUM of the values instead of their mean.
                                            weights = np.ones(len(ec_scores[c])) / len(ec_scores[c])
                                        else:
                                            # normalize the scores so the weights sum to 1
                                            score_total = float(sum(ec_scores[c]))
                                            weights = [x / score_total for x in ec_scores[c]]
                                        pipeline_values.append(np.dot(weights, ec_values[c]))

                            # determine which pipeline should be used
                            # The chain picks the first (lowest-numbered) pipeline that produced any
                            # aggregated value and writes one row to kcat_reverse:
                            #   [RXNID of reaction i, mean over the per-ec-class values * 60 * 60, pipeline number]
                            # If none of pipelines 25-36 produced data, control falls through to the
                            # trailing else branch that follows this chain.
                            # NOTE(review): the *60*60 factor presumably converts per-second values to
                            # per-hour -- confirm the units expected downstream.
                            # NOTE(review): .loc[kcat_reverse.shape[0]] only appends a new row when the
                            # index is the default 0..n-1 -- confirm how kcat_reverse is created.
                            if len(values25) > 0:
                                kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values25)*60*60,25]
                            elif len(values26) > 0:
                                kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values26)*60*60,26]
                            elif len(values27) > 0:
                                kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values27)*60*60,27]
                            elif len(values28) > 0:
                                kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values28)*60*60,28]
                            elif len(values29) > 0:
                                kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values29)*60*60,29]
                            elif len(values30) > 0:
                                kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values30)*60*60,30]
                            elif len(values31) > 0:
                                kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values31)*60*60,31]
                            elif len(values32) > 0:
                                kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values32)*60*60,32]
                            elif len(values33) > 0:
                                kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values33)*60*60,33]
                            elif len(values34) > 0:
                                kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values34)*60*60,34]
                            elif len(values35) > 0:
                                kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values35)*60*60,35]
                            elif len(values36) > 0:
                                kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values36)*60*60,36]
                            else:

                                # # # TRY PIPELINE 37-48 # # #

                                # initialize pipeline values
                                values_ec_37 = []
                                values_ec_38 = []
                                values_ec_39 = []
                                values_ec_40 = []
                                values_ec_41 = []
                                values_ec_42 = []
                                values_ec_43 = []
                                values_ec_44 = []
                                values_ec_45 = []
                                values_ec_46 = []
                                values_ec_47 = []
                                values_ec_48 = []

                                # initialize tanimoto indices
                                tanimoto_ec_43 = []
                                tanimoto_ec_44 = []
                                tanimoto_ec_45 = []
                                tanimoto_ec_46 = []
                                tanimoto_ec_47 = []
                                tanimoto_ec_48 = []

                                # collect the unique top-level EC classes (first number only) of this reaction, skipping unknown ('-') entries
                                ecs1 = []
                                for ec in df_reactions.loc[i]['EC NUMBER'].split(' | '):
                                    if (ec.split('.')[0] != '-'):
                                        found = False
                                        for j in range(len(ecs1)):
                                            if (ec.split('.')[0] == ecs1[j]):
                                                found = True
                                        if found == False:
                                            ecs1.append(ec.split('.')[0])
                                            values_ec_37.append([])
                                            values_ec_38.append([])
                                            values_ec_39.append([])
                                            values_ec_40.append([])
                                            values_ec_41.append([])
                                            values_ec_42.append([])
                                            values_ec_43.append([])
                                            values_ec_44.append([])
                                            values_ec_45.append([])
                                            values_ec_46.append([])
                                            values_ec_47.append([])
                                            values_ec_48.append([])
                                            tanimoto_ec_43.append([])
                                            tanimoto_ec_44.append([])
                                            tanimoto_ec_45.append([])
                                            tanimoto_ec_46.append([])
                                            tanimoto_ec_47.append([])
                                            tanimoto_ec_48.append([])

                                # iterate over ec number classes
                                for c in range(len(ecs1)):

                                    # iterate over all available matching human ec numbers
                                    for ec in available_human:
                                        if (ec.split('.')[0] == ecs1[c]):

                                            # load human data
                                            df_human = pd.read_table('_data_/processing/brenda/human/%s.tsv' % ec)

                                            # iterate over human data
                                            for b in range(df_human.shape[0]):

                                                # get substrate ID's
                                                if type(chebis[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                                    sub_chebi = chebis[inchis.index(df_human.loc[b]['INCHI'])].split(':')[1]
                                                else:
                                                    sub_chebi = np.nan

                                                if type(hmdbs[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                                    sub_hmdb = hmdbs[inchis.index(df_human.loc[b]['INCHI'])]
                                                else:
                                                    sub_hmdb = np.nan

                                                if type(inchi_codes[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                                    sub_inchi = inchi_codes[inchis.index(df_human.loc[b]['INCHI'])]
                                                else:
                                                    sub_inchi = np.nan

                                                if type(keggs[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                                    sub_kegg = keggs[inchis.index(df_human.loc[b]['INCHI'])]
                                                else:
                                                    sub_kegg = np.nan

                                                if type(pubchems[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                                    sub_pubchem = pubchems[inchis.index(df_human.loc[b]['INCHI'])]
                                                else:
                                                    sub_pubchem = np.nan

                                                # get temperature, ph, mutation
                                                sub_temp = round(df_human.loc[b]['TEMP'],1)
                                                sub_ph = round(df_human.loc[b]['PH'],1)
                                                if type(df_human.loc[b]['MUTATION']) == str:
                                                    mutation = True
                                                else:
                                                    mutation = False

                                                # 37: X.ANY.ANY.ANY, correct substrate, human, T = 37, pH = correct
                                                for a in range(len(metabolites_reverse)):
                                                    if (sub_temp == 37) and (sub_ph == met_ph[a]) and (not mutation) and ((met_chebi[a] == sub_chebi) or (met_hmdb[a] == sub_hmdb) or (met_inchi[a] == sub_inchi) or (met_kegg[a] == sub_kegg) or (met_pubchem[a] == sub_pubchem)):
                                                        values_ec_37[c].append(df_human.loc[b]['VALUE'] / stoich_reverse[a])
                                                        break

                                                # 38: X.ANY.ANY.ANY, correct substrate, human, T = 25-40, pH within 1.0 of correct
                                                for a in range(len(metabolites_reverse)):
                                                    if ((sub_temp >= 25) and (sub_temp <= 40)) and (abs(sub_ph-met_ph[a]) <= 1) and (not mutation) and ((met_chebi[a] == sub_chebi) or (met_hmdb[a] == sub_hmdb) or (met_inchi[a] == sub_inchi) or (met_kegg[a] == sub_kegg) or (met_pubchem[a] == sub_pubchem)):
                                                        values_ec_38[c].append(df_human.loc[b]['VALUE'] / stoich_reverse[a])
                                                        break 

                                                # 39: X.ANY.ANY.ANY, correct substrate, human, T = 25-40 + unknown, pH within 1.0 of correct + unknown
                                                for a in range(len(metabolites_reverse)):
                                                    if (((sub_temp >= 25) and (sub_temp <= 40)) or np.isnan(sub_temp)) and ((abs(sub_ph-met_ph[a]) <= 1) or np.isnan(sub_ph)) and (not mutation) and ((met_chebi[a] == sub_chebi) or (met_hmdb[a] == sub_hmdb) or (met_inchi[a] == sub_inchi) or (met_kegg[a] == sub_kegg) or (met_pubchem[a] == sub_pubchem)):
                                                        values_ec_39[c].append(df_human.loc[b]['VALUE'] / stoich_reverse[a])
                                                        break

                                                # 43: X.ANY.ANY.ANY, all substrates, human, T = 37, pH = correct
                                                tanimoto = []
                                                for a in range(len(metabolites_reverse)):
                                                    if (sub_temp == 37) and (sub_ph == met_ph[a]) and (not mutation):

                                                        # if id's available, report tanimoto distance
                                                        if (type(met_inchi[a]) == str) and (type(sub_inchi) == str):
                                                            tanimoto.append(pybel.readstring('inchi',met_inchi[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                        elif (type(met_smiles[a]) == str) and (type(sub_inchi) == str):
                                                            tanimoto.append(pybel.readstring('smiles',met_smiles[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                        else:
                                                            tanimoto.append(0)

                                                if len(tanimoto) > 0:
                                                    values_ec_43[c].append(df_human.loc[b]['VALUE'])
                                                    tanimoto_ec_43[c].append(max(tanimoto))

                                               # 44: X.ANY.ANY.ANY, all substrates, human, T = 25-40, pH within 1.0 of correct
                                                tanimoto = []
                                                for a in range(len(metabolites_reverse)):
                                                    if ((sub_temp >= 25) and (sub_temp <= 40)) and (abs(sub_ph-met_ph[a]) <= 1) and (not mutation):

                                                        # if id's available, report tanimoto distance
                                                        if (type(met_inchi[a]) == str) and (type(sub_inchi) == str):
                                                            tanimoto.append(pybel.readstring('inchi',met_inchi[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                        elif (type(met_smiles[a]) == str) and (type(sub_inchi) == str):
                                                            tanimoto.append(pybel.readstring('smiles',met_smiles[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                        else:
                                                            tanimoto.append(0)

                                                if len(tanimoto) > 0:
                                                    values_ec_44[c].append(df_human.loc[b]['VALUE'])
                                                    tanimoto_ec_44[c].append(max(tanimoto))

                                                # 45: X.ANY.ANY.ANY, all substrates, human, T = 25-40 + unknown, pH within 1.0 of correct + unknown
                                                tanimoto = []
                                                for a in range(len(metabolites_reverse)):
                                                    if (((sub_temp >= 25) and (sub_temp <= 40)) or np.isnan(sub_temp)) and ((abs(sub_ph-met_ph[a]) <= 1) or np.isnan(sub_ph)) and (not mutation):

                                                        # if id's available, report tanimoto distance
                                                        if (type(met_inchi[a]) == str) and (type(sub_inchi) == str):
                                                            tanimoto.append(pybel.readstring('inchi',met_inchi[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                        elif (type(met_smiles[a]) == str) and (type(sub_inchi) == str):
                                                            tanimoto.append(pybel.readstring('smiles',met_smiles[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                        else:
                                                            tanimoto.append(0)

                                                if len(tanimoto) > 0:
                                                    values_ec_45[c].append(df_human.loc[b]['VALUE'])
                                                    tanimoto_ec_45[c].append(max(tanimoto))         

                                    # iterate over all available matching all organism ec numbers
                                    for ec in available_all:
                                        if (ec.split('.')[0] == ecs1[c]):

                                            # load all data
                                            df_human = pd.read_table('_data_/processing/brenda/all_organisms/%s.tsv' % ec)

                                            # iterate over all data
                                            for b in range(df_human.shape[0]):

                                                # get substrate ID's
                                                if type(chebis[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                                    sub_chebi = chebis[inchis.index(df_human.loc[b]['INCHI'])].split(':')[1]
                                                else:
                                                    sub_chebi = np.nan

                                                if type(hmdbs[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                                    sub_hmdb = hmdbs[inchis.index(df_human.loc[b]['INCHI'])]
                                                else:
                                                    sub_hmdb = np.nan

                                                if type(inchi_codes[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                                    sub_inchi = inchi_codes[inchis.index(df_human.loc[b]['INCHI'])]
                                                else:
                                                    sub_inchi = np.nan

                                                if type(keggs[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                                    sub_kegg = keggs[inchis.index(df_human.loc[b]['INCHI'])]
                                                else:
                                                    sub_kegg = np.nan

                                                if type(pubchems[inchis.index(df_human.loc[b]['INCHI'])]) == str:
                                                    sub_pubchem = pubchems[inchis.index(df_human.loc[b]['INCHI'])]
                                                else:
                                                    sub_pubchem = np.nan

                                                # get temperature, ph, mutation
                                                sub_temp = round(df_human.loc[b]['TEMP'],1)
                                                sub_ph = round(df_human.loc[b]['PH'],1)
                                                if type(df_human.loc[b]['MUTATION']) == str:
                                                    mutation = True
                                                else:
                                                    mutation = False

                                                # 40: X.ANY.ANY.ANY, correct substrate, all organisms, T = 37, pH = correct
                                                for a in range(len(metabolites_reverse)):
                                                    if (sub_temp == 37) and (sub_ph == met_ph[a]) and (not mutation) and ((met_chebi[a] == sub_chebi) or (met_hmdb[a] == sub_hmdb) or (met_inchi[a] == sub_inchi) or (met_kegg[a] == sub_kegg) or (met_pubchem[a] == sub_pubchem)):
                                                        values_ec_40[c].append(df_human.loc[b]['VALUE'] / stoich_reverse[a])
                                                        break

                                                # 41: X.ANY.ANY.ANY, correct substrate, all organisms, T = 25-40, pH within 1.0 of correct
                                                for a in range(len(metabolites_reverse)):
                                                    if ((sub_temp >= 25) and (sub_temp <= 40)) and (abs(sub_ph-met_ph[a]) <= 1) and (not mutation) and ((met_chebi[a] == sub_chebi) or (met_hmdb[a] == sub_hmdb) or (met_inchi[a] == sub_inchi) or (met_kegg[a] == sub_kegg) or (met_pubchem[a] == sub_pubchem)):
                                                        values_ec_41[c].append(df_human.loc[b]['VALUE'] / stoich_reverse[a])
                                                        break 

                                                # 42: X.ANY.ANY.ANY, correct substrate, all organisms, T = 25-40 + unknown, pH within 1.0 of correct + unknown
                                                for a in range(len(metabolites_reverse)):
                                                    if (((sub_temp >= 25) and (sub_temp <= 40)) or np.isnan(sub_temp)) and ((abs(sub_ph-met_ph[a]) <= 1) or np.isnan(sub_ph)) and (not mutation) and ((met_chebi[a] == sub_chebi) or (met_hmdb[a] == sub_hmdb) or (met_inchi[a] == sub_inchi) or (met_kegg[a] == sub_kegg) or (met_pubchem[a] == sub_pubchem)):
                                                        values_ec_42[c].append(df_human.loc[b]['VALUE'] / stoich_reverse[a])
                                                        break

                                                # 46: X.ANY.ANY.ANY, all substrates, all organisms, T = 37, pH = correct
                                                tanimoto = []
                                                for a in range(len(metabolites_reverse)):
                                                    if (sub_temp == 37) and (sub_ph == met_ph[a]) and (not mutation):

                                                        # if id's available, report tanimoto distance
                                                        if (type(met_inchi[a]) == str) and (type(sub_inchi) == str):
                                                            tanimoto.append(pybel.readstring('inchi',met_inchi[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                        elif (type(met_smiles[a]) == str) and (type(sub_inchi) == str):
                                                            tanimoto.append(pybel.readstring('smiles',met_smiles[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                        else:
                                                            tanimoto.append(0)

                                                if len(tanimoto) > 0:
                                                    values_ec_46[c].append(df_human.loc[b]['VALUE'])
                                                    tanimoto_ec_46[c].append(max(tanimoto))

                                                # 47: X.ANY.ANY.ANY, all substrates, all organisms, T = 25-40, pH within 1.0 of correct
                                                tanimoto = []
                                                for a in range(len(metabolites_reverse)):
                                                    if ((sub_temp >= 25) and (sub_temp <= 40)) and (abs(sub_ph-met_ph[a]) <= 1) and (not mutation):

                                                        # if id's available, report tanimoto distance
                                                        if (type(met_inchi[a]) == str) and (type(sub_inchi) == str):
                                                            tanimoto.append(pybel.readstring('inchi',met_inchi[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                        elif (type(met_smiles[a]) == str) and (type(sub_inchi) == str):
                                                            tanimoto.append(pybel.readstring('smiles',met_smiles[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                        else:
                                                            tanimoto.append(0)

                                                if len(tanimoto) > 0:
                                                    values_ec_47[c].append(df_human.loc[b]['VALUE'])
                                                    tanimoto_ec_47[c].append(max(tanimoto))

                                                # 48: X.ANY.ANY.ANY, all substrates, all organisms, T = 25-40 + unknown, pH within 1.0 of correct + unknown
                                                tanimoto = []
                                                for a in range(len(metabolites_reverse)):
                                                    if (((sub_temp >= 25) and (sub_temp <= 40)) or np.isnan(sub_temp)) and ((abs(sub_ph-met_ph[a]) <= 1) or np.isnan(sub_ph)) and (not mutation):

                                                        # if id's available, report tanimoto distance
                                                        if (type(met_inchi[a]) == str) and (type(sub_inchi) == str):
                                                            tanimoto.append(pybel.readstring('inchi',met_inchi[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                        elif (type(met_smiles[a]) == str) and (type(sub_inchi) == str):
                                                            tanimoto.append(pybel.readstring('smiles',met_smiles[a]).calcfp() | pybel.readstring('inchi',sub_inchi).calcfp())
                                                        else:
                                                            tanimoto.append(0)

                                                if len(tanimoto) > 0:
                                                    values_ec_48[c].append(df_human.loc[b]['VALUE'])
                                                    tanimoto_ec_48[c].append(max(tanimoto))                    

                                # aggregate data for all valid ec numbers
                                values37 = []
                                values38 = []
                                values39 = []
                                values40 = []
                                values41 = []
                                values42 = []
                                values43 = []
                                values44 = []
                                values45 = []
                                values46 = []
                                values47 = []
                                values48 = []
                                for c in range(len(ecs1)):
                                    if len(values_ec_37[c]) > 0:
                                        values37.append(np.mean(values_ec_37[c]))
                                    if len(values_ec_38[c]) > 0:
                                        values38.append(np.mean(values_ec_38[c]))
                                    if len(values_ec_39[c]) > 0:
                                        values39.append(np.mean(values_ec_39[c]))
                                    if len(values_ec_40[c]) > 0:
                                        values40.append(np.mean(values_ec_40[c]))
                                    if len(values_ec_41[c]) > 0:
                                        values41.append(np.mean(values_ec_41[c]))
                                    if len(values_ec_42[c]) > 0:
                                        values42.append(np.mean(values_ec_42[c]))
                                    if len(values_ec_43[c]) > 0:
                                        if max(tanimoto_ec_43[c]) == 0:
                                            weights = np.ones(len(tanimoto_ec_43[c]))
                                        else:
                                            weights = [float(x)/sum(tanimoto_ec_43[c]) for x in tanimoto_ec_43[c]]
                                        values43.append(np.dot(weights,values_ec_43[c]))
                                    if len(values_ec_44[c]) > 0:
                                        if max(tanimoto_ec_44[c]) == 0:
                                            weights = np.ones(len(tanimoto_ec_44[c]))
                                        else:
                                            weights = [float(x)/sum(tanimoto_ec_44[c]) for x in tanimoto_ec_44[c]]
                                        values44.append(np.dot(weights,values_ec_44[c]))
                                    if len(values_ec_45[c]) > 0:
                                        if max(tanimoto_ec_45[c]) == 0:
                                            weights = np.ones(len(tanimoto_ec_45[c]))
                                        else:
                                            weights = [float(x)/sum(tanimoto_ec_45[c]) for x in tanimoto_ec_45[c]]
                                        values45.append(np.dot(weights,values_ec_45[c]))
                                    if len(values_ec_46[c]) > 0:
                                        if max(tanimoto_ec_46[c]) == 0:
                                            weights = np.ones(len(tanimoto_ec_46[c]))
                                        else:
                                            weights = [float(x)/sum(tanimoto_ec_46[c]) for x in tanimoto_ec_46[c]]
                                        values46.append(np.dot(weights,values_ec_46[c]))
                                    if len(values_ec_47[c]) > 0:
                                        if max(tanimoto_ec_47[c]) == 0:
                                            weights = np.ones(len(tanimoto_ec_47[c]))
                                        else:
                                            weights = [float(x)/sum(tanimoto_ec_47[c]) for x in tanimoto_ec_47[c]]
                                        values47.append(np.dot(weights,values_ec_47[c]))
                                    if len(values_ec_48[c]) > 0:
                                        if max(tanimoto_ec_48[c]) == 0:
                                            weights = np.ones(len(tanimoto_ec_48[c]))
                                        else:
                                            weights = [float(x)/sum(tanimoto_ec_48[c]) for x in tanimoto_ec_48[c]]
                                        values48.append(np.dot(weights,values_ec_48[c]))

                                # determine which pipeline should be used
                                if len(values37) > 0:
                                    kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values37)*60*60,37]
                                elif len(values38) > 0:
                                    kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values38)*60*60,38]
                                elif len(values39) > 0:
                                    kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values39)*60*60,39]
                                elif len(values40) > 0:
                                    kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values40)*60*60,40]
                                elif len(values41) > 0:
                                    kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values41)*60*60,41]
                                elif len(values42) > 0:
                                    kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values42)*60*60,42]
                                elif len(values43) > 0:
                                    kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values43)*60*60,43]
                                elif len(values44) > 0:
                                    kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values44)*60*60,44]
                                elif len(values45) > 0:
                                    kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values45)*60*60,45]
                                elif len(values46) > 0:
                                    kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values46)*60*60,46]
                                elif len(values47) > 0:
                                    kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values47)*60*60,47]
                                elif len(values48) > 0:
                                    kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.mean(values48)*60*60,48]
                                else:
                                    kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.nan,np.nan] 

                # if not reversible
                else:
                    kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.nan,np.nan]

        # if no EC number
        else:
            kcat_forward.loc[kcat_forward.shape[0]] = [df_reactions.loc[i]['RXNID'],np.nan,np.nan]
            kcat_reverse.loc[kcat_reverse.shape[0]] = [df_reactions.loc[i]['RXNID'],np.nan,np.nan]
            
        f.write('%d\n' % i)

### IDH1 WT values

In [None]:
# Wild-type IDH1 turnover numbers, scaled by 60*60 to match the
# 'KCAT [1/hr]' column (presumably the literature values are per second -
# TODO confirm the source units).
idh1_forward_wt = 85*60*60
idh1_neomorphic_wt = 0.019*60*60

# Overwrite the stored forward kcat of each listed reaction with the
# hand-curated value. The position of the reaction in the 'REACTION' column
# is used as the row label, exactly as in the per-reaction lookups before.
reaction_ids = kcat_forward['REACTION'].tolist()
idh1_overrides = [
    ('ICDHy', idh1_forward_wt),
    ('ICDHyp', idh1_forward_wt),
    ('IDH1_R132', idh1_neomorphic_wt),
]
for rxn_name, rxn_kcat in idh1_overrides:
    kcat_forward.loc[reaction_ids.index(rxn_name), 'KCAT [1/hr]'] = rxn_kcat

### Output result files

In [None]:
# Write the forward and reverse kcat tables to CSV files in the working
# directory, omitting the DataFrame index column.
for csv_name, kcat_table in [('kcat_forward.csv', kcat_forward),
                             ('kcat_reverse.csv', kcat_reverse)]:
    kcat_table.to_csv(csv_name, index=False)