# Making reactions file
DO 12-6-2018  
Setting up all of the C. therm reactions in a way that eQuilibrator can read


In [1]:
import numpy as np
import pandas as pd
from equilibrator_api import Reaction, ComponentContribution, ReactionMatcher, CompoundMatcher, ParseError, Pathway
from equilibrator_api.bounds import Bounds
%matplotlib inline

In [124]:
# Load the reaction list from the 'plaintext' sheet of the workbook
rxnDf = pd.read_excel(io='list of reactions.xlsx', sheet_name='plaintext')
# show only the first five reactions as a sanity check
rxnDf.head(5)

Unnamed: 0,Name,ReactionFormula,KEGG IDs,RelativeFlux,Notes
0,CBtx,H2O + ATP <=> ADP + Phosphate,,1,ATP hydrolysis for cellobiose transport
1,CBP,Phosphate + Cellobiose <=> D-Glucose + Glucose...,,1,
2,GLK,D-Glucose + GTP <=> D-Glucose-6-phosphate + GDP,,2,
3,PGMT,Glucose-1-phosphate <=> D-glucose-6-phosphate,,2,
4,PGI,D-Glucose-6-phosphate <=> D-Fructose-6-phosphate,,2,


In [139]:
# Preview the compound translation table (KEGG IDs, names, abbreviations).
# The table is loaded here so that this cell also runs on a fresh kernel:
# previously `kt` was only assigned by a cell further down the notebook, so
# Restart & Run All failed on this line with a NameError.
kt = pd.read_excel('KEGG_SEED_DO.xls')
kt[:5]

Unnamed: 0,KEGG ID(S),DATABASE,PRIMARY NAME,ABBREVIATION,NAMES,FORMULA,CHARGE,DELTAG (kcal/mol),DELTAG ERROR (kcal/mol),MASS
0,C00001,cpd00001,H2O,h2o,OH-|HO-|Water|H2O,H2O,0.0,-56.687,0.5,18.0
1,C00002,cpd00002,ATP,atp,Adenosine 5'-triphosphate|ATP,C10H13N5O13P3,-3.0,-673.85,3.04314,504.0
2,C00003,cpd00003,NAD,nad,Nicotinamideadeninedinucleotide|Nadide|Diphosp...,C21H26N7O14P2,-1.0,-529.59,4.35693,662.0
3,C00004,cpd00004,NADH,nadh,Nicotinamideadeninedinucleotide-reduced|Nicoti...,C21H27N7O14P2,-2.0,-524.32,4.26795,663.0
4,C00005,cpd00005,NADPH,nadph,Nicotinamideadeninedinucleotidephosphate-reduc...,C21H27N7O17P3,-3.0,-736.82,4.25788,742.0


In [175]:
# Build the lookup tables used to translate between KEGG IDs, long names
# and human-readable abbreviations
keggTranslatorDf = pd.read_excel('KEGG_SEED_DO.xls')
kt = keggTranslatorDf  # short alias for easier typing

# KEGG ID -> long name, indexed by lower-cased KEGG ID for better matching
ktn = (
    kt[['KEGG ID(S)', 'PRIMARY NAME']]
    .assign(**{'KEGG ID(S)': lambda df: df['KEGG ID(S)'].str.lower()})
    .set_index('KEGG ID(S)')
)

# long name -> KEGG ID, indexed by lower-cased long name
# Some names map to more than one KEGG ID. Sorting by KEGG ID first and then
# keeping the first entry of each name group selects the lowest ID.
ntk = (
    kt[['PRIMARY NAME', 'KEGG ID(S)']]
    .sort_values(by='KEGG ID(S)', ascending=True)
    .assign(**{'PRIMARY NAME': lambda df: df['PRIMARY NAME'].str.lower()})
    .groupby('PRIMARY NAME')
    .first()
)

# KEGG ID -> abbreviation, indexed by lower-cased KEGG ID for better matching
kta = (
    kt[['KEGG ID(S)', 'ABBREVIATION']]
    .assign(**{'KEGG ID(S)': lambda df: df['KEGG ID(S)'].str.lower()})
    .set_index('KEGG ID(S)')
)


In [176]:
def _lookupKeggWord(word, k2n):
    """
    Look up a single whitespace-delimited token in a translation table.

    Returns the translation as a plain str, or None when the token has no
    usable entry (key not present, or the table holds a null value for it).
    """
    key = word.lower()  # matching is case-insensitive: keys are looked up in lower case
    try:
        # pandas objects are label-indexed through .loc, plain mappings through []
        hit = k2n.loc[key] if hasattr(k2n, 'loc') else k2n[key]
    except (KeyError, TypeError):
        # Tokens such as '+' or '<=>' and unknown metabolites are not in the table.
        # TypeError is included defensively for label/index type mismatches.
        return None
    # .loc on a DataFrame yields a row (Series), or several rows (DataFrame) when
    # the index label is duplicated; drill down to the first scalar value.
    while hasattr(hit, 'iloc'):
        if len(hit) == 0:
            return None
        hit = hit.iloc[0]
    if pd.isna(hit):
        return None
    return str(hit)


def translateKeggString(keggStr, k2n):
    """
    Translate a reaction string token by token using a lookup table.

    The reaction is split on whitespace; every token found in `k2n`
    (case-insensitively) is replaced by its translation and every other
    token (operators, coefficients, unknown names) is kept unchanged.

    Args:
       keggStr: a reaction string whose tokens are separated by whitespace
       k2n: lookup table keyed by lower-case token. May be a DataFrame or
            Series indexed by the key, or a plain dict. If a key occurs more
            than once, the first entry is used.

    Returns:
       The translated reaction as a plain str. Each token is followed by a
       single space, so a non-empty result ends with a trailing space.
    """
    translated = []
    for word in keggStr.split():
        newName = _lookupKeggWord(word, k2n)
        # fall back to the original token when there is no translation
        translated.append(word if newName is None else newName)
    return ''.join(token + ' ' for token in translated)

In [177]:
# Step 1: rewrite the plaintext reaction formulas using KEGG IDs (name -> KEGG ID)
rxnDf['formula'] = rxnDf['ReactionFormula'].apply(translateKeggString, args=(ntk,))
# Step 2: rewrite the KEGG-ID formulas using short abbreviations (KEGG ID -> abbreviation)
rxnDf['form_short'] = rxnDf['formula'].apply(translateKeggString, args=(kta,))
rxnDf

Unnamed: 0,Name,ReactionFormula,KEGG IDs,RelativeFlux,Notes,formula,form_short
0,CBtx,H2O + ATP <=> ADP + Phosphate,,1,ATP hydrolysis for cellobiose transport,C00001 + C00002 <=> C00008 + C00009,h2o + atp <=> adp + pi
1,CBP,Phosphate + Cellobiose <=> D-Glucose + Glucose...,,1,,C00009 + C00185 <=> C00031 + C00103,pi + cellb <=> glc-D + g1p
2,GLK,D-Glucose + GTP <=> D-Glucose-6-phosphate + GDP,,2,,C00031 + C00044 <=> C00092 + C00035,glc-D + gtp <=> g6p + gdp
3,PGMT,Glucose-1-phosphate <=> D-glucose-6-phosphate,,2,,C00103 <=> C00092,g1p <=> g6p
4,PGI,D-Glucose-6-phosphate <=> D-Fructose-6-phosphate,,2,,C00092 <=> C00085,g6p <=> f6p
5,PFK-PPi,PPi + D-fructose-6-phosphate <=> Phosphate + D...,,2,,C00013 + C00085 <=> C00009 + C00354 + C00080,ppi + f6p <=> pi + fdp + h
6,PFK-ATP,ATP + D-Fructose-6-phosphate <=> ADP + D-Fruct...,,2,,C00002 + C00085 <=> C00008 + C00354,atp + f6p <=> adp + fdp
7,FBA,"D-Fructose-1,6-bisphosphate <=> Glycerone-phos...",,2,,C00354 <=> C00111 + C00118,fdp <=> dhap + g3p
8,TPI,Glycerone-phosphate <=> D-Glyceraldehyde3-phos...,,4,,C00111 <=> C00118,dhap <=> g3p
9,GAPDH,Phosphate + NAD + D-Glyceraldehyde3-phosphate ...,,4,,C00009 + C00003 + C00118 <=> C00004 + C00236,pi + nad + g3p <=> nadh + 13dpg


In [178]:
# Write the translated reaction table to disk.
# Caution: this overwrites the file, which may contain manual edits.
rxnDf.to_excel(excel_writer='all_rxns_v4.xlsx')