In [None]:
# Inject a CSS rule that sets the `.container` element to 95% of the page width.
# `display` and `HTML` come from the public `IPython.display` module; importing
# `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width: 95% !important; }</style>"))

In [None]:
import os
import re
import numpy
import pandas

In [None]:
# Read the tab-separated metabolism table into `data`: the first line is the
# header row and everything following a '#' on a line is ignored as a comment.
table_path = './data_metabolism.txt'
read_options = dict(sep = '\t', header = 0, comment = '#')
with open(table_path, 'r') as handle:
    data = pandas.read_csv(handle, **read_options)

In [None]:
# Write every row whose REACTION value was already seen in an earlier row
# (i.e. all occurrences except the first) to a tab-separated report file.
repeated_rows = data[data.duplicated(['REACTION'])]
repeated_rows.to_csv('./conflicting_reactions.txt', sep = '\t', index = False)

In [None]:
# Keep only the first row for each REACTION value; later repeats are dropped.
data = data.drop_duplicates(subset = ['REACTION'], keep = 'first')

In [None]:
# Start from a clean slate: the rule-writing cell appends to 'reactions.py',
# so any file left from a previous run is deleted first.
try:
    os.remove('reactions.py')
except FileNotFoundError:
    # Nothing to delete on a first run. Any other failure (e.g. a permission
    # problem) is allowed to surface instead of being silently ignored.
    pass

In [None]:
# Turn every row of `data` into the text of a `Rule(...)` call, print it, and
# append it to 'reactions.py'. Columns are read by position: 0 = enzyme,
# 1 = rule name, 2 = comma-separated substrates, 3 = comma-separated products.

# Template for rows whose enzyme column is exactly 'spontaneous'.
# The first line used to end in "\," instead of "'," which left the rule name
# string unterminated in the generated file.
_SPONTANEOUS_RULE = (
    "Rule('{:s}',\n"
    "    {:s} |\n"
    "    {:s}, \n"
    "    Parameter('fwd_{:s}', 1), \n"
    "    Parameter('rvs_{:s}', 1))"
)

# Template for enzyme-catalysed rows: the enzyme pattern appears on both sides.
_ENZYMATIC_RULE = (
    "Rule('{:s}',\n"
    "    {:s} +\n    {:s} | \n"
    "    {:s} +\n    {:s}, \n"
    "    Parameter('fwd_{:s}', 1), \n"
    "    Parameter('rvs_{:s}', 0))"
)


def _metabolite_pattern(met_name):
    """Return the ``met(...)`` pattern text for one metabolite name.

    A name starting with a digit is prefixed with ``_``. A name containing
    ``PER`` is placed in location ``per`` with every ``PER_`` removed from the
    name; any other name is placed in location ``cyt``.
    """
    if met_name[0].isdigit():
        met_name = '_' + met_name

    # NOTE(review): this is a substring test, so any name that merely contains
    # 'PER' is routed to 'per' -- confirm that this is intended.
    if 'PER' in met_name:
        return "met(name = '{:s}', loc = 'per', prot = None)".format(met_name.replace('PER_', ''))
    return "met(name = '{:s}', loc = 'cyt', prot = None)".format(met_name)


def _enzyme_pattern(enzyme_field):
    """Return the pattern text for the enzyme column of one row.

    * contains ``CPLX``  -> a single ``cplx(...)`` pattern;
    * starts with ``[``  -> one ``prot(...)`` per listed monomer, joined with
      ``%`` and linked in a chain through the ``up``/``dw`` values;
    * anything else      -> a single ``prot(...)`` pattern.
    """
    if 'CPLX' in enzyme_field: # a complex is the enzyme
        return "cplx(name = '{:s}', loc = 'cyt')".format(enzyme_field.replace('-', '_'))

    if enzyme_field.startswith('['): # an enzymatic complex described by its monomers
        monomers = enzyme_field[1:-1].split(', ')
        count = len(monomers)
        # Link k joins monomer k-1 (through dw) to monomer k (through up); the
        # first monomer has no up link and the last one has no dw link.
        dw = list(range(1, count)) + [None]
        up = [None] + list(range(1, count))
        # NOTE(review): unlike the other two branches, monomer names are not
        # passed through replace('-', '_') here -- kept as in the original.
        parts = [
            "prot(name = '{:s}', loc = 'cyt', up = {:s}, dw = {:s})".format(monomer, str(up_link), str(dw_link))
            for monomer, up_link, dw_link in zip(monomers, up, dw)
        ]
        return ' %\n    '.join(parts)

    # a monomer is the enzyme
    return "prot(name = '{:s}', loc = 'cyt')".format(enzyme_field.replace('-', '_'))


def _rule_text(rxn):
    """Build the ``Rule(...)`` text for one row (indexable by position 0-3)."""
    name = rxn[1].replace('-', '_')
    if name[0].isdigit():
        name = '_' + name

    lhs = [_metabolite_pattern(subs) for subs in rxn[2].replace('-', '_').split(', ')]
    rhs = [_metabolite_pattern(prod) for prod in rxn[3].replace('-', '_').split(', ')]

    # Pad the shorter side with 'None' so both sides list the same number of terms.
    width = max(len(lhs), len(rhs))
    lhs += ['None'] * (width - len(lhs))
    rhs += ['None'] * (width - len(rhs))

    lhs = ' +\n    '.join(lhs)
    rhs = ' +\n    '.join(rhs)

    if rxn[0] == 'spontaneous':
        return _SPONTANEOUS_RULE.format(name, lhs, rhs, name, name)

    # need an enzyme
    enzyme = _enzyme_pattern(rxn[0])
    # NOTE(review): '.' is replaced by '_' across the whole enzymatic rule only;
    # spontaneous rules are left untouched, as in the original.
    return _ENZYMATIC_RULE.format(name, enzyme, lhs, enzyme, rhs, name, name).replace('.', '_')


# The file is opened once in append mode instead of being reopened for every row.
with open('reactions.py', 'a') as outfile:
    for rxn in data.values:
        Rule = _rule_text(rxn)

        print(Rule)
        print()

        outfile.write(Rule)
        outfile.write('\n\n')