In [1]:
import numpy, pandas, re

In [2]:
## Load the interaction network: a tab-separated table whose first row holds the column names.
## The rule-building cells below read the first two columns by position.
network_path = '../docs/networks/smallcompounds_network1.tsv'
with open(network_path, 'r') as handle:
    data = pandas.read_csv(handle, sep = '\t', header = 0)
data

Unnamed: 0,SOURCE,TARGET,FWD_RATE,RVS_RATE
0,PER-araF,SMALL-PER-alpha-L-arabinofuranose,1.0,1.0
1,PER-araF,SMALL-PER-beta-L-arabinofuranose,1.0,1.0
2,PER-araF,SMALL-PER-alpha-L-arabinopyranose,1.0,1.0
3,PER-araF,SMALL-PER-beta-L-arabinopyranose,1.0,1.0
4,"[crp,crp]",SMALL-CAMP,1.0,1.0
5,"[crp,SMALL-CAMP,crp]",SMALL-CAMP,1.0,1.0
6,"[lacI,lacI]",SMALL-ALLOLACTOSE,1.0,1.0
7,"[lacI,SMALL-ALLOLACTOSE,lacI]",SMALL-ALLOLACTOSE,1.0,1.0
8,"[araG,araG]",SMALL-ATP,1.0,1.0
9,araC,SMALL-alpha-L-arabinopyranose,1.0,1.0


In [3]:
def _number_complex_links(members):
    """Assign bond numbers to the agents of a single complex.

    `members` is the list of agent templates (strings starting with 'prot(' or 'met(')
    that belong to one complex, in the order they were listed.

    Returns three lists parallel to `members`: (prot_met, up, dw). Each entry is either
    an int bond number or the string 'None' for a free site.

    * Proteins are chained in listing order: the `dw` site of one protein and the `up`
      site of the next protein share a bond number (1, 2, ...). A lone protein gets no
      up/dw bond.
    * Each metabolite is bonded to the agent listed immediately before it, using the next
      unused bond number. A metabolite in the first slot would wrap around to the last
      agent (negative indexing) -- the input is assumed never to list a metabolite first.
    """
    size = len(members)
    up = ['None'] * size
    dw = ['None'] * size
    prot_met = ['None'] * size

    ## prot-prot links: slots are relative to the complex, never to the whole LHS
    prot_slots = [slot for slot, agent in enumerate(members) if agent.startswith('prot(')]
    for rank, slot in enumerate(prot_slots):
        if rank > 0:
            up[slot] = rank
        if rank < len(prot_slots) - 1:
            dw[slot] = rank + 1
    next_link = max(len(prot_slots), 1) # first number not used by a prot-prot bond

    ## prot-met links
    for slot, agent in enumerate(members):
        if agent.startswith('met('):
            prot_met[slot] = next_link
            prot_met[slot - 1] = next_link
            next_link += 1

    return prot_met, up, dw


RULE_LHS = []
## iterate the first two columns by position so a non-default index cannot misalign rows
for source, target in zip(data.iloc[:, 0], data.iloc[:, 1]):
    agents = source + ',' + target

    ## form the LHS, one template per agent, remembering where each [complex] starts and ends
    LHS = []
    spans = [] # (first, last) positions in LHS of every bracketed complex
    span_start = None
    inside_complex = False
    for raw_name in agents.split(','):
        ## defaults are a cytoplasmic protein; SMALL marks a metabolite, PER the periplasm
        kind, site = ('met', 'prot') if 'SMALL' in raw_name else ('prot', 'met')
        loc = 'per' if 'PER' in raw_name else 'cyt'

        bare = raw_name.replace('PER-', '').replace('SMALL-', '')
        opens = bare.startswith('[')   # first monomer of a complex
        closes = bare.endswith(']')    # last monomer of a complex
        molecule = bare[1:] if opens else bare
        molecule = molecule[:-1] if closes else molecule

        if opens:
            span_start = len(LHS)
        in_complex = opens or closes or inside_complex
        ## agents inside a complex keep a {:s} placeholder for their link site (filled below);
        ## free agents are explicitly unbound
        linked = '{:s}' if in_complex else 'None'

        ## the up/dw placeholders of proteins outside any complex are never filled here;
        ## the rule-printing cell replaces leftover '{:s}' with None
        if kind == 'prot':
            LHS.append('{:s}(name = \'{:s}\', loc = \'{:s}\', {:s} = {:s}, up = {{:s}}, dw = {{:s}})' \
                       .format(kind, molecule, loc, site, linked))
        else:
            LHS.append('{:s}(name = \'{:s}\', loc = \'{:s}\', {:s} = {:s})' \
                       .format(kind, molecule, loc, site, linked))

        if closes and span_start is not None:
            spans.append((span_start, len(LHS) - 1))
            span_start = None
        inside_complex = (opens or inside_complex) and not closes

    ## join complexes, last one first so earlier spans keep their positions after merging
    for first, last in reversed(spans):
        members = LHS[first:last+1]
        prot_met, up, dw = _number_complex_links(members)

        ## replace {:s} with calculated links
        linked_members = []
        for slot, agent in enumerate(members):
            if agent.startswith('prot'):
                linked_members.append(agent.format(str(prot_met[slot]), str(up[slot]), str(dw[slot])))
            else:
                linked_members.append(agent.format(str(prot_met[slot])))

        ## the whole complex collapses into a single LHS entry
        LHS[first:last+1] = [' %\n    '.join(linked_members)]

    ## LHS final join
    RULE_LHS.append(' +\n    '.join(LHS))

In [4]:
RULE_RHS = []
## iterate the first two columns by position so a non-default index cannot misalign rows
for source, target in zip(data.iloc[:, 0], data.iloc[:, 1]):
    ## write the RHS: brackets are dropped because every agent ends up in one complex
    agents = (source + ',' + target).replace('[', '').replace(']', '')

    RHS = []
    for raw_name in agents.split(','):
        ## defaults are a cytoplasmic protein; SMALL marks a metabolite, PER the periplasm
        kind, site = ('met', 'prot') if 'SMALL' in raw_name else ('prot', 'met')
        loc = 'per' if 'PER' in raw_name else 'cyt'
        molecule = raw_name.replace('PER-', '').replace('SMALL-', '')

        ## every link site starts as a {:s} placeholder and is filled in below
        if kind == 'prot':
            RHS.append(
                '{:s}(name = \'{:s}\', loc = \'{:s}\', {:s} = {{:s}}, up = {{:s}}, dw = {{:s}})' \
                .format(kind, molecule, loc, site))
        else:
            RHS.append(
                '{:s}(name = \'{:s}\', loc = \'{:s}\', {:s} = {{:s}})' \
                .format(kind, molecule, loc, site))

    ## create numbered links
    count_monomers = len(RHS)
    up = ['None'] * count_monomers
    dw = ['None'] * count_monomers
    prot_met = ['None'] * count_monomers

    ## prot-prot links: chain the proteins in listing order, so the dw site of one protein
    ## and the up site of the next share a number. Locating the proteins explicitly keeps
    ## the numbering paired for any number of proteins, wherever the metabolites sit.
    prot_slots = [slot for slot, agent in enumerate(RHS) if agent.startswith('prot(')]
    for rank, slot in enumerate(prot_slots):
        if rank > 0:
            up[slot] = rank
        if rank < len(prot_slots) - 1:
            dw[slot] = rank + 1
    next_link = max(len(prot_slots), 1) # first number not used by a prot-prot bond

    ## prot-met links: each metabolite binds the agent listed immediately before it
    ## (a metabolite in the first slot would wrap around to the last agent)
    for slot, agent in enumerate(RHS):
        if agent.startswith('met('):
            prot_met[slot] = next_link
            prot_met[slot - 1] = next_link
            next_link += 1

    ## replace {:s} with calculated links
    for slot, agent in enumerate(RHS):
        if agent.startswith('prot('):
            RHS[slot] = agent.format(str(prot_met[slot]), str(up[slot]), str(dw[slot]))
        else:
            RHS[slot] = agent.format(str(prot_met[slot]))

    RULE_RHS.append(' %\n    '.join(RHS)) # all agents are linked together

In [5]:
## Print one reversible rule per network row: LHS | RHS, followed by forward and reverse
## rate parameters that are both initialised to 0.
for row_number in range(len(data.index)):
    rule_name = 'ProtMet_RuleAssembly_' + str(row_number + 1)
    rule_text = (
        'Rule(\'{:s}\', \n'
        '    {:s} | \n'
        '    {:s}, \n'
        '    Parameter(\'fwd_{:s}\', 0), Parameter(\'rvs_{:s}\', 0))'
    ).format(rule_name, RULE_LHS[row_number], RULE_RHS[row_number], rule_name, rule_name)

    ## hyphens become underscores throughout the rule text, and any link placeholder
    ## that was never filled in is treated as an unbound site
    rule_text = rule_text.replace('-', '_')
    rule_text = rule_text.replace('{:s}', 'None')

    print(rule_text)
    print()

Rule('ProtMet_RuleAssembly_1', 
    prot(name = 'araF', loc = 'per', met = None, up = None, dw = None) +
    met(name = 'alpha_L_arabinofuranose', loc = 'per', prot = None) | 
    prot(name = 'araF', loc = 'per', met = 1, up = None, dw = None) %
    met(name = 'alpha_L_arabinofuranose', loc = 'per', prot = 1), 
    Parameter('fwd_ProtMet_RuleAssembly_1', 0), Parameter('rvs_ProtMet_RuleAssembly_1', 0))

Rule('ProtMet_RuleAssembly_2', 
    prot(name = 'araF', loc = 'per', met = None, up = None, dw = None) +
    met(name = 'beta_L_arabinofuranose', loc = 'per', prot = None) | 
    prot(name = 'araF', loc = 'per', met = 1, up = None, dw = None) %
    met(name = 'beta_L_arabinofuranose', loc = 'per', prot = 1), 
    Parameter('fwd_ProtMet_RuleAssembly_2', 0), Parameter('rvs_ProtMet_RuleAssembly_2', 0))

Rule('ProtMet_RuleAssembly_3', 
    prot(name = 'araF', loc = 'per', met = None, up = None, dw = None) +
    met(name = 'alpha_L_arabinopyranose', loc = 'per', prot = None) | 
    prot(name 