In [1]:
import numpy, pandas, re

In [2]:
with open('../docs/networks/ppi_network3.tsv', 'r') as infile:
    data = pandas.read_csv(infile, delimiter = '\t', header = 0)
# data

In [3]:
for i in data.index:
    ## write LHS (problematic because we need to find complexes in the LHS)
    agents = (data.iloc[i, 0] + ',' + data.iloc[i, 1])
    names = agents.split(',')
    
    LHS = []
    next_in_complex = False
    for molecule in names:
        if molecule[0] == '[': # we are dealing with the first monomer of a complex
            next_in_complex = True
            LHS.append('prot(name = \'{:s}\', up = {{:s}}, dw = {{:s}})'.format(molecule[1:]))
        elif molecule[-1] == ']': # we are dealing with the last monomer of a complex
            next_in_complex = False            
            LHS.append('prot(name = \'{:s}\', up = {{:s}}, dw = {{:s}})'.format(molecule[:-1]))
        elif next_in_complex:
            LHS.append('prot(name = \'{:s}\', up = {{:s}}, dw = {{:s}})'.format(molecule))
        else: # we have a monomer
            LHS.append('prot(name = \'{:s}\', up = None, dw = None)'.format(molecule))
            
    ## look for where starts and ends a complex in the LHS
    monomers = [(m.start(), m.end()) for m in re.finditer(r'[A-Za-z-_]+', agents)]
    complexes = [(m.start()+1, m.end()-1) for m in re.finditer(r'\[[A-Za-z-_, ]+\]', agents)]
    
    positions = []
    for cplx_pos in reversed(complexes):
        pos_i = None
        pos_f = None
        for index, kmer_pos in enumerate(monomers):
            if cplx_pos[0] == kmer_pos[0]:
                pos_i = index
            if cplx_pos[1] == kmer_pos[1]:
                pos_f = index
                positions.append((pos_i, pos_f))
                break

    ## join complexes following start and end positions
    start_link = 1
    for position in positions:
        count_monomers = len(LHS[position[0]:position[1]+1])
        dw = list(range(start_link, start_link + count_monomers))
        up = [dw[-1]] + dw[:-1]
        up[0] = 'None'
        dw[-1] = 'None'
        
        for index, sub_position in enumerate(range(position[0], position[1]+1)):
            LHS[sub_position] = LHS[sub_position].format(str(up[index]), str(dw[index]))

        ## join agents and remove from LHS list because they were joined into one
        LHS[position[0]] = ' %\n    '.join(LHS[position[0]:position[1]+1])
        for index in reversed(range(position[0]+1, position[1]+1)):
            LHS.pop(index)
            
        start_link += count_monomers -1

    ## final join
    LHS = ' +\n    '.join(LHS)
    
    ## write RHS (no problem, always bind molecules in a chain)
    agents = (data.iloc[i, 0] + ', ' + data.iloc[i, 1])
    agents = agents.replace('[', '').replace(']', '')
    agents = agents.split(', ')
    
    RHS = []
    # numbering links
    dw = list(range(1, len(agents)+1))
    up = [dw[-1]] + dw[:-1]
    up[0] = 'None'
    dw[-1] = 'None'

    for index, molecule in enumerate(agents):
        RHS.append('prot(name = \'{:s}\', up = {:s}, dw = {:s})' \
                   .format(molecule, str(up[index]), str(dw[index])))
    RHS = ' %\n    '.join(RHS)
    
    ## print complete rule
    name = 'ProtProt_AssemblyRule_' + str(i+1)
    print('Rule(\'{:s}\', \n' \
          '    {:s} |\n' \
          '    {:s},\n' \
          '    Parameter(\'fwd_{:s}\', 1),\n' \
          '    Parameter(\'rvs_{:s}\', 0))' \
          .format(name, LHS, RHS, name, name).replace('-', '_'))
    print()

Rule('ProtProt_AssemblyRule_1', 
    prot(name = 'lacZ', up = None, dw = None) +
    prot(name = 'lacZ', up = None, dw = None) |
    prot(name = 'lacZ', up = None, dw = 1) %
    prot(name = 'lacZ', up = 1, dw = None),
    Parameter('fwd_ProtProt_AssemblyRule_1', 1),
    Parameter('rvs_ProtProt_AssemblyRule_1', 0))

Rule('ProtProt_AssemblyRule_2', 
    prot(name = 'lacZ', up = None, dw = 2) %
    prot(name = 'lacZ', up = 2, dw = None) +
    prot(name = 'lacZ', up = None, dw = 1) %
    prot(name = 'lacZ', up = 1, dw = None) |
    prot(name = 'lacZ,lacZ', up = None, dw = 1) %
    prot(name = 'lacZ,lacZ', up = 1, dw = None),
    Parameter('fwd_ProtProt_AssemblyRule_2', 1),
    Parameter('rvs_ProtProt_AssemblyRule_2', 0))

Rule('ProtProt_AssemblyRule_3', 
    prot(name = 'lacA', up = None, dw = None) +
    prot(name = 'lacA', up = None, dw = None) |
    prot(name = 'lacA', up = None, dw = 1) %
    prot(name = 'lacA', up = 1, dw = None),
    Parameter('fwd_ProtProt_AssemblyRule_3', 1),
    P