In [1]:
# Inject a CSS rule that sets `.container` elements to 95% width.
# `display` and `HTML` come from the public IPython.display module; importing
# them from IPython.core.display is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width: 95% !important; }</style>"))

In [2]:
import numpy, pandas, re
# When True, the rule-building cells print each interaction as it is processed.
debug = False

In [3]:
# Read ./sigma-promoters.txt as a tab-separated table without a header row;
# pandas ignores the remainder of a line after a '#'.
with open('./sigma-promoters.txt', mode='r') as promoters_file:
    data = pandas.read_csv(promoters_file, sep='\t', header=None, comment='#')

In [4]:
# Read ./architecture_final.txt as a tab-separated table (first row is the header).
with open('./architecture_final.txt', mode='r') as architecture_file:
    data_arq = pandas.read_csv(architecture_file, sep='\t')

In [5]:
# Build the left-hand side (LHS) of the docking rules, one entry per row of `data`.
# Columns 0 and 1 of a row are concatenated into a comma-separated agent list; each
# agent becomes a dna(...), met(...) or prot(...) pattern, agents written inside
# [...] are joined with '%', and the resulting groups are joined with '+'.
# Outputs: RULE_LHS (rule text) and description ('# A interacts with B' headers).
# NOTE(review): `i` comes from data.index but is used positionally with .iloc;
# this assumes the default 0..n-1 index -- confirm.
description = []
RULE_LHS = []
for i in data.index:
    # data
    agents = (data.iloc[i, 0] + ', ' + data.iloc[i, 1])
    names = agents.split(', ')

    if debug:
        print(data.iloc[i, 0] + ' interacts with ' + data.iloc[i, 1])

    ## form the LHS
    LHS = []
    next_in_complex = False
    for name in names:
        # strip the bracket that marks the first/last member of a [...] complex
        if name[0] == '[': # we are dealing with the first monomer of a complex
            molecule = name[1:]
            next_in_complex = True
        elif name[-1] == ']': # we are dealing with the last monomer of a complex
            molecule = name[:-1]
            next_in_complex = False
        elif next_in_complex: # we are dealing with a monomer part of a complex
            molecule = name
        else:
            molecule = name
            linked = 'None' # not read anywhere in this cell

        # The agent kind is chosen from substrings of the raw name. The *_link
        # words are placeholders that are replaced with link values further down.
        if 'BS' in name:
            if 'pro' in name:
                # use the last two '-'-separated fields as name and type
                molecule = '{:s}\', type = \'{:s}'.format(molecule.split('-')[-2], molecule.split('-')[-1])
            LHS.append('dna(name = \'{:s}\', prot = dna_link, free = \'True\', up = bs_link, dw = bs_link)' \
                       .format(molecule))
        elif 'SMALL' in name:
            LHS.append('met(name = \'{:s}\', prot = met_link)' \
                       .format(molecule.replace('SMALL-', '')))
        else:
            LHS.append('prot(name = \'{:s}\', dna = dna_link, met = met_link, up = prot_link, dw = prot_link)' \
                       .format(molecule))

    ## look for where starts and ends a complex in the LHS
    # complexes: character spans of the text between each '[' and ']'
    # monomers: character spans of every individual agent name
    complexes = [(m.start()+1, m.end()-1) for m in re.finditer(r'\[[A-Za-z0-9-_, ]+\]', agents)]
    monomers = [(m.start(), m.end()) for m in re.finditer(r'[A-Za-z0-9-_]+', agents)]

    # Translate each complex span into (index of first monomer, index of last
    # monomer). Complexes are visited last-to-first, so the pops below do not shift
    # the indices of complexes that still have to be merged.
    positions = []
    for cplx_pos in reversed(complexes):
        pos_i = None
        pos_f = None
        for index, kmer_pos in enumerate(monomers):
            if cplx_pos[0] == kmer_pos[0]:
                pos_i = index
            if cplx_pos[1] == kmer_pos[1]:
                pos_f = index
                positions.append((pos_i, pos_f))
                break

    ## join complexes following start and end positions
    for position in positions:
        ## join agents and remove from LHS list because they were joined into one position
        LHS[position[0]] = ' %\n    '.join(LHS[position[0]:position[1]+1])
        for index in reversed(range(position[0]+1, position[1]+1)):
            LHS.pop(index)

    ## create numbered links
    # starter_link is shared by all groups of the rule, so link numbers never repeat
    starter_link = 1
    for index, agent in enumerate(LHS):
        count_monomers = len(agent.split('%')) # computed but not used below
        count_small = agent.count('met(')
        count_prots = agent.count('prot(')
        count_dnas = agent.count('dna(')

        if count_prots > 1:
            # Chain the proteins: every protein but the last gets a fresh number on
            # its `dw` site; `up` is `dw` rotated right by one, so each `up` equals
            # the previous protein's `dw` and the first `up` stays None.
            dw = [None] * count_prots
            for prot in range(count_prots-1):
                dw[prot] = starter_link
                starter_link += 1
            up = dw[-1:] + dw[:-1]
            ## and replace indexes
            # interleave as (up, dw) per protein to match the placeholder order
            c = list(zip(up, dw))
            c = [elt for sublist in c for elt in sublist]
            LHS[index] = LHS[index].replace('prot_link', '{}').format(*c)

        if count_small >= 1 and count_prots >= 1:
            # One value per met_link placeholder. Each even position shares a number
            # with the position before it; for met == 0 that is dw[-1], the last one.
            # NOTE(review): confirm this pairing is the intended protein/metabolite wiring.
            dw = [None] * (count_small + count_prots)
            for met in numpy.arange(0, count_small + count_prots, 2):
                dw[met] = starter_link
                dw[met-1] = starter_link
                starter_link += 1
            ## and replace indexes
            LHS[index] = LHS[index].replace('met_link', '{}').format(*tuple(dw))

        if count_dnas > 1:
            # links between dna agents are not numbered: every site is set to WILD
            dw = ['WILD'] * count_dnas
#             for dna in range(count_dnas-1):
#                 dw[dna] = starter_link
#                 starter_link += 1
            up = dw[-1:] + dw[:-1]
            ## and replace indexes
            c = list(zip(up, dw))
            c = [elt for sublist in c for elt in sublist]
            LHS[index] = LHS[index].replace('bs_link', '{}').format(*c)

        if count_dnas >= 1 and count_prots >= 1: # a protein is complexed with the dna
            # Only placeholders count_prots-1 and count_prots receive a link number;
            # presumably these are the last protein and the first dna agent, which
            # holds when all proteins precede the dna agents in the text -- confirm.
            dw = [None] * (count_prots + count_dnas)
            for dna in range(count_prots + count_dnas):
                if dna == count_prots:
                    dw[dna] = starter_link
                    dw[dna-1] = starter_link
                    starter_link += 1
            ## and replace indexes
            # every 'True' in the group text (the dna `free` flag) becomes 'False'
            LHS[index] = LHS[index].replace('True', 'False').replace('dna_link', '{}').format(*dw)

        ## final replace
        # placeholders that were not numbered above fall back to None / WILD
        LHS[index] = LHS[index].replace('prot_link', 'None')
        LHS[index] = LHS[index].replace('met_link', 'None')
        LHS[index] = LHS[index].replace('bs_link', 'WILD')
        LHS[index] = LHS[index].replace('dna_link', 'None')

    ## LHS final join
    LHS = ' +\n    '.join(LHS)
    RULE_LHS.append(LHS)

    description.append('# ' + data.iloc[i, 0] + ' interacts with ' + data.iloc[i, 1])

#     print(LHS)
#     print()

In [6]:
# Build the right-hand side (RHS) of the docking rules, one entry per row of `data`.
# All agents of a row (column 0 plus column 1) are written as a single complex
# joined with '%', and dna agents are emitted with free = 'False'.
# Output: RULE_RHS, aligned index-for-index with the rows of `data`.
# NOTE(review): `i` comes from data.index but is used positionally with .iloc;
# this assumes the default 0..n-1 index -- confirm.
RULE_RHS = []
for i in data.index:
    ## data
    # Brackets are stripped here, so no per-name bracket handling is needed and the
    # cell does not depend on complex-tracking state left behind by other cells.
    agents = (data.iloc[i, 0] + ', ' + data.iloc[i, 1]).replace('[', '').replace(']', '')
    names = agents.split(', ')

    ## write the RHS
    # The *_link words are placeholders that are replaced with link values below.
    RHS = []
    for name in names:
        molecule = name

        if 'BS' in name:
            if 'pro' in name:
                # use the last two '-'-separated fields as name and type
                molecule = '{:s}\', type = \'{:s}'.format(molecule.split('-')[-2], molecule.split('-')[-1])
            RHS.append('dna(name = \'{:s}\', prot = dna_link, free = \'False\', up = bs_link, dw = bs_link)' \
                       .format(molecule))
        elif 'SMALL' in name:
            RHS.append('met(name = \'{:s}\', prot = met_link)' \
                       .format(molecule.replace('SMALL-', '')))
        else:
            RHS.append('prot(name = \'{:s}\', dna = dna_link, met = met_link, up = prot_link, dw = prot_link)' \
                       .format(molecule))

    ## join complexes
    RHS = ' %\n    '.join(RHS)

    ## create numbered links
    count_small = RHS.count('met(')
    count_prots = RHS.count('prot(')
    count_dnas = RHS.count('dna(')

    starter_link = 1
    if count_prots > 1:
        # Chain the proteins: every protein but the last gets a fresh number on its
        # `dw` site; `up` is `dw` rotated right by one, so each `up` equals the
        # previous protein's `dw` and the first `up` stays None.
        dw = [None] * count_prots
        for prot in range(count_prots-1):
            dw[prot] = starter_link
            starter_link += 1
        up = dw[-1:] + dw[:-1]
        ## and replace indexes
        # interleave as (up, dw) per protein to match the placeholder order
        c = [elt for pair in zip(up, dw) for elt in pair]
        RHS = RHS.replace('prot_link', '{}').format(*c)

    if count_small >= 1:
        # One value per met_link placeholder. Each even position shares a number
        # with the position before it; for met == 0 that is dw[-1], the last one.
        # NOTE(review): confirm this pairing is the intended protein/metabolite wiring.
        dw = [None] * (count_small + count_prots)
        for met in range(0, count_small + count_prots, 2):
            dw[met] = starter_link
            dw[met-1] = starter_link
            starter_link += 1
        ## and replace indexes
        RHS = RHS.replace('met_link', '{}').format(*dw)

    if count_dnas > 1:
        # links between dna agents are not numbered: every site is set to WILD
        c = ['WILD'] * (2 * count_dnas)
        ## and replace indexes
        RHS = RHS.replace('bs_link', '{}').format(*c)

    ## always
    # Only placeholders count_prots-1 and count_prots receive a link number;
    # presumably these are the last protein and the first dna agent, which holds
    # when all proteins precede the dna agents in the text -- confirm.
    dw = [None] * (count_prots + count_dnas)
    for dna in range(count_prots + count_dnas):
        if dna == count_prots:
            dw[dna] = starter_link
            dw[dna-1] = starter_link
            starter_link += 1
    ## and replace indexes
    RHS = RHS.replace('dna_link', '{}').format(*dw)

    ## final replace
    # placeholders that were not numbered above fall back to None / WILD
    RHS = RHS.replace('prot_link', 'None')
    RHS = RHS.replace('met_link', 'None')
    RHS = RHS.replace('bs_link', 'WILD')
    RHS = RHS.replace('dna_link', 'None')

    RULE_RHS.append(RHS)

#     print(RHS)
#     print()

In [7]:
# Print one reversible docking Rule(...) for every pair (row of `data`, row of
# `data_arq`) where column 1 of the data row, minus its first three characters,
# equals column 0 of the architecture row.
#
# `description`, `RULE_LHS` and `RULE_RHS` hold one entry per row of `data`, so they
# are looked up with the row position `i`. `index` only counts the rules printed so
# far and provides the running number in the rule and parameter names; looking the
# lists up with it would pick the wrong row whenever a data row has no match or
# more than one match in `data_arq`.
index = 0
for i in data.index:
    for dna_part1 in data_arq.iloc[:,0]:
        if data.iloc[i, 1][3:] == dna_part1:
            ## complete rule
            # every '-' in the assembled text is turned into '_' before printing
            print('{:s}\n' \
                  'Rule(\'docking_{:d}_{:s}\', \n' \
                  '    {:s} | \n' \
                  '    {:s}, \n' \
                  '    Parameter(\'fwd_docking_{:d}_{:s}\', 0),\n' \
                  '    Parameter(\'rvs_docking_{:d}_{:s}\', 0))' \
                  .format(description[i], index+1, dna_part1, RULE_LHS[i], RULE_RHS[i], index+1, dna_part1, index+1, dna_part1).replace('-', '_'))
            print()
            index += 1

# [rpoA, rpoA, rpoB, rpoC, rpoD] interacts with BS_rpoA_pro1
Rule('docking_1_rpoA_pro1', 
    prot(name = 'rpoA', dna = None, met = None, up = None, dw = 1) %
    prot(name = 'rpoA', dna = None, met = None, up = 1, dw = 2) %
    prot(name = 'rpoB', dna = None, met = None, up = 2, dw = 3) %
    prot(name = 'rpoC', dna = None, met = None, up = 3, dw = 4) %
    prot(name = 'rpoD', dna = None, met = None, up = 4, dw = None) +
    dna(name = 'rpoA', type = 'pro1', prot = None, free = 'True', up = WILD, dw = WILD) | 
    prot(name = 'rpoA', dna = None, met = None, up = None, dw = 1) %
    prot(name = 'rpoA', dna = None, met = None, up = 1, dw = 2) %
    prot(name = 'rpoB', dna = None, met = None, up = 2, dw = 3) %
    prot(name = 'rpoC', dna = None, met = None, up = 3, dw = 4) %
    prot(name = 'rpoD', dna = 5, met = None, up = 4, dw = None) %
    dna(name = 'rpoA', type = 'pro1', prot = 5, free = 'False', up = WILD, dw = WILD), 
    Parameter('fwd_docking_1_rpoA_pro1', 0),
    Parameter('rv

In [8]:
## names for the dna agent
# Flatten column 1 into single names (brackets removed), keep those that start with
# 'BS' and do not contain 'pro', and show the distinct names with '-' turned into '_'.
flattened_sites = ', '.join(data.iloc[:, 1]).replace('[', '').replace(']', '')
BS = [
    site.replace('-', '_')
    for site in flattened_sites.split(', ')
    if site.startswith('BS') and 'pro' not in site
]
list(set(BS))

[]

In [9]:
# Build the left-hand side (LHS) of the sliding rules. One entry is produced for
# every pair (row of `data`, row of `data_arq`) where column 1 of the data row,
# minus its first three characters, equals column 0 of the architecture row.
# The binding site from column 1 is moved inside the bracketed complex of column 0,
# and 'BS-' + column 1 of the architecture row is added as a separate agent.
# Outputs: RULE_LHS (rule text, ending in ' + None') and description headers.
# NOTE(review): `i` comes from data.index but is used positionally with .iloc;
# this assumes the default 0..n-1 index -- confirm.
description = []
RULE_LHS = []

for i in data.index:
    for dna_part1, dna_part2 in zip(data_arq.iloc[:,0], data_arq.iloc[:,1]):
        if data.iloc[i, 1][3:] == dna_part1:
            # data
            # [:-1] drops the last character of column 0 (presumably the closing
            # ']' -- confirm) so the bracket can be re-added after the binding site
            agents = (data.iloc[i, 0][:-1] + ', ' + data.iloc[i, 1]) + ']' + ', BS-' + dna_part2
            names = agents.split(', ')

            if debug:
                print(data.iloc[i, 0] + ' slides to ' + dna_part2)

            ## form the LHS
            LHS = []
            next_in_complex = False
            for name in names:
                # strip the bracket that marks the first/last member of a [...] complex
                if name[0] == '[': # we are dealing with the first monomer of a complex
                    molecule = name[1:]
                    next_in_complex = True
                elif name[-1] == ']': # we are dealing with the last monomer of a complex
                    molecule = name[:-1]
                    next_in_complex = False
                elif next_in_complex: # we are dealing with a monomer part of a complex
                    molecule = name
                else:
                    molecule = name

                # The agent kind is chosen from substrings of the raw name. The
                # *_link words are placeholders replaced with link values below.
                if 'BS' in name:
                    if 'pro' in name or 'rbs' in name:
                        # use the last two '-'-separated fields as name and type
                        molecule = '{:s}\', type = \'{:s}'.format(molecule.split('-')[-2], molecule.split('-')[-1])
                    LHS.append('dna(name = \'{:s}\', prot = dna_link, free = \'True\', up = bs_link, dw = bs_link)' \
                               .format(molecule))
                elif 'SMALL' in name:
                    LHS.append('met(name = \'{:s}\', prot = met_link)' \
                               .format(molecule.replace('SMALL-', '')))
                else:
                    LHS.append('prot(name = \'{:s}\', dna = dna_link, met = met_link, up = prot_link, dw = prot_link)' \
                               .format(molecule))

            ## look for where starts and ends a complex in the LHS
            # complexes: character spans of the text between each '[' and ']'
            # monomers: character spans of every individual agent name
            complexes = [(m.start()+1, m.end()-1) for m in re.finditer(r'\[[A-Za-z0-9-_, ]+\]', agents)]
            monomers = [(m.start(), m.end()) for m in re.finditer(r'[A-Za-z0-9-_]+', agents)]

            # Translate each complex span into (first monomer index, last monomer
            # index). Complexes are visited last-to-first, so the pops below do not
            # shift the indices of complexes that still have to be merged.
            positions = []
            for cplx_pos in reversed(complexes):
                pos_i = None
                pos_f = None
                for index, kmer_pos in enumerate(monomers):
                    if cplx_pos[0] == kmer_pos[0]:
                        pos_i = index
                    if cplx_pos[1] == kmer_pos[1]:
                        pos_f = index
                        positions.append((pos_i, pos_f))
                        break

            ## join complexes following start and end positions
            for position in positions:
                ## join agents and remove from LHS list because they were joined into one position
                LHS[position[0]] = ' %\n    '.join(LHS[position[0]:position[1]+1])
                for index in reversed(range(position[0]+1, position[1]+1)):
                    LHS.pop(index)

            ## create numbered links
            # starter_link is shared by all groups of the rule, so numbers never repeat
            starter_link = 1
            for index, agent in enumerate(LHS):
                count_monomers = len(agent.split('%')) # computed but not used below
                count_small = agent.count('met(')
                count_prots = agent.count('prot(')
                count_dnas = agent.count('dna(')

                if count_prots > 1:
                    # Chain the proteins: every protein but the last gets a fresh
                    # number on `dw`; `up` is `dw` rotated right by one, so each `up`
                    # equals the previous protein's `dw` and the first stays None.
                    dw = [None] * count_prots
                    for prot in range(count_prots-1):
                        dw[prot] = starter_link
                        starter_link += 1
                    up = dw[-1:] + dw[:-1]
                    ## and replace indexes
                    # interleave as (up, dw) per protein to match the placeholder order
                    c = list(zip(up, dw))
                    c = [elt for sublist in c for elt in sublist]
                    LHS[index] = LHS[index].replace('prot_link', '{}').format(*c)

                if count_small >= 1 and count_prots >= 1:
                    # One value per met_link placeholder. Each even position shares a
                    # number with the position before it (dw[-1] for met == 0).
                    # NOTE(review): confirm this pairing is the intended wiring.
                    dw = [None] * (count_small + count_prots)
                    for met in numpy.arange(0, count_small + count_prots, 2):
                        dw[met] = starter_link
                        dw[met-1] = starter_link
                        starter_link += 1
                    ## and replace indexes
                    LHS[index] = LHS[index].replace('met_link', '{}').format(*tuple(dw))

                if count_dnas > 1:
                    # links between dna agents are not numbered: every site is WILD
                    dw = ['WILD'] * count_dnas
        #             for dna in range(count_dnas-1):
        #                 dw[dna] = starter_link
        #                 starter_link += 1
                    up = dw[-1:] + dw[:-1]
                    ## and replace indexes
                    c = list(zip(up, dw))
                    c = [elt for sublist in c for elt in sublist]
                    LHS[index] = LHS[index].replace('bs_link', '{}').format(*c)

                if count_dnas >= 1 and count_prots >= 1: # a protein is complexed with the dna
                    # Only placeholders count_prots-1 and count_prots get a number;
                    # presumably the last protein and the first dna agent, which
                    # holds when all proteins precede the dna agents -- confirm.
                    dw = [None] * (count_prots + count_dnas)
                    for dna in range(count_prots + count_dnas):
                        if dna == count_prots:
                            dw[dna] = starter_link
                            dw[dna-1] = starter_link
                            starter_link += 1
                    ## and replace indexes
                    # every 'True' in the group text (the dna `free` flag) becomes 'False'
                    LHS[index] = LHS[index].replace('True', 'False').replace('dna_link', '{}').format(*dw)

                ## final replace
                # placeholders that were not numbered above fall back to None / WILD
                LHS[index] = LHS[index].replace('prot_link', 'None')
                LHS[index] = LHS[index].replace('met_link', 'None')
                LHS[index] = LHS[index].replace('bs_link', 'WILD')
                LHS[index] = LHS[index].replace('dna_link', 'None')

            ## LHS final join
            # a literal ' + None' term is appended after the last group
            LHS = ' +\n    '.join(LHS) + ' + None'
            RULE_LHS.append(LHS)

            description.append('# ' + data.iloc[i, 0] + ' slides to ' + dna_part2)

# print(LHS)
# print()

In [10]:
# Build the right-hand side (RHS) of the sliding rules, for the same (data row,
# architecture row) matches as the LHS: column 1 of the data row, minus its first
# three characters, must equal column 0 of the architecture row.
# The bracketed complex is rebuilt from all but the last entry of column 0 plus
# 'BS-' + dna_part2; the last entry of column 0 and 'BS-' + dna_part1 follow as
# separate agents, and an rna(...) agent built from dna_part2 is appended.
# Outputs: RULE_RHS. This cell also appends to `description` without resetting it,
# so its headers are added after whatever `description` already holds.
# NOTE(review): `i` comes from data.index but is used positionally with .iloc;
# this assumes the default 0..n-1 index -- confirm.
RULE_RHS = []

for i in data.index:
    for dna_part1, dna_part2 in zip(data_arq.iloc[:,0], data_arq.iloc[:,1]):
        if data.iloc[i, 1][3:] == dna_part1:
            # data
            # [-1][:-1] takes the last entry of column 0 without its final
            # character (presumably the closing ']' -- confirm)
            agents = (', '.join(data.iloc[i, 0].split(', ')[:-1]) + ', BS-' + dna_part2) + '], ' + data.iloc[i, 0].split(', ')[-1][:-1] + ', BS-' + dna_part1
            names = agents.split(', ')

            if debug:
                print(data.iloc[i, 0] + ' slides to ' + data.iloc[i, 1])

            ## form the RHS
            RHS = []
            next_in_complex = False
            for name in names:
                # strip the bracket that marks the first/last member of a [...] complex
                if name[0] == '[': # we are dealing with the first monomer of a complex
                    molecule = name[1:]
                    next_in_complex = True
                elif name[-1] == ']': # we are dealing with the last monomer of a complex
                    molecule = name[:-1]
                    next_in_complex = False
                elif next_in_complex: # we are dealing with a monomer part of a complex
                    molecule = name
                else:
                    molecule = name

                # The agent kind is chosen from substrings of the raw name. The
                # *_link words are placeholders replaced with link values below.
                if 'BS' in name:
                    if 'pro' in name or 'rbs' in name or 'cds' in name:
                        # use the last two '-'-separated fields as name and type
                        molecule = '{:s}\', type = \'{:s}'.format(molecule.split('-')[-2], molecule.split('-')[-1])
                    RHS.append('dna(name = \'{:s}\', prot = dna_link, free = \'True\', up = bs_link, dw = bs_link)' \
                               .format(molecule))
                elif 'SMALL' in name:
                    RHS.append('met(name = \'{:s}\', prot = met_link)' \
                               .format(molecule.replace('SMALL-', '')))
                else:
                    RHS.append('prot(name = \'{:s}\', dna = dna_link, met = met_link, up = prot_link, dw = prot_link)' \
                               .format(molecule))

            # extra rna agent named after the first two '-'-separated fields of dna_part2
            molecule = '{:s}\', type = \'{:s}'.format(dna_part2.split('-')[0], dna_part2.split('-')[1])
            RHS.append('rna(name = \'{:s}\', dna = None, prot = None)'.format(molecule))

            ## look for where starts and ends a complex in the RHS
            # complexes: character spans of the text between each '[' and ']'
            # monomers: character spans of every individual agent name
            complexes = [(m.start()+1, m.end()-1) for m in re.finditer(r'\[[A-Za-z0-9-_, ]+\]', agents)]
            monomers = [(m.start(), m.end()) for m in re.finditer(r'[A-Za-z0-9-_]+', agents)]

            # Translate each complex span into (first monomer index, last monomer
            # index). Complexes are visited last-to-first, so the pops below do not
            # shift the indices of complexes that still have to be merged.
            positions = []
            for cplx_pos in reversed(complexes):
                pos_i = None
                pos_f = None
                for index, kmer_pos in enumerate(monomers):
                    if cplx_pos[0] == kmer_pos[0]:
                        pos_i = index
                    if cplx_pos[1] == kmer_pos[1]:
                        pos_f = index
                        positions.append((pos_i, pos_f))
                        break

            ## join complexes following start and end positions
            for position in positions:
                ## join agents and remove from RHS list because they were joined into one position
                RHS[position[0]] = ' %\n    '.join(RHS[position[0]:position[1]+1])
                for index in reversed(range(position[0]+1, position[1]+1)):
                    RHS.pop(index)

            ## create numbered links
            # starter_link is shared by all groups of the rule, so numbers never repeat
            starter_link = 1
            for index, agent in enumerate(RHS):
                count_monomers = len(agent.split('%')) # computed but not used below
                count_small = agent.count('met(')
                count_prots = agent.count('prot(')
                count_dnas = agent.count('dna(')

                if count_prots > 1:
                    # Chain the proteins: every protein but the last gets a fresh
                    # number on `dw`; `up` is `dw` rotated right by one, so each `up`
                    # equals the previous protein's `dw` and the first stays None.
                    dw = [None] * count_prots
                    for prot in range(count_prots-1):
                        dw[prot] = starter_link
                        starter_link += 1
                    up = dw[-1:] + dw[:-1]
                    ## and replace indexes
                    # interleave as (up, dw) per protein to match the placeholder order
                    c = list(zip(up, dw))
                    c = [elt for sublist in c for elt in sublist]
                    RHS[index] = RHS[index].replace('prot_link', '{}').format(*c)

                if count_small >= 1 and count_prots >= 1:
                    # One value per met_link placeholder. Each even position shares a
                    # number with the position before it (dw[-1] for met == 0).
                    # NOTE(review): confirm this pairing is the intended wiring.
                    dw = [None] * (count_small + count_prots)
                    for met in numpy.arange(0, count_small + count_prots, 2):
                        dw[met] = starter_link
                        dw[met-1] = starter_link
                        starter_link += 1
                    ## and replace indexes
                    RHS[index] = RHS[index].replace('met_link', '{}').format(*tuple(dw))

                if count_dnas > 1:
                    # links between dna agents are not numbered: every site is WILD
                    dw = ['WILD'] * count_dnas
        #             for dna in range(count_dnas-1):
        #                 dw[dna] = starter_link
        #                 starter_link += 1
                    up = dw[-1:] + dw[:-1]
                    ## and replace indexes
                    c = list(zip(up, dw))
                    c = [elt for sublist in c for elt in sublist]
                    RHS[index] = RHS[index].replace('bs_link', '{}').format(*c)

                if count_dnas >= 1 and count_prots >= 1: # a protein is complexed with the dna
                    # Only placeholders count_prots-1 and count_prots get a number;
                    # presumably the last protein and the first dna agent, which
                    # holds when all proteins precede the dna agents -- confirm.
                    dw = [None] * (count_prots + count_dnas)
                    for dna in range(count_prots + count_dnas):
                        if dna == count_prots:
                            dw[dna] = starter_link
                            dw[dna-1] = starter_link
                            starter_link += 1
                    ## and replace indexes
                    # every 'True' in the group text (the dna `free` flag) becomes 'False'
                    RHS[index] = RHS[index].replace('True', 'False').replace('dna_link', '{}').format(*dw)

                ## final replace
                # placeholders that were not numbered above fall back to None / WILD
                RHS[index] = RHS[index].replace('prot_link', 'None')
                RHS[index] = RHS[index].replace('met_link', 'None')
                RHS[index] = RHS[index].replace('bs_link', 'WILD')
                RHS[index] = RHS[index].replace('dna_link', 'None')

            ## RHS final join
            RHS = ' +\n    '.join(RHS)
            # Re-split the text on the four-space indent (one piece per agent line),
            # swap pieces 4 and 5, and turn the trailing '+\n' of the new piece 4
            # into '%\n' so that it is written as part of the complex.
            # NOTE(review): the fixed positions 4 and 5 look like they assume a
            # complex of exactly four proteins followed by the dna agent -- confirm
            # against the input data.
            RHS = RHS.split('    ')
            RHS[4], RHS[5] = RHS[5], RHS[4]
            RHS[4] = RHS[4][:-2] + '%\n'
            RHS = '    '.join(RHS)
            RULE_RHS.append(RHS)

            msg = '# ' + data.iloc[i, 0] + ' slides to ' + data.iloc[i, 1]
            description.append(msg)
#             print(msg + '\n')

# print(RHS)
# print()

In [11]:
# Print one irreversible sliding Rule(...) for every pair (row of `data`, row of
# `data_arq`) where column 1 of the data row, minus its first three characters,
# equals column 0 of the architecture row.
#
# `description`, `RULE_LHS` and `RULE_RHS` were filled once per such match, in the
# same loop order, so `index` must advance once per printed rule (inside the match
# branch). Advancing it once per data row misaligns the lookups as soon as a row
# has no match or more than one match in `data_arq`.
index = 0
for i in data.index:
    for dna_part1, dna_part2 in zip(data_arq.iloc[:,0], data_arq.iloc[:,1]):
        if data.iloc[i, 1][3:] == dna_part1:
            # rule suffix: matched part plus the last '-'-separated field of the next part
            rule_suffix = dna_part1 + '_' + dna_part2.split('-')[-1]
            ## complete rule
            # every '-' in the assembled text is turned into '_' before printing
            print('{:s}\n' \
                  'Rule(\'sliding_{:d}_{:s}\', \n' \
                  '    {:s} >> \n' \
                  '    {:s}, \n' \
                  '    Parameter(\'fwd_sliding_{:d}_{:s}\', 0))' \
                  .format(description[index], index+1, rule_suffix,
                          RULE_LHS[index], RULE_RHS[index], index+1, rule_suffix).replace('-', '_'))
            print()
            index += 1

# [rpoA, rpoA, rpoB, rpoC, rpoD] slides to rpoA_rbs
Rule('sliding_1_rpoA_pro1_rbs', 
    prot(name = 'rpoA', dna = None, met = None, up = None, dw = 1) %
    prot(name = 'rpoA', dna = None, met = None, up = 1, dw = 2) %
    prot(name = 'rpoB', dna = None, met = None, up = 2, dw = 3) %
    prot(name = 'rpoC', dna = None, met = None, up = 3, dw = 4) %
    prot(name = 'rpoD', dna = 5, met = None, up = 4, dw = None) %
    dna(name = 'rpoA', type = 'pro1', prot = 5, free = 'False', up = WILD, dw = WILD) +
    dna(name = 'rpoA', type = 'rbs', prot = None, free = 'True', up = WILD, dw = WILD) + None >> 
    prot(name = 'rpoA', dna = None, met = None, up = None, dw = 1) %
    prot(name = 'rpoA', dna = None, met = None, up = 1, dw = 2) %
    prot(name = 'rpoB', dna = None, met = None, up = 2, dw = 3) %
    prot(name = 'rpoC', dna = 4, met = None, up = 3, dw = None) %
    prot(name = 'rpoD', dna = None, met = None, up = None, dw = None) %
    dna(name = 'rpoA', type = 'rbs', prot = 4, free = 'Fa

In [12]:
# Build the left-hand side (LHS) of the sliding rules for architecture rows whose
# column 0 does not contain 'pro'. The complex is made of the first four
# comma-separated entries of column 0 of `data` plus 'BS-' + dna_part1; the next
# part, 'BS-' + dna_part2, is added as a separate agent.
# The inner loop always stops after its first iteration (see the `break` at the
# end), so only the first row of `data` is ever used.
# Outputs: RULE_LHS (rule text) and description headers, one per kept architecture row.
# NOTE(review): `i` comes from data.index but is used positionally with .iloc;
# this assumes the default 0..n-1 index -- confirm.
description = []
RULE_LHS = []

for dna_part1, dna_part2 in zip(data_arq.iloc[:,0], data_arq.iloc[:,1]):
    for i in data.index:
        if 'pro' not in dna_part1:
            # data
            # any ']' is removed before the closing bracket is re-added after the dna part
            agents = (', '.join(data.iloc[i, 0].split(', ')[0:4]) + ', BS-' + dna_part1).replace(']', '') + ']' + ', BS-' + dna_part2
            names = agents.split(', ')

            if debug:
                print('# ' + ', '.join(data.iloc[i, 0].split(', ')[0:4]) + '] slides to ' + dna_part2)

            ## form the LHS
            LHS = []
            next_in_complex = False
            for name in names:
                # strip the bracket that marks the first/last member of a [...] complex
                if name[0] == '[': # we are dealing with the first monomer of a complex
                    molecule = name[1:]
                    next_in_complex = True
                elif name[-1] == ']': # we are dealing with the last monomer of a complex
                    molecule = name[:-1]
                    next_in_complex = False
                elif next_in_complex: # we are dealing with a monomer part of a complex
                    molecule = name
                else:
                    molecule = name

                # The agent kind is chosen from substrings of the raw name. The
                # *_link words are placeholders replaced with link values below.
                if 'BS' in name:
                    # every dna agent uses its last two '-'-separated fields as name and type
                    molecule = '{:s}\', type = \'{:s}'.format(molecule.split('-')[-2], molecule.split('-')[-1])
                    LHS.append('dna(name = \'{:s}\', prot = dna_link, free = \'True\', up = bs_link, dw = bs_link)' \
                               .format(molecule))
                elif 'SMALL' in name:
                    LHS.append('met(name = \'{:s}\', prot = met_link)' \
                               .format(molecule.replace('SMALL-', '')))
                else:
                    LHS.append('prot(name = \'{:s}\', dna = dna_link, met = met_link, up = prot_link, dw = prot_link)' \
                               .format(molecule))

            # a literal 'None' term is added unless the next part contains 'ter'
            if 'ter' not in dna_part2:
                LHS.append('None')

            ## look for where starts and ends a complex in the LHS
            # complexes: character spans of the text between each '[' and ']'
            # monomers: character spans of every individual agent name
            complexes = [(m.start()+1, m.end()-1) for m in re.finditer(r'\[[A-Za-z0-9-_, ]+\]', agents)]
            monomers = [(m.start(), m.end()) for m in re.finditer(r'[A-Za-z0-9-_]+', agents)]

            # Translate each complex span into (first monomer index, last monomer
            # index). Complexes are visited last-to-first, so the pops below do not
            # shift the indices of complexes that still have to be merged.
            positions = []
            for cplx_pos in reversed(complexes):
                pos_i = None
                pos_f = None
                for index, kmer_pos in enumerate(monomers):
                    if cplx_pos[0] == kmer_pos[0]:
                        pos_i = index
                    if cplx_pos[1] == kmer_pos[1]:
                        pos_f = index
                        positions.append((pos_i, pos_f))
                        break

            ## join complexes following start and end positions
            for position in positions:
                ## join agents and remove from LHS list because they were joined into one position
                LHS[position[0]] = ' %\n    '.join(LHS[position[0]:position[1]+1])
                for index in reversed(range(position[0]+1, position[1]+1)):
                    LHS.pop(index)

            ## create numbered links
            # starter_link is shared by all groups of the rule, so numbers never repeat
            starter_link = 1
            for index, agent in enumerate(LHS):
                count_monomers = len(agent.split('%')) # computed but not used below
                count_small = agent.count('met(')
                count_prots = agent.count('prot(')
                count_dnas = agent.count('dna(')

                if count_prots > 1:
                    # Chain the proteins: every protein but the last gets a fresh
                    # number on `dw`; `up` is `dw` rotated right by one, so each `up`
                    # equals the previous protein's `dw` and the first stays None.
                    dw = [None] * count_prots
                    for prot in range(count_prots-1):
                        dw[prot] = starter_link
                        starter_link += 1
                    up = dw[-1:] + dw[:-1]
                    ## and replace indexes
                    # interleave as (up, dw) per protein to match the placeholder order
                    c = list(zip(up, dw))
                    c = [elt for sublist in c for elt in sublist]
                    LHS[index] = LHS[index].replace('prot_link', '{}').format(*c)

                if count_small >= 1 and count_prots >= 1:
                    # One value per met_link placeholder. Each even position shares a
                    # number with the position before it (dw[-1] for met == 0).
                    # NOTE(review): confirm this pairing is the intended wiring.
                    dw = [None] * (count_small + count_prots)
                    for met in numpy.arange(0, count_small + count_prots, 2):
                        dw[met] = starter_link
                        dw[met-1] = starter_link
                        starter_link += 1
                    ## and replace indexes
                    LHS[index] = LHS[index].replace('met_link', '{}').format(*tuple(dw))

                if count_dnas > 1:
                    # links between dna agents are not numbered: every site is WILD
                    dw = ['WILD'] * count_dnas
        #             for dna in range(count_dnas-1):
        #                 dw[dna] = starter_link
        #                 starter_link += 1
                    up = dw[-1:] + dw[:-1]
                    ## and replace indexes
                    c = list(zip(up, dw))
                    c = [elt for sublist in c for elt in sublist]
                    LHS[index] = LHS[index].replace('bs_link', '{}').format(*c)

                if count_dnas >= 1 and count_prots >= 1: # a protein is complexed with the dna
                    # Only placeholders count_prots-1 and count_prots get a number;
                    # presumably the last protein and the first dna agent, which
                    # holds when all proteins precede the dna agents -- confirm.
                    dw = [None] * (count_prots + count_dnas)
                    for dna in range(count_prots + count_dnas):
                        if dna == count_prots:
                            dw[dna] = starter_link
                            dw[dna-1] = starter_link
                            starter_link += 1
                    ## and replace indexes
                    # every 'True' in the group text (the dna `free` flag) becomes 'False'
                    LHS[index] = LHS[index].replace('True', 'False').replace('dna_link', '{}').format(*dw)

                ## final replace
                # placeholders that were not numbered above fall back to None / WILD
                LHS[index] = LHS[index].replace('prot_link', 'None')
                LHS[index] = LHS[index].replace('met_link', 'None')
                LHS[index] = LHS[index].replace('bs_link', 'WILD')
                LHS[index] = LHS[index].replace('dna_link', 'None')

            ## LHS final join
            LHS = ' +\n    '.join(LHS)
            RULE_LHS.append(LHS)

            description.append('# ' + ', '.join(data.iloc[i, 0].split(', ')[0:4]) + '] slides to ' + dna_part2)

#             print(LHS)
#             print()

        # unconditional: leaves the inner loop after the first row of `data`
        break # do not remove

In [13]:
description = []
RULE_RHS = []

for dna_part1, dna_part2 in zip(data_arq.iloc[:, 0], data_arq.iloc[:, 1]):
    for i in data.index:
        # Steps that start on a promoter part get no sliding rule. Either way only
        # the first row of `data` is visited: the loop always stops after one pass.
        if 'pro' in dna_part1:
            break

        # first four comma-separated names of the row; the opening '[' comes from the data itself
        core = ', '.join(data.iloc[i, 0].split(', ')[0:4])
        label = '# ' + core + '] slides to ' + dna_part2

        # right-hand side: BS-<dna_part2> sits inside the bracketed complex,
        # BS-<dna_part1> follows it as a separate agent
        agents = (core + ', BS-' + dna_part2).replace(']', '') + ']' + ', BS-' + dna_part1
        names = agents.split(', ')

        if debug:
            print(label)

        ## form the RHS: one pattern string per name, with *_link placeholders
        RHS = []
        for name in names:
            # strip the bracket that marks the first / last monomer of a complex
            if name[0] == '[':
                molecule = name[1:]
            elif name[-1] == ']':
                molecule = name[:-1]
            else:
                molecule = name

            if 'BS' in name:
                pieces = molecule.split('-')
                molecule = "{:s}', type = '{:s}".format(pieces[-2], pieces[-1])
                RHS.append(
                    "dna(name = '{:s}', prot = dna_link, free = 'True', up = bs_link, dw = bs_link)"
                    .format(molecule))
            elif 'SMALL' in name:
                RHS.append(
                    "met(name = '{:s}', prot = met_link)"
                    .format(molecule.replace('SMALL-', '')))
            else:
                RHS.append(
                    "prot(name = '{:s}', dna = dna_link, met = met_link, up = prot_link, dw = prot_link)"
                    .format(molecule))

        # an rna agent is added unless the destination part is a terminator
        if 'ter' not in dna_part2:
            gene_pieces = dna_part2.split('-')
            molecule = "{:s}', type = '{:s}".format(gene_pieces[0], gene_pieces[1])
            RHS.append("rna(name = '{:s}', dna = None, prot = None)".format(molecule))

        ## look for where starts and ends a complex in the RHS
        monomers = [m.span() for m in re.finditer(r'[A-Za-z0-9-_]+', agents)]
        bracketed = list(re.finditer(r'\[[A-Za-z0-9-_, ]+\]', agents))

        # translate character offsets into (first, last) monomer indexes;
        # last complex first, so that popping entries keeps earlier indexes valid
        spans = []
        for found in reversed(bracketed):
            inner_start = found.start() + 1
            inner_end = found.end() - 1
            first = None
            for position, (mono_start, mono_end) in enumerate(monomers):
                if mono_start == inner_start:
                    first = position
                if mono_end == inner_end:
                    spans.append((first, position))
                    break

        ## join complexes following start and end positions
        for first, last in spans:
            merged = ' %\n    '.join(RHS[first:last + 1])
            RHS[first] = merged
            # drop the members that were folded into RHS[first], from the back
            for stale in range(last, first, -1):
                RHS.pop(stale)

        ## create numbered links
        starter_link = 1
        for slot, pattern in enumerate(RHS):
            # counts come from the pattern as it was before any placeholder is filled
            count_small = pattern.count('met(')
            count_prots = pattern.count('prot(')
            count_dnas = pattern.count('dna(')

            if count_prots > 1:
                # chain the proteins: dw of one equals up of the next, open at both ends
                down = list(range(starter_link, starter_link + count_prots - 1)) + [None]
                upward = [None] + down[:-1]
                starter_link += count_prots - 1
                bonds = [site for pair in zip(upward, down) for site in pair]
                RHS[slot] = RHS[slot].replace('prot_link', '{}').format(*bonds)

            if count_small >= 1 and count_prots >= 1:
                total = count_small + count_prots
                bonds = [None] * total
                for even in range(0, total, 2):
                    # pairs each even position with the one before it (index -1 wraps to the end)
                    bonds[even] = starter_link
                    bonds[even - 1] = starter_link
                    starter_link += 1
                RHS[slot] = RHS[slot].replace('met_link', '{}').format(*tuple(bonds))

            if count_dnas > 1:
                # dna-dna neighbours are not numbered: every up/dw site becomes WILD
                bonds = ['WILD'] * (2 * count_dnas)
                RHS[slot] = RHS[slot].replace('bs_link', '{}').format(*bonds)

            if count_dnas >= 1 and count_prots >= 1:  # a protein is complexed with the dna
                # one bond between the last protein and the first dna of the complex
                bonds = [None] * (count_prots + count_dnas)
                bonds[count_prots] = starter_link
                bonds[count_prots - 1] = starter_link
                starter_link += 1
                RHS[slot] = RHS[slot].replace('True', 'False').replace('dna_link', '{}').format(*bonds)

            ## final replace: whatever placeholder is left means "unbound" (or WILD for dna neighbours)
            for placeholder, filler in (('prot_link', 'None'),
                                        ('met_link', 'None'),
                                        ('bs_link', 'WILD'),
                                        ('dna_link', 'None')):
                RHS[slot] = RHS[slot].replace(placeholder, filler)

        ## RHS final join
        RHS = ' +\n    '.join(RHS)
        RULE_RHS.append(RHS)
        description.append(label)

        break # do not remove

In [14]:
# text of one sliding rule; filled with (description, part, LHS, RHS, part)
sliding_template = (
    "{:s}\n"
    "Rule('sliding_{:s}', \n"
    "    {:s} >> \n"
    "    {:s}, \n"
    "    Parameter('fwd_sliding_{:s}', 0))"
)

index = 0  # position in description / RULE_LHS / RULE_RHS, advanced only for printed rows
for dna_part1, dna_part2 in zip(data_arq.iloc[:, 0], data_arq.iloc[:, 1]):
    if 'pro' in dna_part1:
        continue

    ## complete rule
    rule_text = sliding_template.format(
        description[index], dna_part2, RULE_LHS[index], RULE_RHS[index], dna_part2)
    # every '-' in the finished text is turned into '_'
    print(rule_text.replace('-', '_'))
    print()
    index += 1

# [rpoA, rpoA, rpoB, rpoC] slides to rpoA_cds
Rule('sliding_rpoA_cds', 
    prot(name = 'rpoA', dna = None, met = None, up = None, dw = 1) %
    prot(name = 'rpoA', dna = None, met = None, up = 1, dw = 2) %
    prot(name = 'rpoB', dna = None, met = None, up = 2, dw = 3) %
    prot(name = 'rpoC', dna = 4, met = None, up = 3, dw = None) %
    dna(name = 'rpoA', type = 'rbs', prot = 4, free = 'False', up = WILD, dw = WILD) +
    dna(name = 'rpoA', type = 'cds', prot = None, free = 'True', up = WILD, dw = WILD) +
    None >> 
    prot(name = 'rpoA', dna = None, met = None, up = None, dw = 1) %
    prot(name = 'rpoA', dna = None, met = None, up = 1, dw = 2) %
    prot(name = 'rpoB', dna = None, met = None, up = 2, dw = 3) %
    prot(name = 'rpoC', dna = 4, met = None, up = 3, dw = None) %
    dna(name = 'rpoA', type = 'cds', prot = 4, free = 'False', up = WILD, dw = WILD) +
    dna(name = 'rpoA', type = 'rbs', prot = None, free = 'True', up = WILD, dw = WILD) +
    rna(name = 'rpoA', type =

In [15]:
# Builds the left-hand side pattern of every "falloff" rule: one entry in RULE_LHS
# (and one comment line in `description`) per row of `data_arq` whose second column
# contains 'ter'. On this side the DNA part is written inside the bracketed complex.
description = []
RULE_LHS = []

for dna_part2 in data_arq.iloc[:, 1]:
    if 'ter' not in dna_part2:
        continue

    # Read the complex from the first row of `data` explicitly. This cell used to
    # index with `i`, a loop variable left over from earlier cells (whose loops stop
    # after their first pass), so it failed on a fresh kernel and could pick another
    # row after out-of-order execution.
    holoenzyme = data.iloc[0, 0]

    # first four comma-separated names, any ']' removed, then the DNA part closes the complex
    core = ', '.join(holoenzyme.split(', ')[0:4])
    agents = core.replace(']', '') + ', BS-' + dna_part2 + ']'
    names = agents.split(', ')

    if debug:
        print(holoenzyme + ' falloff from ' + dna_part2)

    ## form the LHS: one pattern string per name, with *_link placeholders
    LHS = []
    for name in names:
        # strip the bracket that marks the first / last monomer of a complex
        if name[0] == '[':
            molecule = name[1:]
        elif name[-1] == ']':
            molecule = name[:-1]
        else:
            molecule = name

        if 'BS' in name:
            if 'ter' in name:
                # 'BS-<name>-<type>' becomes the name and type fields of the dna pattern
                pieces = molecule.split('-')
                molecule = "{:s}', type = '{:s}".format(pieces[-2], pieces[-1])
            LHS.append(
                "dna(name = '{:s}', prot = dna_link, free = 'True', up = bs_link, dw = bs_link)"
                .format(molecule))
        elif 'SMALL' in name:
            LHS.append(
                "met(name = '{:s}', prot = met_link)"
                .format(molecule.replace('SMALL-', '')))
        else:
            LHS.append(
                "prot(name = '{:s}', dna = dna_link, met = met_link, up = prot_link, dw = prot_link)"
                .format(molecule))

    ## look for where starts and ends a complex in the LHS
    monomers = [m.span() for m in re.finditer(r'[A-Za-z0-9-_]+', agents)]
    bracketed = list(re.finditer(r'\[[A-Za-z0-9-_, ]+\]', agents))

    # translate character offsets into (first, last) monomer indexes;
    # last complex first, so that popping entries keeps earlier indexes valid
    spans = []
    for found in reversed(bracketed):
        inner_start = found.start() + 1
        inner_end = found.end() - 1
        first = None
        for position, (mono_start, mono_end) in enumerate(monomers):
            if mono_start == inner_start:
                first = position
            if mono_end == inner_end:
                spans.append((first, position))
                break

    ## join complexes following start and end positions
    for first, last in spans:
        merged = ' %\n    '.join(LHS[first:last + 1])
        LHS[first] = merged
        # drop the members that were folded into LHS[first], from the back
        for stale in range(last, first, -1):
            LHS.pop(stale)

    ## create numbered links
    starter_link = 1
    for slot, pattern in enumerate(LHS):
        # counts come from the pattern as it was before any placeholder is filled
        count_small = pattern.count('met(')
        count_prots = pattern.count('prot(')
        count_dnas = pattern.count('dna(')

        if count_prots > 1:
            # chain the proteins: dw of one equals up of the next, open at both ends
            down = list(range(starter_link, starter_link + count_prots - 1)) + [None]
            upward = [None] + down[:-1]
            starter_link += count_prots - 1
            bonds = [site for pair in zip(upward, down) for site in pair]
            LHS[slot] = LHS[slot].replace('prot_link', '{}').format(*bonds)

        if count_small >= 1 and count_prots >= 1:
            total = count_small + count_prots
            bonds = [None] * total
            for even in range(0, total, 2):
                # pairs each even position with the one before it (index -1 wraps to the end)
                bonds[even] = starter_link
                bonds[even - 1] = starter_link
                starter_link += 1
            LHS[slot] = LHS[slot].replace('met_link', '{}').format(*tuple(bonds))

        if count_dnas > 1:
            # dna-dna neighbours are not numbered: every up/dw site becomes WILD
            bonds = ['WILD'] * (2 * count_dnas)
            LHS[slot] = LHS[slot].replace('bs_link', '{}').format(*bonds)

        if count_dnas >= 1 and count_prots >= 1:  # a protein is complexed with the dna
            # one bond between the last protein and the first dna of the complex;
            # the dna is then no longer free
            bonds = [None] * (count_prots + count_dnas)
            bonds[count_prots] = starter_link
            bonds[count_prots - 1] = starter_link
            starter_link += 1
            LHS[slot] = LHS[slot].replace('True', 'False').replace('dna_link', '{}').format(*bonds)

        ## final replace: whatever placeholder is left means "unbound" (or WILD for dna neighbours)
        for placeholder, filler in (('prot_link', 'None'),
                                    ('met_link', 'None'),
                                    ('bs_link', 'WILD'),
                                    ('dna_link', 'None')):
            LHS[slot] = LHS[slot].replace(placeholder, filler)

    ## LHS final join
    LHS = ' +\n    '.join(LHS)
    RULE_LHS.append(LHS)

    description.append('# ' + holoenzyme + ' falloff from ' + dna_part2)

In [16]:
# Builds the right-hand side pattern of every "falloff" rule: one entry in RULE_RHS
# (and one comment line in `description`) per row of `data_arq` whose second column
# contains 'ter'. On this side the DNA part is written after the bracketed complex,
# as a separate agent.
description = []
RULE_RHS = []

for dna_part2 in data_arq.iloc[:, 1]:
    if 'ter' not in dna_part2:
        continue

    # Read the complex from the first row of `data` explicitly. This cell used to
    # index with `i`, a loop variable left over from earlier cells (whose loops stop
    # after their first pass), so it failed on a fresh kernel and could pick another
    # row after out-of-order execution.
    holoenzyme = data.iloc[0, 0]

    # first four comma-separated names close the complex; the DNA part stands alone
    core = ', '.join(holoenzyme.split(', ')[0:4])
    agents = core + '], BS-' + dna_part2
    names = agents.split(', ')

    if debug:
        print(holoenzyme + ' falloff from ' + dna_part2)

    ## form the RHS: one pattern string per name, with *_link placeholders
    RHS = []
    for name in names:
        # strip the bracket that marks the first / last monomer of a complex
        if name[0] == '[':
            molecule = name[1:]
        elif name[-1] == ']':
            molecule = name[:-1]
        else:
            molecule = name

        if 'BS' in name:
            if 'ter' in name:
                # 'BS-<name>-<type>' becomes the name and type fields of the dna pattern
                pieces = molecule.split('-')
                molecule = "{:s}', type = '{:s}".format(pieces[-2], pieces[-1])
            RHS.append(
                "dna(name = '{:s}', prot = dna_link, free = 'True', up = bs_link, dw = bs_link)"
                .format(molecule))
        elif 'SMALL' in name:
            RHS.append(
                "met(name = '{:s}', prot = met_link)"
                .format(molecule.replace('SMALL-', '')))
        else:
            RHS.append(
                "prot(name = '{:s}', dna = dna_link, met = met_link, up = prot_link, dw = prot_link)"
                .format(molecule))

    ## look for where starts and ends a complex in the RHS
    monomers = [m.span() for m in re.finditer(r'[A-Za-z0-9-_]+', agents)]
    bracketed = list(re.finditer(r'\[[A-Za-z0-9-_, ]+\]', agents))

    # translate character offsets into (first, last) monomer indexes;
    # last complex first, so that popping entries keeps earlier indexes valid
    spans = []
    for found in reversed(bracketed):
        inner_start = found.start() + 1
        inner_end = found.end() - 1
        first = None
        for position, (mono_start, mono_end) in enumerate(monomers):
            if mono_start == inner_start:
                first = position
            if mono_end == inner_end:
                spans.append((first, position))
                break

    ## join complexes following start and end positions
    for first, last in spans:
        merged = ' %\n    '.join(RHS[first:last + 1])
        RHS[first] = merged
        # drop the members that were folded into RHS[first], from the back
        for stale in range(last, first, -1):
            RHS.pop(stale)

    ## create numbered links
    starter_link = 1
    for slot, pattern in enumerate(RHS):
        # counts come from the pattern as it was before any placeholder is filled
        count_small = pattern.count('met(')
        count_prots = pattern.count('prot(')
        count_dnas = pattern.count('dna(')

        if count_prots > 1:
            # chain the proteins: dw of one equals up of the next, open at both ends
            down = list(range(starter_link, starter_link + count_prots - 1)) + [None]
            upward = [None] + down[:-1]
            starter_link += count_prots - 1
            bonds = [site for pair in zip(upward, down) for site in pair]
            RHS[slot] = RHS[slot].replace('prot_link', '{}').format(*bonds)

        if count_small >= 1 and count_prots >= 1:
            total = count_small + count_prots
            bonds = [None] * total
            for even in range(0, total, 2):
                # pairs each even position with the one before it (index -1 wraps to the end)
                bonds[even] = starter_link
                bonds[even - 1] = starter_link
                starter_link += 1
            RHS[slot] = RHS[slot].replace('met_link', '{}').format(*tuple(bonds))

        if count_dnas > 1:
            # dna-dna neighbours are not numbered: every up/dw site becomes WILD
            bonds = ['WILD'] * (2 * count_dnas)
            RHS[slot] = RHS[slot].replace('bs_link', '{}').format(*bonds)

        if count_dnas >= 1 and count_prots >= 1:  # a protein is complexed with the dna
            # one bond between the last protein and the first dna of the complex;
            # the dna is then no longer free
            bonds = [None] * (count_prots + count_dnas)
            bonds[count_prots] = starter_link
            bonds[count_prots - 1] = starter_link
            starter_link += 1
            RHS[slot] = RHS[slot].replace('True', 'False').replace('dna_link', '{}').format(*bonds)

        ## final replace: whatever placeholder is left means "unbound" (or WILD for dna neighbours)
        for placeholder, filler in (('prot_link', 'None'),
                                    ('met_link', 'None'),
                                    ('bs_link', 'WILD'),
                                    ('dna_link', 'None')):
            RHS[slot] = RHS[slot].replace(placeholder, filler)

    ## RHS final join
    RHS = ' +\n    '.join(RHS)
    RULE_RHS.append(RHS)

    description.append('# ' + holoenzyme + ' falloff from ' + dna_part2)

In [17]:
# text of one falloff rule; filled with (description, part, LHS, RHS, part)
falloff_template = (
    "{:s}\n"
    "Rule('falloff_{:s}', \n"
    "    {:s} >> \n"
    "    {:s}, \n"
    "    Parameter('fwd_falloff_{:s}', 0))"
)

index = 0  # position in description / RULE_LHS / RULE_RHS, advanced only for printed rows
for dna_part1, dna_part2 in zip(data_arq.iloc[:, 0], data_arq.iloc[:, 1]):
    if 'ter' not in dna_part2:
        continue

    ## complete rule
    rule_text = falloff_template.format(
        description[index], dna_part2, RULE_LHS[index], RULE_RHS[index], dna_part2)
    # every '-' in the finished text is turned into '_'
    print(rule_text.replace('-', '_'))
    print()
    index += 1

# [rpoA, rpoA, rpoB, rpoC, rpoD] falloff from rpoA_ter1
Rule('falloff_rpoA_ter1', 
    prot(name = 'rpoA', dna = None, met = None, up = None, dw = 1) %
    prot(name = 'rpoA', dna = None, met = None, up = 1, dw = 2) %
    prot(name = 'rpoB', dna = None, met = None, up = 2, dw = 3) %
    prot(name = 'rpoC', dna = 4, met = None, up = 3, dw = None) %
    dna(name = 'rpoA', type = 'ter1', prot = 4, free = 'False', up = WILD, dw = WILD) >> 
    prot(name = 'rpoA', dna = None, met = None, up = None, dw = 1) %
    prot(name = 'rpoA', dna = None, met = None, up = 1, dw = 2) %
    prot(name = 'rpoB', dna = None, met = None, up = 2, dw = 3) %
    prot(name = 'rpoC', dna = None, met = None, up = 3, dw = None) +
    dna(name = 'rpoA', type = 'ter1', prot = None, free = 'True', up = WILD, dw = WILD), 
    Parameter('fwd_falloff_rpoA_ter1', 0))

# [rpoA, rpoA, rpoB, rpoC, rpoD] falloff from rpoC_ter1
Rule('falloff_rpoC_ter1', 
    prot(name = 'rpoA', dna = None, met = None, up = None, dw = 1) %
    