In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

from itertools import permutations, product
from primers import primers
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.Restriction import BsaI, BsmBI

In [2]:
# For each enzyme (BsaI, BsmBI): its recognition site followed by the reverse
# complement of that site, collected into one flat list of plain strings.
recognition_sites = []
for enzyme_site in (BsaI.site, BsmBI.site):
    recognition_sites.append(str(enzyme_site))
    recognition_sites.append(str(Seq(enzyme_site).reverse_complement()))
recognition_sites

['GGTCTC', 'GAGACC', 'CGTCTC', 'GAGACG']

#### GFPmut3b

In [64]:
right = 'GAGTTTGTAACAGCTGCTGGGATTACACATGGCATGGATGAACTATACAAATAATAAGCTTTGAGACCTTTCATCGCGACCTACTAGTAGCGGCCGCTGCAGGGAGTTGTCTTCGAAGACTTCGCTCTAGTCTTGGACTCCTGTTGATAGATCCAGTAATGACCTCAGAACTCCATCTGGATTTGTTCAGAACGCTCGGTTGCCGCCGGGCGTTTTTTATTGGTGAGAATCCAGGGGTCCCCAATAATTACGATTTAAATTAGTAGCCCGCCTAATGAGCGGGCTTTTTTTTAATTCCCCTATTTGTTTATTTTTCTAAATACATTCAAATATGTATCCGCTCATGAGACAATAACCCTGATAAATGCTTCAATAATATTGAAAAAGGAAGAGTATGAGCATTCAGCATTTTCGTGTGGCGCTGATTCCGTTTTTTGCGGCGTTTTGCCTGCCGGTGTTTGCGCATCCGGAAACCCTGGTGAAAGTGAAAGATGCGGAAGATCAACTGGGTGCGCGCGTGGGCTATATTGAACTGGATCTGAACAGCGGCAAAATTCTGGAATCTTTTCGTCCGGAAGAACGTTTTCCGATGATGAGCACCTTTAAAGTGCTGCTGTGCGGTGCGGTTCTGAGCCGTGTGGATGCGGGCCAGGAACAACTGGGCCGTCGTATTCATTATAGCCAGAACGATCTGGTGGAATATAGCCCGGTGACCGAAAAACATCTGACCGATGGCATGACCGTGCGTGAACTGTGCAGCGCGGCGATTACCATGAGCGATAACACCGCGGCGAACCTGCTGCTGACGACCATTGGCGGTCCGAAAGAACTGACCGCGTTTCTGCATAACATGGGCGATCATGTGACCCGTCTGGATCGTTGGGAACCGGAACTGAACGAAGCGATTCCGAACGATGAACGTGATACCACCATGCCGGCAGCAATGGCGACCACCCTGCGTAAACTGCTGACGGGTGAGCTGCTGACCCTGGCAAGCCGCCAGCAACTGATTGATTGGATGGAAGCGGATAAAGTGGCGGGTCCGCTGCTGCGTAGCGCGCTGCCGGCTGGCTGGTTTATTGCGGATAAAAGCGGTGCGGGCGAACGTGGCAGCCGTGGCATTATTGCGGCGCTGGGCCCGGATGGTAAACCGAGCCGTATTGTGGTGATTTATACCACCGGCAGCCAGGCGACGATGGATGAACGTAACCGTCAGATTGCGGAAATTGGCGCGAGCCTGATTAAACATTGGTAAACCGATACAATTAAAGGCTCCTTTTGGAGCCTTTTTTTTTGGACGACCCTTGTCCTTTTCCGCTGCATAACCCTGCTTCGGGGTCATTATAGCGATTTTTTCGGTATATCCATCCTTTTTCGCACGATATACAGGATTTTGCCAAAGGGTTCGTGTAGACTTTCCTTGGTGTATCCAACGGCGTCAGCCGGGCAGGATAGGTGAAGTAGGCCCACCCGCGAGCGGGTGTTCCTTCTTCACTGTCCCTTATTCGCACCTGGCGGTGCTCAACGGGAATCCTGCTCTGCGAGGCTGGCCGTAGGCCGGCCCCGTAGAAAAGATCAAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGCTACCAGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGC
GCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTAT'
left = 'CTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGTTCTTTCCTGCGTTATCCCCTGATTCTGTGGATAACCGTATTACCGCCTTTGAGTGAGCTGATACCGCTCGCCGCAGCCGAACGACCGAGCGCAGCGAGTCAGTGAGCGAGGAAGCGGAAGAGCGCCCAATACGCAAACCGCCTCTCCCCGCGCGTTGGCCGATTCATTAATGCAGCTGGCACGACAGGTTTCCCGACTGGAAAGCGGGCAGTGAGCGCAACGCAATTAATGTGAGTTAGCTCACTCATTAGGCAGGCGCGCCCAGCTGTCTAGGGCGGCGGATTTGTCCTACTCAGGAGAGCGTTCACCGACAAACAACAGATAAAACGAAAGGCCCAGTCTTTCGACTGAGCCTTTCGTTTTATTTGATGCCTTTAATTAAGGAGTTTTGCAGGTGCACCTGCTTTTCGCTGAATTCGCGGCCGCTTCTAGAGGGTCTGCGATGTTTGGTCTCAAATGCGTAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGAATTAGATGGTGATGTTAATGGGCACAAATTTTCTGTCAGTGGAGAGGGTGAAGGTGATGCAACATACGGAAAACTTACCCTTAAATTTATTTGCACTACTGGAAAACTACCTGTTCCATGGCCAACACTTGTCACTACTTTCGGTTATGGTGTTCAATGCTTTGCGAGATACCCAGATCATATGAAACAGCATGACTTTTTCAAGAGTGCCATGCCCGAAGGTTATGTACAGGAAAGAACTATATTTTTCAAAGATGACGGGAACTACAAGACACGTGCTGAAGTCAAGTTTGAAGGTGATACCCTTGTTAATAGAATCGAGTTAAAAGGTATTGATTTTAAAGAAGATGGAAACATTCTTGGACACAAATTGGAATACAACTATAACTCACACAATGTATACATCATGGCAGACAAACAAAAGAATGGAATCAAAGTTAACTTCAAAATTAGACACAACATTGAAGATGGAAGCGTTCAACTAGCAGACCATTATCAACAAAATACTCCAATTGGCGATGGCCCTGTCCTTTTACCAGACAACCATTACCTGTCCACACAATCTGCCCTTTCGAAAGAT'
to_add = 'CCCAACGAAAAGAGAGATCACATGGTCCTTCTT'

In [66]:
# Primers for `left`: no extension on the forward primer; the reverse primer is
# extended with the reverse complement of `to_add`.
rev_extension = str(Seq(to_add).reverse_complement())
fp, rp = primers(left, add_fwd='', add_rev=rev_extension)
(fp.seq, rp.seq)

('CTTTATAGTCCTGTCGGGTTT',
 'AAGAAGGACCATGTGATCTCTCTTTTCGTTGGGATCTTTCGAAAGGGCAGATT')

In [67]:
# Primers for `right`: `to_add` is added to the forward primer; the reverse primer
# gets no extension. (Reverse-complementing an empty sequence only ever yields '',
# so the empty string is passed directly.)
fp, rp = primers(right, add_fwd=to_add, add_rev='')
fp.seq, rp.seq

('CCCAACGAAAAGAGAGATCACATGGTCCTTCTTGAGTTTGTAACAGCTGCTG', 'ATACCAGGCGTTTCCC')

#### NrdA2 N-term

In [68]:
left = 'GTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGTTCTTTCCTGCGTTATCCCCTGATTCTGTGGATAACCGTATTACCGCCTTTGAGTGAGCTGATACCGCTCGCCGCAGCCGAACGACCGAGCGCAGCGAGTCAGTGAGCGAGGAAGCGGAAGAGCGCCCAATACGCAAACCGCCTCTCCCCGCGCGTTGGCCGATTCATTAATGCAGCTGGCACGACAGGTTTCCCGACTGGAAAGCGGGCAGTGAGCGCAACGCAATTAATGTGAGTTAGCTCACTCATTAGGCAGGCGCGCCCAGCTGTCTAGGGCGGCGGATTTGTCCTACTCAGGAGAGCGTTCACCGACAAACAACAGATAAAACGAAAGGCCCAGTCTTTCGACTGAGCCTTTCGTTTTATTTGATGCCTTTAATTAAGGAGTTTTGCAGGTGCACCTGCTTTTCGCTGAATTCGCGGCCGCTTCTAGAGGGTCTGCGATGTTTGGTCTCAAATGCCGCAGCAGACCGATCTGCATGCACGTTTTAGCGCACTGATGCAGCAGCATCGTGGTATTGTTCTGAAAGTTGCAGCAAGCTATTGTCGTGATCCGGATGATCGTGCAGATCTGGCACAGGATATTGCAACCCATCTGTGGCGTGCATTTCCGAGCTATGATCCGCATCGTCGTTTTAGCACCTGGATGTATCGTATTGCACTGAATGTTGCAATTAGCGATCTGCGTAGCCGTCGTTCAAACCTTTGTCTCACGGGCGATGCTAAAATCGACGTACTTATCGATAATATTCCTATCTCCCAGATATCCCTGGAGGAAGTTGTTAACCTGTTTAATGAAGGCAAAGAGATATATGTTTTGTCTTATAATATTGATACCAAGGAGGTTGAATATAAA'
right = 'GCCGAAGTGCTGGAAATCATTGACGAAGAAACTGGGCAGAAAATTGTTTGTACCCCTGATCATAAAGTGTATACTCTGAACCGGGGGTACGTTTCTGCTAAGGATCTCAAAGAAGACGATGAGCTGGTGTTTAGCTAATAAGCTTTGAGACCTTTCATCGCGACCTACTAGTAGCGGCCGCTGCAGGGAGTTGTCTTCGAAGACTTCGCTCTAGTCTTGGACTCCTGTTGATAGATCCAGTAATGACCTCAGAACTCCATCTGGATTTGTTCAGAACGCTCGGTTGCCGCCGGGCGTTTTTTATTGGTGAGAATCCAGGGGTCCCCAATAATTACGATTTAAATTAGTAGCCCGCCTAATGAGCGGGCTTTTTTTTAATTCCCCTATTTGTTTATTTTTCTAAATACATTCAAATATGTATCCGCTCATGAGACAATAACCCTGATAAATGCTTCAATAATATTGAAAAAGGAAGAGTATGAGCATTCAGCATTTTCGTGTGGCGCTGATTCCGTTTTTTGCGGCGTTTTGCCTGCCGGTGTTTGCGCATCCGGAAACCCTGGTGAAAGTGAAAGATGCGGAAGATCAACTGGGTGCGCGCGTGGGCTATATTGAACTGGATCTGAACAGCGGCAAAATTCTGGAATCTTTTCGTCCGGAAGAACGTTTTCCGATGATGAGCACCTTTAAAGTGCTGCTGTGCGGTGCGGTTCTGAGCCGTGTGGATGCGGGCCAGGAACAACTGGGCCGTCGTATTCATTATAGCCAGAACGATCTGGTGGAATATAGCCCGGTGACCGAAAAACATCTGACCGATGGCATGACCGTGCGTGAACTGTGCAGCGCGGCGATTACCATGAGCGATAACACCGCGGCGAACCTGCTGCTGACGACCATTGGCGGTCCGAAAGAACTGACCGCGTTTCTGCATAACATGGGCGATCATGTGACCCGTCTGGATCGTTGGGAACCGGAACTGAACGAAGCGATTCCGAACGATGAACGTGATACCACCATGCCGGCAGCAATGGCGACCACCCTGCGTAAACTGCTGACGGGTGAGCTGCTGACCCTGGCAAGCCGCCAGCAACTGATTGATTGGATGGAAGCGGATAAAGTGGCGGGTCCGCTGCTGCGTAGCGCGCTGCCGGCTGGCTGGTTTATTGCGGATAAAAGCGGTGCGGGCGAACGTGGCAGCCGTGGCATTATTGCGGCGCTGGGCCCGGATGGTAAACCGAGCCGTATTGTGGTGATTTATACCACCGGCAGCCAGGCGACGATGGATGAACGTAACCGTCAGATTGCGGAAATTGGCGCGAGCCTGATTAAACATTGGTAAACCGATACAATTAAAGGCTCCTTTTGGAGCCTTTTTTTTTGGACGACCCTTGTCCTTTTCCGCTGCATAACCCTGCTTCGGGGTCATTATAGCGATTTTTTCGGTATATCCATCCTTTTTCGCACGATATACAGGATTTTGCCAAAGGGTTCGTGTAGACTTTCCTTGGTGTATCCAACGGCGTCAGCCGGGCAGGATAGGTGAAGTAGGCCCACCCGCGAGCGGGTGTTCCTTCTTCACTGTCCCTTATTCGCACCTGGCGGTGCTCAACGGGAATCCTGCTCTGCGAGGCTGGCCGTAGGCCGGCCCCGTAGAAAAGATCAAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGCTACCAGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGA
ACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTAT'
to_add = 'GAAATTTCTGACGCAGGACTCATCAGTGAATCT'

In [69]:
# Primers for `left`: no extension on the forward primer; the reverse primer is
# extended with the reverse complement of `to_add`.
rev_extension = str(Seq(to_add).reverse_complement())
fp, rp = primers(left, add_fwd='', add_rev=rev_extension)
(fp.seq, rp.seq)

('GTCCTGTCGGGTTTCG',
 'AGATTCACTGATGAGTCCTGCGTCAGAAATTTCTTTATATTCAACCTCCTTGGTATCA')

In [70]:
# Primers for `right`: `to_add` is added to the forward primer; the reverse primer
# gets no extension. (Reverse-complementing an empty sequence only ever yields '',
# so the empty string is passed directly.)
fp, rp = primers(right, add_fwd=to_add, add_rev='')
fp.seq, rp.seq

('GAAATTTCTGACGCAGGACTCATCAGTGAATCTGCCGAAGTGCTGGAAAT', 'ATACCAGGCGTTTCCC')

#### Alternative Level-1 TetR vector

In [3]:
# Read every record of each FASTA file into a (name, sequence) table.
fastas = ['pSEVA521', 'pjump27-1asfgfp']
records = []
for fasta in fastas:
    fasta_path = 'datasets/jump/{}.fasta'.format(fasta)
    for record in SeqIO.parse(fasta_path, 'fasta'):
        records.append((record.id, str(record.seq)))
plasmids = pd.DataFrame(records, columns=['name', 'sequence'])

# Normalise the sequences: strip trailing whitespace and upper-case the bases.
plasmids['sequence'] = plasmids['sequence'].str.rstrip().str.upper()
plasmids.head()

Unnamed: 0,name,sequence
0,pSEVA521,TTAATTAAAGCGGATAACAATTTCACACAGGAGGCCGCCTAGGCCG...
1,pJUMP27-1A(sfGFP),TTAATTAAGGAGTTTTGCAGGTGCCTTGGAACACCTGCTTTTCGCT...


In [4]:
# Load the fragment table and normalise its sequences (trailing whitespace
# stripped, bases upper-cased).
fragments = pd.read_csv('datasets/jump/fragments.csv').assign(
    sequence=lambda frame: frame['sequence'].str.rstrip().str.upper()
)
fragments

Unnamed: 0,name,sequence
0,TetR,AAATTTGACAGCTTATCATCGATAAACTGTAATGCGGTAGTTTATC...
1,1A-pSC101-TetR,CGGTCCGCGCGTTGTCCTTTTCCGCTGCATAACCCTGCTTCGGGGT...


In [5]:
def reindex(plasmid, start='cggtccgcgcgttgtccttttccgctgcataaccctgcttcggggtcattatagcgattttttcggtatatccatcctttttcgcacgatatacaggatt'):
    """Rotate a circular plasmid sequence so that it begins with ``start``.

    Parameters
    ----------
    plasmid : str
        Plasmid sequence; it is compared against the upper-cased marker, so it
        is expected to be upper-case already.
    start : str, optional
        Marker sequence that should open the rotated plasmid. It is upper-cased
        before searching. Defaults to the marker this notebook has always used.

    Returns
    -------
    str
        ``plasmid`` rotated to start at the first occurrence of the marker, or
        ``plasmid`` unchanged when the marker does not occur.
    """
    marker = start.upper()

    # The plasmid is circular, so the marker may span the point where the
    # sequence was linearised. Appending the first len(marker) - 1 bases lets a
    # plain substring search see such wrap-around occurrences as well. A marker
    # longer than the plasmid is never extended, so it cannot match by wrapping
    # around more than once.
    if 0 < len(marker) <= len(plasmid):
        searchable = plasmid + plasmid[:len(marker) - 1]
    else:
        searchable = plasmid

    new_start = searchable.find(marker)
    if new_start == -1:
        return plasmid
    return plasmid[new_start:] + plasmid[:new_start]

# Rotate every plasmid sequence with reindex(), element by element.
plasmids['sequence'] = plasmids['sequence'].map(reindex)

In [6]:
# Reverse lookups from a sequence to the name it belongs to.
templates = {sequence: name for name, sequence in zip(plasmids['name'], plasmids['sequence'])}
parts = {sequence: name for name, sequence in zip(fragments['name'], fragments['sequence'])}

In [7]:
# Print every (plasmid, fragment) name pair for which the fragment sequence
# occurs verbatim inside the plasmid sequence.
for plasmid_seq in plasmids['sequence'].tolist():
    for fragment_seq in fragments['sequence'].tolist():
        if fragment_seq in plasmid_seq:
            print(templates[plasmid_seq], parts[fragment_seq])

pSEVA521 TetR
pJUMP27-1A(sfGFP) 1A-pSC101-TetR


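The 16-nt primer extensions (a prefix and a suffix for each part) are hard-coded below for now rather than derived from the sequences.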

In [8]:
# 16-nt extensions passed as add_fwd / add_rev to the primers(...) calls for the
# TetR fragment and the backbone fragment.
# tetR_suff equals the first 16 nt of the '1A-pSC101-TetR' fragment and
# backbone_suff equals the first 16 nt of the 'TetR' fragment (compare the
# fragments table). NOTE(review): the *_pref values presumably match the last
# 16 nt of the opposite fragment - confirm against the full sequences.
# The two pairs were entered in different letter case.
tetR_pref = 'ccaataattacgattt'
tetR_suff = 'cggtccgcgcgttgtc'
backbone_pref = 'CCTCGACCTGAGACAA'
backbone_suff = 'AAATTTGACAGCTTAT'

In [9]:
# Lookup from fragment name to fragment sequence.
parts_seq = fragments.set_index('name')['sequence'].to_dict()

In [10]:
# TetR fragment: forward primer extended with tetR_pref, reverse primer extended
# with the reverse complement of tetR_suff.
tetR_rev_extension = str(Seq(tetR_suff).reverse_complement())
fp, rp = primers(parts_seq['TetR'], add_fwd=tetR_pref, add_rev=tetR_rev_extension)
(fp.seq, rp.seq)

('CCAATAATTACGATTTAAATTTGACAGCTTATCATCGA', 'GACAACGCGCGGACCGTTGTCTCAGGTCGAGG')

In [11]:
# Backbone fragment: forward primer extended with backbone_pref, reverse primer
# extended with the reverse complement of backbone_suff.
backbone_rev_extension = str(Seq(backbone_suff).reverse_complement())
fp, rp = primers(parts_seq['1A-pSC101-TetR'], add_fwd=backbone_pref, add_rev=backbone_rev_extension)
(fp.seq, rp.seq)

('CCTCGACCTGAGACAACGGTCCGCGCGTTGTCCTTT',
 'ATAAGCTGTCAAATTTAAATCGTAATTATTGGGGACCCCTG')

#### Different Insulators

In [16]:
# Number of bases taken on each side of an insulator's midpoint.
OVERLAP = 16


def split_insulator(seq, overlap=OVERLAP):
    """Cut ``seq`` around its midpoint into four consecutive pieces.

    Returns the tuple (first_half, second_ext, first_ext, second_half), where
    second_ext is the ``overlap`` bases just before the midpoint and first_ext
    is the ``overlap`` bases just after it.
    """
    mid = len(seq) // 2
    return (
        seq[:mid - overlap],
        seq[mid - overlap:mid],
        seq[mid:mid + overlap],
        seq[mid + overlap:],
    )


insulators = pd.read_csv('datasets/jump/insulators.csv')

# Split once per sequence, then spread the four pieces over their columns
# (column order is kept as before: first_half, second_ext, first_ext, second_half).
pieces = [split_insulator(seq) for seq in insulators['sequence']]
for position, column in enumerate(['first_half', 'second_ext', 'first_ext', 'second_half']):
    insulators[column] = [piece[position] for piece in pieces]

insulators['combined'] = insulators['first_half'] + insulators['second_ext'] + insulators['first_ext'] + insulators['second_half']
insulators['checker'] = insulators['combined'] == insulators['sequence']

# A sequence too short for the split makes `mid - overlap` negative, which can
# drop or misplace bases without raising; stop here instead of designing
# primers from corrupted fragments.
assert insulators['checker'].all(), 'insulators that do not split cleanly: {}'.format(
    insulators.loc[~insulators['checker'], 'name'].tolist()
)
insulators

Unnamed: 0,name,sequence,first_half,second_ext,first_ext,second_half,combined,checker
0,RiboJ54,aggggtcagttgatgtgctttcaactctgatgagtcagtgatgacg...,aggggtcagttgatgtgcttt,caactctgatgagtca,gtgatgacgaaacccc,ctctacaaataattttgtttaa,aggggtcagttgatgtgctttcaactctgatgagtcagtgatgacg...,True
1,BydvJ,agggtgtctcaaggtgcgtaccttgactgatgagtccgaaaggacg...,agggtgtctcaaggtgcgtacc,ttgactgatgagtccg,aaaggacgaaacaccc,ctctacaaataattttgtttaa,agggtgtctcaaggtgcgtaccttgactgatgagtccgaaaggacg...,True
2,RiboJ57,agaagtcaattaatgtgcttttaattctgatgagtcggtgacgacg...,agaagtcaattaatgtgcttt,taattctgatgagtcg,gtgacgacgaaacttc,ctctacaaataattttgtttaa,agaagtcaattaatgtgcttttaattctgatgagtcggtgacgacg...,True
3,SarJ,gactgtcgccggatgtgtatccgacctgacgatggcccaaaagggc...,gactgtcgccggatgtgtatccg,acctgacgatggccca,aaagggccgaaacagt,cctctacaaataattttgtttaa,gactgtcgccggatgtgtatccgacctgacgatggcccaaaagggc...,True
4,PlmJ,agtcataagtctgggctaagcccactgatgagtcgctgaaatgcga...,agtcataagtctgggctaagccca,ctgatgagtcgctgaa,atgcgacgaaacttat,gacctctacaaataattttgtttaa,agtcataagtctgggctaagcccactgatgagtcgctgaaatgcga...,True
5,RiboJ64,aggagtcaattaatgtgcttttaattctgatgagacggtgacgtcg...,aggagtcaattaatgtgcttt,taattctgatgagacg,gtgacgtcgaaactcc,ctctacaaataattttgtttaa,aggagtcaattaatgtgcttttaattctgatgagacggtgacgtcg...,True
6,RiboJ53,agcggtcaacgcatgtgctttgcgttctgatgagacagtgatgtcg...,agcggtcaacgcatgtgcttt,gcgttctgatgagaca,gtgatgtcgaaaccgc,ctctacaaataattttgtttaa,agcggtcaacgcatgtgctttgcgttctgatgagacagtgatgtcg...,True
7,ScmJ,agcgctgtctgtacttgtatcagtacactgacgagtccctaaagga...,agcgctgtctgtacttgtatcag,tacactgacgagtccc,taaaggacgaaacacc,gcctctacaaataattttgtttaa,agcgctgtctgtacttgtatcagtacactgacgagtccctaaagga...,True
8,RiboJ60,ctgaagtcgtcaagtgctgtgcttgcacttctgatgaggcagtgat...,ctgaagtcgtcaagtgctgtgct,tgcacttctgatgagg,cagtgatgccgaaacg,acctctacaaataattttgtttaa,ctgaagtcgtcaagtgctgtgcttgcacttctgatgaggcagtgat...,True
9,RiboJ10,agcgctcaacgggtgtgcttcccgttctgatgagtccgtgaggacg...,agcgctcaacgggtgtgcttc,ccgttctgatgagtcc,gtgaggacgaaagcgc,ctctacaaataattttgtttaa,agcgctcaacgggtgtgcttcccgttctgatgagtccgtgaggacg...,True


In [18]:
backbone_left = 'acgagggagcttccagggggaaacgcctggtatctttatagtcctgtcgggtttcgccacctctgacttgagcgtcgatttttgtgatgctcgtcaggggggcggagcctatggaaaaacgccagcaacgcggcctttttacggttcctggccttttgctggccttttgctcacatgttctttcctgcgttatcccctgattctgtggataaccgtattaccgcctttgagtgagctgataccgctcgccgcagccgaacgaccgagcgcagcgagtcagtgagcgaggaagcggaagagcgcccaatacgcaaaccgcctctccccgcgcgttggccgattcattaatgcagctggcacgacaggtttcccgactggaaagcgggcagtgagcgcaacgcaattaatgtgagttagctcactcattaggcaggcgcgcccagctgtctagggcggcggatttgtcctactcaggagagcgttcaccgacaaacaacagataaaacgaaaggcccagtctttcgactgagcctttcgttttatttgatgcctttaattaaggagttttgcaggtgcacctgcttttcgctgaattcgcggccgcttctagagggtctgcgatgtttggtctCATGAC'
backbone_right = 'TACTTGAGACCtttcatcgcgacctactagtagcggccgctgcagggagttgtcttcgaagacttcgctctagtcttggactcctgttgatagatccagtaatgacctcagaactccatctggatttgttcagaacgctcggttgccgccgggcgttttttattggtgagaatccaggggtccccaataattacgatttaaattagtagcccgcctaatgagcgggcttttttttaattcccctatttgtttatttttctaaatacattcaaatatgtatccgctcatgagacaataaccctgataaatgcttcaataatattgaaaaaggaagagtatgagcattcagcattttcgtgtggcgctgattccgttttttgcggcgttttgcctgccggtgtttgcgcatccggaaaccctggtgaaagtgaaagatgcggaagatcaactgggtgcgcgcgtgggctatattgaactggatctgaacagcggcaaaattctggaatcttttcgtccggaagaacgttttccgatgatgagcacctttaaagtgctgctgtgcggtgcggttctgagccgtgtggatgcgggccaggaacaactgggccgtcgtattcattatagccagaacgatctggtggaatatagcccggtgaccgaaaaacatctgaccgatggcatgaccgtgcgtgaactgtgcagcgcggcgattaccatgagcgataacaccgcggcgaacctgctgctgacgaccattggcggtccgaaagaactgaccgcgtttctgcataacatgggcgatcatgtgacccgtctggatcgttgggaaccggaactgaacgaagcgattccgaacgatgaacgtgataccaccatgccggcagcaatggcgaccaccctgcgtaaactgctgacgggtgagctgctgaccctggcaagccgccagcaactgattgattggatggaagcggataaagtggcgggtccgctgctgcgtagcgcgctgccggctggctggtttattgcggataaaagcggtgcgggcgaacgtggcagccgtggcattattgcggcgctgggcccggatggtaaaccgagccgtattgtggtgatttataccaccggcagccaggcgacgatggatgaacgtaaccgtcagattgcggaaattggcgcgagcctgattaaacattggtaaaccgatacaattaaaggctccttttggagcctttttttttggacgacccttgtccttttccgctgcataaccctgcttcggggtcattatagcgattttttcggtatatccatcctttttcgcacgatatacaggattttgccaaagggttcgtgtagactttccttggtgtatccaacggcgtcagccgggcaggataggtgaagtaggcccacccgcgagcgggtgttccttcttcactgtcccttattcgcacctggcggtgctcaacgggaatcctgctctgcgaggctggccgtaggccggccccgtagaaaagatcaaaggatcttcttgagatcctttttttctgcgcgtaatctgctgcttgcaaacaaaaaaaccaccgctaccagcggtggtttgtttgccggatcaagagctaccaactctttttccgaaggtaactggcttcagcagagcgcagataccaaatactgttcttctagtgtagccgtagttaggccaccacttcaagaactctgtagcaccgcctacatacctcgctctgctaatcctgttaccagtggctgctgccagtggcgataagtcgtgtcttaccgggttggactcaagacgatagttaccggataaggcgcagcggtcgggctgaacggggggttcgtgcacacagcccagcttggagcgaacgacctacaccgaactgagatacctacagcgtgagctatgagaaagcgccacgcttcccgaagggagaaaggcggacaggtatccggtaagcggc
agggtcggaacaggagagcgcacgagggagcttccagggggaaacgcctggtatctttata'

In [19]:
# Both fragments contain the two *_ext pieces around the midpoint; the first
# fragment adds the leading half, the second fragment adds the trailing half.
insulators['first_fragment'] = insulators['first_half'].str.cat(
    [insulators['second_ext'], insulators['first_ext']]
)
insulators['second_fragment'] = insulators['second_ext'].str.cat(
    [insulators['first_ext'], insulators['second_half']]
)

In [47]:
insulators['first_fragment'].tolist()

['aggggtcagttgatgtgctttcaactctgatgagtcagtgatgacgaaacccc',
 'agggtgtctcaaggtgcgtaccttgactgatgagtccgaaaggacgaaacaccc',
 'agaagtcaattaatgtgcttttaattctgatgagtcggtgacgacgaaacttc',
 'gactgtcgccggatgtgtatccgacctgacgatggcccaaaagggccgaaacagt',
 'agtcataagtctgggctaagcccactgatgagtcgctgaaatgcgacgaaacttat',
 'aggagtcaattaatgtgcttttaattctgatgagacggtgacgtcgaaactcc',
 'agcggtcaacgcatgtgctttgcgttctgatgagacagtgatgtcgaaaccgc',
 'agcgctgtctgtacttgtatcagtacactgacgagtccctaaaggacgaaacacc',
 'ctgaagtcgtcaagtgctgtgcttgcacttctgatgaggcagtgatgccgaaacg',
 'agcgctcaacgggtgtgcttcccgttctgatgagtccgtgaggacgaaagcgc']

In [51]:
insulators['second_fragment'].tolist()

['caactctgatgagtcagtgatgacgaaaccccctctacaaataattttgtttaa',
 'ttgactgatgagtccgaaaggacgaaacacccctctacaaataattttgtttaa',
 'taattctgatgagtcggtgacgacgaaacttcctctacaaataattttgtttaa',
 'acctgacgatggcccaaaagggccgaaacagtcctctacaaataattttgtttaa',
 'ctgatgagtcgctgaaatgcgacgaaacttatgacctctacaaataattttgtttaa',
 'taattctgatgagacggtgacgtcgaaactccctctacaaataattttgtttaa',
 'gcgttctgatgagacagtgatgtcgaaaccgcctctacaaataattttgtttaa',
 'tacactgacgagtccctaaaggacgaaacaccgcctctacaaataattttgtttaa',
 'tgcacttctgatgaggcagtgatgccgaaacgacctctacaaataattttgtttaa',
 'ccgttctgatgagtccgtgaggacgaaagcgcctctacaaataattttgtttaa']

In [61]:
# Upper bound of the (min, max) extension-length range handed to primers(); the
# lower bound is the full fragment length.
# NOTE(review): this presumably forces the whole fragment to be added - confirm
# against the primers documentation.
MAX_EXTENSION_LEN = 100

first_parts, second_parts = [], []
# iterrows() is a generator without a length, so tqdm needs an explicit total to
# show a progress fraction and an ETA.
for i, row in tqdm(insulators.iterrows(), total=len(insulators)):
    
    #first fragment: added (reverse-complemented) to the reverse primer of backbone_left
    fwd, rev = '', row['first_fragment']
    fp, rp = primers(backbone_left, add_fwd=fwd, add_rev=str(Seq(rev).reverse_complement()), add_rev_len=(len(rev), MAX_EXTENSION_LEN))
    first_parts.append((fp.seq, rp.seq))
    
    #second fragment: added to the forward primer of backbone_right; the reverse
    #primer gets no extension, so the empty string is passed as-is
    fwd, rev = row['second_fragment'], ''
    fp, rp = primers(backbone_right, add_fwd=fwd, add_rev=rev, add_fwd_len=(len(fwd), MAX_EXTENSION_LEN))
    second_parts.append((fp.seq, rp.seq))

10it [00:31,  3.12s/it]


In [62]:
# One row per insulator: the forward primer designed on backbone_right (it
# carries the second fragment) and the reverse primer designed on backbone_left
# (it carries the first fragment). Passing the column names at construction also
# keeps this working when the lists are empty.
primer_pair = pd.DataFrame(
    [(second[0], first[1]) for first, second in zip(first_parts, second_parts)],
    columns=['fwd', 'rev'],
)
primer_pair['name'] = np.arange(len(primer_pair))

# Long format with one primer per row. melt() emits all 'fwd' rows before the
# 'rev' rows; a stable sort is required so that every name keeps its fwd row
# ahead of its rev row (the default quicksort gives no such guarantee).
primer_pair = (
    primer_pair
    .melt(id_vars='name')
    .sort_values('name', kind='mergesort')
    .reset_index(drop=True)
)
primer_pair.to_csv('datasets/jump/insulator_primers_.csv', index=False)

In [63]:
[(b[0], a[1]) for a, b in zip(first_parts, second_parts)]

[('CAACTCTGATGAGTCAGTGATGACGAAACCCCCTCTACAAATAATTTTGTTTAATACTTGAGACCTTTCATCGC',
  'GGGGTTTCGTCATCACTGACTCATCAGAGTTGAAAGCACATCAACTGACCCCTGTCATGAGACCAAACATCGCA'),
 ('TTGACTGATGAGTCCGAAAGGACGAAACACCCCTCTACAAATAATTTTGTTTAATACTTGAGACCTTTCATCGC',
  'GGGTGTTTCGTCCTTTCGGACTCATCAGTCAAGGTACGCACCTTGAGACACCCTGTCATGAGACCAAACATCGCA'),
 ('TAATTCTGATGAGTCGGTGACGACGAAACTTCCTCTACAAATAATTTTGTTTAATACTTGAGACCTTTCATCGC',
  'GAAGTTTCGTCGTCACCGACTCATCAGAATTAAAAGCACATTAATTGACTTCTGTCATGAGACCAAACATCGCA'),
 ('ACCTGACGATGGCCCAAAAGGGCCGAAACAGTCCTCTACAAATAATTTTGTTTAATACTTGAGACCTTTCATCGC',
  'ACTGTTTCGGCCCTTTTGGGCCATCGTCAGGTCGGATACACATCCGGCGACAGTCGTCATGAGACCAAACATCGCA'),
 ('CTGATGAGTCGCTGAAATGCGACGAAACTTATGACCTCTACAAATAATTTTGTTTAATACTTGAGACCTTTCATCG',
  'ATAAGTTTCGTCGCATTTCAGCGACTCATCAGTGGGCTTAGCCCAGACTTATGACTGTCATGAGACCAAACATCGCA'),
 ('TAATTCTGATGAGACGGTGACGTCGAAACTCCCTCTACAAATAATTTTGTTTAATACTTGAGACCTTTCATCGC',
  'GGAGTTTCGACGTCACCGTCTCATCAGAATTAAAAGCACATTAATTGACTCCTGTCATGAGACCAAACATCGCA'),
 ('GCGTTCTGATGAGACAGTGATG

In [45]:
[(b[0], a[1]) for a, b in zip(first_parts, second_parts)]

[('TGATGACGAAACCCCCTCTACAAATAATTTTGTTTAATACTTGAGACCTTTCATCGC',
  'GACTCATCAGAGTTGAAAGCACATCAACTGACCCCTGTCATGAGACCAAACATCGCAG'),
 ('AAGGACGAAACACCCCTCTACAAATAATTTTGTTTAATACTTGAGACCTTTCATCGC',
  'ACTCATCAGTCAAGGTACGCACCTTGAGACACCCTGTCATGAGACCAAACATCGCAG'),
 ('GACGAAACTTCCTCTACAAATAATTTTGTTTAATACTTGAGACCTTTCATCGCGACC',
  'GACTCATCAGAATTAAAAGCACATTAATTGACTTCTGTCATGAGACCAAACATCGCAG'),
 ('AGGGCCGAAACAGTCCTCTACAAATAATTTTGTTTAATACTTGAGACCTTTCATCGC',
  'GCCATCGTCAGGTCGGATACACATCCGGCGACAGTCGTCATGAGACCAAACATCGCAG'),
 ('GACGAAACTTATGACCTCTACAAATAATTTTGTTTAATACTTGAGACCTTTCATCGC',
  'TTCAGCGACTCATCAGTGGGCTTAGCCCAGACTTATGACTGTCATGAGACCAAACATCG'),
 ('TGACGTCGAAACTCCCTCTACAAATAATTTTGTTTAATACTTGAGACCTTTCATCGC',
  'TCTCATCAGAATTAAAAGCACATTAATTGACTCCTGTCATGAGACCAAACATCGCAG'),
 ('TGATGTCGAAACCGCCTCTACAAATAATTTTGTTTAATACTTGAGACCTTTCATCGC',
  'TCTCATCAGAACGCAAAGCACATGCGTTGACCGCTGTCATGAGACCAAACATCGCAG'),
 ('AGGACGAAACACCGCCTCTACAAATAATTTTGTTTAATACTTGAGACCTTTCATCGC',
  'ACTCGTCAGTGTACTGATACAAGTACAGACAGCGCTGTCA