In [50]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

from itertools import permutations
from primers import primers

In [3]:
# Load the two input tables (paths are relative to the notebook's working directory).
# `parts` supplies the 'part_name' and 'sequence' columns used below;
# `sites` supplies the 'overhang' and 'sites' columns used below.
parts = pd.read_csv('datasets/moclo/parts.csv')
sites = pd.read_csv('datasets/moclo/sites.csv')

In [5]:
# Normalise every part sequence to upper case (overwrites the column in place;
# re-running the cell is harmless because upper-casing is idempotent).
parts['sequence'] = parts['sequence'].str.upper()

In [7]:
# Record the length of each part sequence in a 'len' column.
# `.str.len()` is the vectorised equivalent of applying `len` row by row, and it
# yields NaN for a missing sequence instead of raising a TypeError.
parts['len'] = parts['sequence'].str.len()
parts

Unnamed: 0,part_name,sequence,len
0,P11,GCCTCCACACCGCTCGTCACATCCTGTGATCCATTCCCCGCCCATC...,61
1,P16,CTTGGATGAAAAGAAACCCACCGACGGTGTAACCCTGGCGGCCGAT...,60
2,P20,GCGCGGATAAAAATTTCATTTGCCCGCGACGGTTTTTCCGCCCATC...,60
3,P38,CAGTACAAAATTTTTTAGATGCGTTGTACAACCCTCACGGGGGTGG...,80
4,araC-pBAD,TTATGACAACTTGACGGCTACATCATTCACTTTTTCTTCACAACCG...,1190
5,cymR-pCuma,TCAACGTTTGAATTTTGCATAACGTTCACGTGCAATTTCCAGGGTG...,860
6,rhaS-pRhaB,TTATTGCAGAAAGCCATCCCGTCCCTGGCGAATATCACGCGGTGAC...,1108
7,pCin,CCCTTTGTGCGTCCAAACGGACGCACGGCGCTCTAAAGCGGGTCGC...,226
8,repGFP,ATTAAAGAGGAGAAATACTAGATGCGTAAAGGAGAAGAACTTTTCA...,878
9,e11-N,TCACACAGGACTACTAGATGATGAGCGATAGTCCGCAGAAACTGGG...,1007


In [38]:
# Fixed flanks that are concatenated in front of a fusion site to form the extra
# 5' sequence handed to `primers` (prefix -> forward primer, suffix -> reverse primer).
# NOTE(review): presumably these carry the Type IIS enzyme recognition site — confirm.
prefix = 'gggtctca'
suffix = 'aggtctct'

# Lookup tables: overhang label -> fusion-site sequence, and part name -> part sequence.
fusion_sites = sites.set_index('overhang')['sites'].to_dict()
parts_seq = parts.set_index('part_name')['sequence'].to_dict()

In [54]:
# Baseline: design a primer pair for P11 with no extra 5' sequence added.
# The call returns a pair of Primer records (forward first, then reverse).
primers(parts_seq['P11'])

(Primer(seq='GCCTCCACACCGCTCG', tm=68.5, tm_total=68.5, gc=0.8, dg=0, fwd=True, offtargets=0, penalty=13.4),
 Primer(seq='AGAGGTGTTACGATAGATGGGCG', tm=68.2, tm_total=68.2, gc=0.5, dg=0, fwd=False, offtargets=0, penalty=7.200000000000003))

In [53]:
# Same P11 primer pair shown as a table: one row per primer, one column per Primer field.
pd.DataFrame(primers(parts_seq['P11']))

Unnamed: 0,seq,tm,tm_total,gc,dg,fwd,offtargets,penalty
0,GCCTCCACACCGCTCG,68.5,68.5,0.8,0,True,0,13.4
1,AGAGGTGTTACGATAGATGGGCG,68.2,68.2,0.5,0,False,0,7.2


In [43]:
# Design P11 primer pairs for the A->B and C->B scars: only the upstream
# (forward) fusion site changes, the reverse primer always carries site B.
(p11_ab_f, p11_ab_r), (p11_cb_f, p11_cb_r) = (
    primers(
        parts_seq['P11'],
        add_fwd=prefix + fusion_sites[upstream],
        add_rev=suffix + fusion_sites['B'],
    )
    for upstream in ('A', 'C')
)

In [49]:
# The first eight rows of `parts` are treated as the promoter parts.
promoters = parts['part_name'].head(8).tolist()
promoters

['P11', 'P16', 'P20', 'P38', 'araC-pBAD', 'cymR-pCuma', 'rhaS-pRhaB', 'pCin']

In [55]:
# Design a primer pair for every promoter combined with every upstream fusion
# site; the downstream site is always 'B'. Each pair is tagged with the part name
# and the two-letter scar label (upstream site + 'B').
#
# The per-pair frames are collected in a list and concatenated once:
# `DataFrame.append` was removed in pandas 2.0, and growing a frame inside a loop
# re-copies all accumulated rows on every iteration.
#
# NOTE(review): `add_rev` is placed verbatim at the 5' end of the reverse primer —
# confirm that fusion_sites['B'] is stored in reverse-primer orientation (i.e. as the
# reverse complement of the top-strand overhang) rather than in top-strand orientation.
rev_tail = suffix + fusion_sites['B']  # loop-invariant: same reverse tail for every pair
primer_frames = []
for p in tqdm(promoters):
    for s in ['A', 'C', 'D', 'E', 'F', 'G', 'H']:
        primer_pair = pd.DataFrame(primers(parts_seq[p], add_fwd=prefix + fusion_sites[s], add_rev=rev_tail))
        primer_pair['part_name'] = p
        primer_pair['scar'] = s + 'B'
        primer_frames.append(primer_pair)

# As with the original append loop, the index repeats 0, 1 for each pair.
# `pd.concat` rejects an empty list, so fall back to an empty frame when there are no promoters.
prom_primers = pd.concat(primer_frames) if primer_frames else pd.DataFrame()

100%|██████████| 8/8 [00:12<00:00,  1.62s/it]


In [59]:
# Replace the repeated per-pair index (0, 1, 0, 1, ...) with a clean 0..n-1 range.
# Reassigning instead of using `inplace=True` keeps the step an ordinary expression
# and follows the pandas recommendation to avoid in-place mutation.
prom_primers = prom_primers.reset_index(drop=True)

In [60]:
# Show the combined primer table (pandas truncates the middle rows of long frames when rendering).
prom_primers

Unnamed: 0,seq,tm,tm_total,gc,dg,fwd,offtargets,penalty,part_name,scar
0,GGGTCTCAGGAGGCCTCCACACCGCTCG,68.5,78.4,0.7,-1.76807,True,0,16.63614,P11,AB
1,AGGTCTCTTACTAGAGGTGTTACGATAGATGGGCG,68.2,72.3,0.5,-0.82334,False,0,8.84668,P11,AB
2,GGGTCTCAAATGGCCTCCACACCGCTCG,68.5,76.3,0.6,0.00000,True,0,12.80000,P11,CB
3,AGGTCTCTTACTAGAGGTGTTACGATAGATGGGCG,68.2,72.3,0.5,-0.82334,False,0,8.84668,P11,CB
4,GGGTCTCAAGGTGCCTCCACACCGCTCG,68.5,77.4,0.7,-1.73725,True,0,16.57450,P11,DB
...,...,...,...,...,...,...,...,...,...,...
107,AGGTCTCTTACTAGCGTTTTCAAGTTCGTGG,63.6,69.8,0.5,0.00000,False,0,4.60000,pCin,FB
108,GGGTCTCATGCCCCCTTTGTGCGTCCAAA,63.6,75.9,0.6,-0.68048,True,0,8.26096,pCin,GB
109,AGGTCTCTTACTAGCGTTTTCAAGTTCGTGG,63.6,69.8,0.5,0.00000,False,0,4.60000,pCin,GB
110,GGGTCTCAACTACCCTTTGTGCGTCCAAA,63.6,72.4,0.5,-2.01545,True,0,10.63090,pCin,HB
