In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

from itertools import permutations, product
from primers import primers
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.Restriction import BsaI, BsmBI

In [2]:
# For each restriction enzyme, list its recognition site followed by the
# reverse complement of that site, flattened into a single list of strings.
recognition_sites = [
    strand
    for site in (BsaI.site, BsmBI.site)
    for strand in (site, str(Seq(site).reverse_complement()))
]
recognition_sites

['GGTCTC', 'GAGACC', 'CGTCTC', 'GAGACG']

In [6]:
# Read every record of each plasmid FASTA file into a (name, sequence) table.
fastas = ['pSEVA521', 'pjump27-1asfgfp']
plasmids = pd.DataFrame(
    [
        (record.id, str(record.seq))
        for stem in fastas
        for record in SeqIO.parse('datasets/jump/{}.fasta'.format(stem), 'fasta')
    ],
    columns=['name', 'sequence'],
)
# Normalise sequences: strip trailing whitespace, then upper-case.
plasmids['sequence'] = plasmids['sequence'].str.rstrip().str.upper()
plasmids.head()

Unnamed: 0,name,sequence
0,pSEVA521,TTAATTAAAGCGGATAACAATTTCACACAGGAGGCCGCCTAGGCCG...
1,pJUMP27-1A(sfGFP),TTAATTAAGGAGTTTTGCAGGTGCCTTGGAACACCTGCTTTTCGCT...


In [10]:
# Load the fragment table and normalise its sequences
# (trailing whitespace stripped, upper-cased) in one step.
fragments = pd.read_csv('datasets/jump/fragments.csv').assign(
    sequence=lambda table: table['sequence'].str.rstrip().str.upper()
)
fragments

Unnamed: 0,name,sequence
0,TetR,AAATTTGACAGCTTATCATCGATAAACTGTAATGCGGTAGTTTATC...
1,1A-pSC101-TetR,CGGTCCGCGCGTTGTCCTTTTCCGCTGCATAACCCTGCTTCGGGGT...


In [26]:
# Default anchor that reindex() rotates a plasmid sequence to begin with.
_REINDEX_START = 'cggtccgcgcgttgtccttttccgctgcataaccctgcttcggggtcattatagcgattttttcggtatatccatcctttttcgcacgatatacaggatt'


def reindex(plasmid, start=_REINDEX_START):
    """Rotate a circular plasmid sequence so that it begins with ``start``.

    Parameters
    ----------
    plasmid : str
        Plasmid sequence. It is compared as-is against the upper-cased
        anchor, so it should already be upper-case.
    start : str, optional
        Anchor sequence the result should begin with. It is upper-cased
        before searching. Defaults to the anchor originally hard-coded here.

    Returns
    -------
    str
        ``plasmid`` rotated so that the first occurrence of the anchor is at
        position 0, or ``plasmid`` unchanged when the anchor does not occur.
    """
    anchor = start.upper()
    # Nothing to rotate to; also avoids matching an anchor longer than the
    # plasmid against a repeated copy of the plasmid.
    if not anchor or len(anchor) > len(plasmid):
        return plasmid
    # The plasmid is circular: append its first len(anchor) - 1 bases so an
    # anchor that spans the end/start junction of the linear string is found.
    # Any match necessarily starts at an index < len(plasmid).
    new_start = (plasmid + plasmid[:len(anchor) - 1]).find(anchor)
    if new_start == -1:
        return plasmid
    return plasmid[new_start:] + plasmid[:new_start]

# Rotate every plasmid sequence with reindex(), element by element.
plasmids['sequence'] = plasmids['sequence'].map(reindex)

In [28]:
# Reverse lookups keyed by sequence: sequence -> plasmid name / fragment name.
templates = {
    sequence: name
    for sequence, name in zip(plasmids['sequence'], plasmids['name'])
}
parts = {
    sequence: name
    for sequence, name in zip(fragments['sequence'], fragments['name'])
}

In [29]:
# Print each (plasmid name, fragment name) pair for which the fragment
# sequence occurs as a substring of the plasmid sequence.
for plasmid_seq in plasmids['sequence'].tolist():
    for fragment_seq in fragments['sequence'].tolist():
        if fragment_seq in plasmid_seq:
            print(templates[plasmid_seq], parts[fragment_seq])

pSEVA521 TetR
pJUMP27-1A(sfGFP) 1A-pSC101-TetR


<b>The flanking sequences added to each primer are hardcoded for now.</b>

In [32]:
# Hardcoded 16-nt flanking sequences handed to primers() as add_fwd / add_rev.
# Two are written in lower case and two in upper case; the primer sequences
# displayed by the primer cells come out upper-case either way.
# NOTE(review): each flank appears to be the end of the neighbouring fragment
# at the junction (see the per-constant notes) — confirm the intended assembly.

# Passed as add_fwd for the TetR primers.
# NOTE(review): looks like the last 16 bases of the 1A-pSC101-TetR fragment,
# judging by the reverse primer shown for the backbone — confirm.
tetR_pref = 'ccaataattacgattt'
# Reverse-complemented and passed as add_rev for the TetR primers. Case aside,
# identical to the first 16 bases displayed for the 1A-pSC101-TetR fragment.
tetR_suff = 'cggtccgcgcgttgtc'
# Passed as add_fwd for the backbone (1A-pSC101-TetR) primers.
# NOTE(review): looks like the last 16 bases of the TetR fragment, judging by
# the reverse primer shown for TetR — confirm.
backbone_pref = 'CCTCGACCTGAGACAA'
# Reverse-complemented and passed as add_rev for the backbone primers.
# Identical to the first 16 bases displayed for the TetR fragment.
backbone_suff = 'AAATTTGACAGCTTAT'

In [31]:
# Forward lookup: fragment name -> fragment sequence.
parts_seq = {
    name: sequence
    for name, sequence in zip(fragments['name'], fragments['sequence'])
}

In [33]:
#TetR
fp, rp = primers(parts_seq['TetR'], add_fwd=tetR_pref, add_rev=str(Seq(tetR_suff).reverse_complement()))
fp.seq, rp.seq

('CCAATAATTACGATTTAAATTTGACAGCTTATCATCGA', 'GACAACGCGCGGACCGTTGTCTCAGGTCGAGG')

In [34]:
#Backbone
fp, rp = primers(parts_seq['1A-pSC101-TetR'], add_fwd=backbone_pref, add_rev=str(Seq(backbone_suff).reverse_complement()))
fp.seq, rp.seq

('CCTCGACCTGAGACAACGGTCCGCGCGTTGTCCTTT',
 'ATAAGCTGTCAAATTTAAATCGTAATTATTGGGGACCCCTG')