In [1]:
import numpy as np
import pandas as pd
#pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
from tqdm import tqdm
from itertools import permutations
import seaborn as sns
import matplotlib.pyplot as plt
from Bio.Restriction import BsaI, BsmBI
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqUtils import MeltingTemp as mt
from primers import primers
import primer3 as pr
import warnings
warnings.filterwarnings("ignore")

Read parts

In [24]:
#import parts
parts = pd.read_csv('datasets/jump/parts-split-mcherry.csv').fillna('')
parts['sequence'] = (parts['prefix'] + parts['sequence'] + parts['suffix']).str.rstrip().str.upper()
parts = parts[['name', 'overhang', 'sequence']]

Read map

In [25]:
mapping = pd.read_csv('datasets/dictionary.csv')
mapping_full = dict(zip(mapping['full_name'], mapping['short_name']))
mapping_short = dict(zip(mapping['short_name'], mapping['full_name']))

Read template plasmids

In [26]:
fastas = ['split-reporter']
plasmids = pd.DataFrame([(p.id, str(p.seq)) for fasta in fastas \
                        for p in list(SeqIO.parse('datasets/jump/{}.fasta'.format(fasta), 'fasta'))], \
                        columns=['name', 'sequence'])
plasmids['sequence'] = plasmids['sequence'].str.upper()
plasmids

Unnamed: 0,name,sequence
0,FP-R182-mCherry-M86-C,GAATTCGCGGCCGCTTCTAGAGTTATGACAACTTGACGGCTACATC...
1,FP-R194-mCherry-gp41-1-residues,GAATTCGCGGCCGCTTCTAGAGTTATGACAACTTGACGGCTACATC...
2,FP-R195-mCherry-gp41-1-N,GAATTCGCGGCCGCTTCTAGAGTTATGACAACTTGACGGCTACATC...
3,FP-R222-mCherry-M86-residues,GAATTCGCGGCCGCTTCTAGAGTTATGACAACTTGACGGCTACATC...
4,FP-R224-mCherry-gp41-8-residues,GAATTCGCGGCCGCTTCTAGAGTTATGACAACTTGACGGCTACATC...
5,FP-R225-mCherry-NrdJ-1-residues,GAATTCGCGGCCGCTTCTAGAGTTATGACAACTTGACGGCTACATC...
6,FP-R226-mCherry-IMPDH-1-residues,GAATTCGCGGCCGCTTCTAGAGTTATGACAACTTGACGGCTACATC...
7,FP-R227-mCherry-SspDnaX-residues,GAATTCGCGGCCGCTTCTAGAGTTATGACAACTTGACGGCTACATC...
8,FP-R228-mCherry-SspGyrB-residues,GAATTCGCGGCCGCTTCTAGAGTTATGACAACTTGACGGCTACATC...
9,FP-R232-mCherry-M86-N,GAATTCGCGGCCGCTTCTAGAGTTATGACAACTTGACGGCTACATC...


JUMP recognition sites and affixes

In [27]:
recognition_sites = np.array([(enz, str(Seq(enz).reverse_complement())) for enz in [BsaI.site, BsmBI.site]]).ravel().tolist()
prefix, suffix = recognition_sites[2] + recognition_sites[0] + 'A', 'T' + recognition_sites[1] + 'T' + recognition_sites[3]

Finding templates and checking restriction sites. The results are valid parts, invalid parts (containing restriction sites), and missing templates.

In [28]:
def find_templates(parts, plasmids, mapping=None):

    templates = pd.DataFrame([(part['name'], plasmid['name']) for _, part in parts.iterrows() \
                      for _, plasmid in plasmids.iterrows() if plasmid['sequence'].find(part['sequence'])!=-1], \
                      columns=['name', 'template'])
    if mapping:
        templates['template'] = templates['template'].apply(lambda x: mapping[x] if x in mapping else x)
    templates = templates.groupby('name')['template'].apply(lambda x: ', '.join(x)).reset_index()
    
    missing_parts = list(set(parts['name'].tolist()).difference(set(templates['name'].tolist())))

    return templates, missing_parts

mCherry_map = {
    'FP-R416-mCherry-Mja-KlbA-N': 'mCherry-Mja-KlbA_N',
    'FP-R490-mCherry-Mja-KlbA-C': 'mCherry-Mja-KlbA_C',
    'FP-R399-mCherry-Mja-KlbA-residues': 'mCherry-Mja-KlbA_Mut',
    'FP-R235-mCherry-NrdJ-1-N': 'mCherry-NrdJ-1_N',
    'FP-R325-mCherry-NrdJ-1-C': 'mCherry-NrdJ-1_C',
    'FP-R225-mCherry-NrdJ-1-residues'a: 'mCherry-NrdJ-1_Mut'
}
templates, missing_parts = find_templates(parts, plasmids, mapping=mCherry_map)

In [29]:
def check_internal_sites(parts_ori, forbidden_sites):

    parts = parts_ori.copy()
    parts['num_forbidden_sites'] = parts['sequence'].apply(lambda x: np.sum([x.count(a) for a in forbidden_sites]))
    invalid_parts = parts[parts['num_forbidden_sites']>0].reset_index(drop=True)
    valid_parts = parts[parts['num_forbidden_sites']==0].reset_index(drop=True)

    return valid_parts, invalid_parts

valid_parts, invalid_parts = check_internal_sites(parts[~parts['name'].isin(missing_parts)], recognition_sites)

In [30]:
valid_parts

Unnamed: 0,name,overhang,sequence,num_forbidden_sites
0,mCherry-M86_N,NOC,GTGAGCAAGGGCGAAGAAGACAACATGGCCATCATCAAGGAGTTCA...,0
1,mCherry-gp41-1_N,NOC,GTGAGCAAGGGCGAAGAAGACAACATGGCCATCATCAAGGAGTTCA...,0
2,mCherry-gp41-8_N,NOC,GTGAGCAAGGGCGAAGAAGACAACATGGCCATCATCAAGGAGTTCA...,0


In [31]:
invalid_parts

Unnamed: 0,name,overhang,sequence,num_forbidden_sites


In [32]:
missing_parts

['mCherry-M86_Mut',
 'mCherry-gp41-8_Mut',
 'mCherry-gp41-1_C',
 'mCherry-gp41-8_C',
 'mCherry-M86_C',
 'mCherry-gp41-1_Mut']

#### Design Overhangs & Primers

In [34]:
def design_fragments(parts, prefix, suffix):

    fragments = pd.merge(parts, pd.read_csv('datasets/jump/overhang.csv'), on='overhang', how='left') \
                [['name', 'sequence', 'left_site', 'right_site']]
    fragments['left_overhang'] = prefix + fragments['left_site']
    fragments['right_overhang'] = fragments['right_site'] + suffix
    fragments['ext_sequence'] = fragments['left_overhang'] + fragments['sequence'] + fragments['right_overhang']
    fragments['size'] = fragments['ext_sequence'].apply(lambda x: len(x)+1) #following benchling convention to start from index 1

    return fragments

fragments = design_fragments(valid_parts, prefix, suffix)

In [None]:
#fragments.to_csv('datasets/jump/fragment-output.csv')
fragments[['name', 'ext_sequence']].to_csv('datasets/jump/fragments-split-reporter-output.csv')

In [35]:
def design_primers(parts, prefix, suffix):
    
    failed_parts = []
    primers_list = []
    
    for _, part in tqdm(parts.iterrows()):
        
        try:
            fp, rp = primers(part['sequence'], add_fwd=prefix + part['left_site'],
                             add_rev=str(Seq(suffix).reverse_complement()) + str(Seq(part['right_site']).reverse_complement()))
            
            #forward primers
            primers_list.append((part['name'], fp.seq, 
                                 fp.tm, fp.tm_total, fp.gc, fp.dg, fp.fwd, fp.offtargets, fp.penalty))
            #reverse primers
            primers_list.append((part['name'], rp.seq,
                                 rp.tm, rp.tm_total, rp.gc, rp.dg, rp.fwd, rp.offtargets, rp.penalty))
        
        except:
            print('Error on', part['name'], ': cannot find any feasible primer design, check your fragments!')
            failed_parts.append(part['name'])
            continue
        
    return pd.DataFrame(primers_list, columns=['name', 'sequence', 'tm', 'tm_total', 'gc', 'dg', 'fwd', 'offtargets', 'penalty'])
        
raw_primers = design_primers(fragments, prefix, suffix)
raw_primers

3it [00:00,  5.04it/s]


Unnamed: 0,name,sequence,tm,tm_total,gc,dg,fwd,offtargets,penalty
0,mCherry-M86_N,CGTCTCGGTCTCAAATGGTGAGCAAGGGCGAAG,63.6,75.7,0.6,-1.308865,True,0,10.51773
1,mCherry-M86_N,CGTCTCAGGTCTCAAAGCTTATTATTCTAGTTTACGGGGTAGAG,62.6,72.7,0.4,-0.26094,False,0,5.42188
2,mCherry-gp41-1_N,CGTCTCGGTCTCAAATGGTGAGCAAGGGCGAAG,63.6,75.7,0.6,-1.308865,True,0,10.51773
3,mCherry-gp41-1_N,CGTCTCAGGTCTCAAAGCTTATTATTCTTTAACATACAGGCACATAC,62.2,71.9,0.4,0.0,False,0,7.5
4,mCherry-gp41-8_N,CGTCTCGGTCTCAAATGGTGAGCAAGGGCGAAGA,64.0,75.5,0.6,-1.308865,True,0,9.91773
5,mCherry-gp41-8_N,CGTCTCAGGTCTCAAAGCTTATTATTTTGCACGGCTACGC,63.9,74.3,0.5,0.0,False,0,1.9


In [44]:
numbering_start = 1

primer_seq = raw_primers[['fwd', 'sequence']].drop_duplicates().reset_index(drop=True)
primer_seq['primer_name'] = pd.Series(primer_seq.index).apply(lambda x: 'P' + str(x+numbering_start).zfill(len(str(primer_seq.shape[0]))))
primer_seq.loc[primer_seq['fwd'], 'primer_name'] = primer_seq['primer_name'] + '.F'
primer_seq.loc[~primer_seq['fwd'], 'primer_name'] = primer_seq['primer_name'] + '.R'
final_primers = pd.merge(primer_seq[['primer_name', 'sequence']], raw_primers.drop('name', axis=1), \
                         on='sequence', how='left').drop_duplicates().reset_index(drop=True)
final_primers

Unnamed: 0,primer_name,sequence,tm,tm_total,gc,dg,fwd,offtargets,penalty
0,P1.F,CGTCTCGGTCTCAAATGGTGAGCAAGGGCGAAG,63.6,75.7,0.6,-1.308865,True,0,10.51773
1,P2.R,CGTCTCAGGTCTCAAAGCTTATTATTCTAGTTTACGGGGTAGAG,62.6,72.7,0.4,-0.26094,False,0,5.42188
2,P3.R,CGTCTCAGGTCTCAAAGCTTATTATTCTTTAACATACAGGCACATAC,62.2,71.9,0.4,0.0,False,0,7.5
3,P4.F,CGTCTCGGTCTCAAATGGTGAGCAAGGGCGAAGA,64.0,75.5,0.6,-1.308865,True,0,9.91773
4,P5.R,CGTCTCAGGTCTCAAAGCTTATTATTTTGCACGGCTACGC,63.9,74.3,0.5,0.0,False,0,1.9


In [45]:
parts_x_primers = pd.merge(raw_primers[['name', 'sequence', 'fwd']], final_primers[['primer_name', 'sequence']], on='sequence', how='left')
parts_x_primers['full_name'] = '(' + parts_x_primers['primer_name'].str[:-2] + 'J-RM)_' + parts_x_primers['name'] + parts_x_primers['primer_name'].str[-2:]
parts_x_primers

Unnamed: 0,name,sequence,fwd,primer_name,full_name
0,mCherry-M86_N,CGTCTCGGTCTCAAATGGTGAGCAAGGGCGAAG,True,P1.F,(P1J-RM)_mCherry-M86_N.F
1,mCherry-M86_N,CGTCTCAGGTCTCAAAGCTTATTATTCTAGTTTACGGGGTAGAG,False,P2.R,(P2J-RM)_mCherry-M86_N.R
2,mCherry-gp41-1_N,CGTCTCGGTCTCAAATGGTGAGCAAGGGCGAAG,True,P1.F,(P1J-RM)_mCherry-gp41-1_N.F
3,mCherry-gp41-1_N,CGTCTCAGGTCTCAAAGCTTATTATTCTTTAACATACAGGCACATAC,False,P3.R,(P3J-RM)_mCherry-gp41-1_N.R
4,mCherry-gp41-8_N,CGTCTCGGTCTCAAATGGTGAGCAAGGGCGAAGA,True,P4.F,(P4J-RM)_mCherry-gp41-8_N.F
5,mCherry-gp41-8_N,CGTCTCAGGTCTCAAAGCTTATTATTTTGCACGGCTACGC,False,P5.R,(P5J-RM)_mCherry-gp41-8_N.R


In [46]:
final_primers_ = pd.merge(parts_x_primers[['full_name', 'primer_name']], final_primers, on='primer_name', how='left')
#final_primers_.to_csv('datasets/jump/split-reporter-final_primers.csv', index=False)
final_primers_

Unnamed: 0,full_name,primer_name,sequence,tm,tm_total,gc,dg,fwd,offtargets,penalty
0,(P1J-RM)_mCherry-M86_N.F,P1.F,CGTCTCGGTCTCAAATGGTGAGCAAGGGCGAAG,63.6,75.7,0.6,-1.308865,True,0,10.51773
1,(P2J-RM)_mCherry-M86_N.R,P2.R,CGTCTCAGGTCTCAAAGCTTATTATTCTAGTTTACGGGGTAGAG,62.6,72.7,0.4,-0.26094,False,0,5.42188
2,(P1J-RM)_mCherry-gp41-1_N.F,P1.F,CGTCTCGGTCTCAAATGGTGAGCAAGGGCGAAG,63.6,75.7,0.6,-1.308865,True,0,10.51773
3,(P3J-RM)_mCherry-gp41-1_N.R,P3.R,CGTCTCAGGTCTCAAAGCTTATTATTCTTTAACATACAGGCACATAC,62.2,71.9,0.4,0.0,False,0,7.5
4,(P4J-RM)_mCherry-gp41-8_N.F,P4.F,CGTCTCGGTCTCAAATGGTGAGCAAGGGCGAAGA,64.0,75.5,0.6,-1.308865,True,0,9.91773
5,(P5J-RM)_mCherry-gp41-8_N.R,P5.R,CGTCTCAGGTCTCAAAGCTTATTATTTTGCACGGCTACGC,63.9,74.3,0.5,0.0,False,0,1.9


### Parts Amplification via Phusion PCR

#### Generate PCR Parameters

In [47]:
def get_annealed_part(a, b):
    return [a[i].replace(b[i], '') for i in range(len(b))]

def calculate_tm(seq):
    #return pr.calcTm(seq, dna_conc=250, dntp_conc=10,
    #                  tm_method='santalucia', salt_corrections_method='santalucia')
    
    return np.median([mt.Tm_Wallace(seq), mt.Tm_GC(seq), mt.Tm_NN(seq)])

pcr_primers = pd.merge(parts_x_primers, \
                fragments[['name', 'ext_sequence', 'left_overhang', 'right_overhang', 'size']], \
                on='name', how='left')
pcr_primers.loc[~pcr_primers['fwd'], 'sequence'] = pcr_primers['sequence']. \
            apply(lambda x: str(Seq(x).reverse_complement()))
pcr_primers.loc[pcr_primers['fwd'], 'overhang_part'] = pcr_primers['left_overhang']
pcr_primers.loc[~pcr_primers['fwd'], 'overhang_part'] = pcr_primers['right_overhang']
pcr_primers['annealing_part'] = get_annealed_part(pcr_primers['sequence'], pcr_primers['overhang_part'])
pcr_primers['tm'] = np.round(pcr_primers['annealing_part'].apply(calculate_tm), 1)
pcr_primers['tm_product'] = np.round(pcr_primers['ext_sequence'].apply(calculate_tm), 1)
pcr_primers['ext_time'] = pcr_primers['size'].apply(lambda x: max([60, x/1000 * 60]))
fwd_primers = pcr_primers[pcr_primers['fwd']] \
              [['name', 'primer_name', 'tm', 'tm_product', 'size', 'ext_time']]
rev_primers = pcr_primers[~pcr_primers['fwd']] \
              [['name', 'primer_name', 'tm', 'tm_product', 'size', 'ext_time']]

In [48]:
pcr_rxn = pd.merge(fwd_primers, rev_primers, on=['name', 'tm_product', 'size', 'ext_time'], how='outer')
pcr_rxn['ta'] = 0.3 * np.max([pcr_rxn['tm_x'], pcr_rxn['tm_y']], axis=0) + 0.7 * pcr_rxn['tm_product'] - 14.9
pcr_rxn['primer_name_x'] = pcr_rxn['primer_name_x'].str.split('_', expand=True)[0] \
                       .str.replace('(', '').str.replace(')', '')
pcr_rxn['primer_name_y'] = pcr_rxn['primer_name_y'].str.split('_', expand=True)[0] \
                       .str.replace('(', '').str.replace(')', '')
pcr_rxn = pcr_rxn[['name', 'size', 'primer_name_x', 'primer_name_y', 'ta', 'ext_time']] \
   .sort_values('ta').reset_index(drop=True)
pcr_rxn.columns = ['name', 'size', 'forward_primer', 'reversed_primer', 'annealing_temp', 'ext_time']
pcr_rxn = pd.merge(pcr_rxn, templates, on='name', how='left')
pcr_rxn

Unnamed: 0,name,size,forward_primer,reversed_primer,annealing_temp,ext_time,template
0,mCherry-M86_N,825,P1.F,P2.R,58.79,60,FP-R232-mCherry-M86-N
1,mCherry-gp41-1_N,789,P1.F,P3.R,59.42,60,FP-R195-mCherry-gp41-1-N
2,mCherry-gp41-8_N,792,P4.F,P5.R,60.46,60,FP-R264-mCherry-gp41-8-N


In [None]:
pcr_rxn.to_csv('datasets/jump/pcr_rxn.csv', index=False)

#### Level 0 plasmid map

In [None]:
fragments[['name', 'ext_sequence']]

In [None]:
def reindex_ps1(plasmid):
    '''Reindex plasmids to start from the annealing region of PS1 so the fragment will be in the middle of the sequence.
    This will do nothing for a fragment from pcr products as they do not have PS1 region'''
    
    new_start = plasmid.find('AGGGCGGCGGATTTGTCC')
    if new_start == -1:
        return plasmid
    return plasmid[new_start:] + plasmid[:new_start]

def len_amplicon(plasmid):
    '''Calculate the length of amplicon from PS1 to PS2, the plasmid always starts from PS1'''
    
    ps2 = 'GAACGCTCGGTTGCCGC' #reverse complement of PS2
    return len(plasmid[:plasmid.find(ps2)]) + len(ps2)


def get_sites(part, odd_level=True, vector=False):
    
    enz = BsaI if odd_level else BsmBI
    
    if vector:
        
        site = str(Seq(enz.site).reverse_complement())
        right_idx = part.find(site) - 5
        right_cut = part[right_idx: right_idx+4]

        site = enz.site
        left_idx = part.find(site) + len(site) + 1
        left_cut = part[left_idx: left_idx+4]
        
        fragment = part[left_idx:] + part[:right_idx+4]
        
    else:
        site = enz.site
        left_idx = part.find(site) + len(site) + 1
        left_cut = part[left_idx: left_idx+4]
        
        site = str(Seq(enz.site).reverse_complement())
        right_idx = part.find(site) - 5
        right_cut = part[right_idx: right_idx+4]
        
        fragment = part[left_idx: right_idx+4]
        
    return left_cut, right_cut, fragment

def assemble_lvl_0(fragments, uac):
    
    constructs = []
    for name in fragments:

        uac_sites = get_sites(uac, odd_level=False, vector=True)
        frag_sites = get_sites(fragments[name], odd_level=False, vector=False)

        if (uac_sites[1]==frag_sites[0] and uac_sites[0]==frag_sites[1]):
            assembly = reindex_ps1(frag_sites[2][:-4] + uac_sites[2][:-4])
            constructs.append((name, assembly))
            
    df_constructs = pd.DataFrame(constructs, columns=['name', 'sequence'])
    df_constructs['size'] = df_constructs['sequence'].apply(lambda x: len(x))
    df_constructs['amplicon'] = df_constructs['sequence'].apply(len_amplicon)
    return df_constructs

def generate_lvl_0_map(fragments):

    uac = 'AGGGCGGCGGATTTGTCCTACTCAGGAGAGCGTTCACCGACAAACAACAGATAAAACGAAAGGCCCAGTCTTTCGACTGAGCCTTTCGTTTTATTTGATGCCTTTAATTAAGGAGTTTTGCAGGTGCACCTGCTTTTCGCTGAATTCGCGGCCGCTTCTAGAGGGTCTGCGATGTTTGGTCTTGAGACGACTGTGACAAGGAGTTGACGGCTAGCTCAGTCCTAGGTACAGTGCTAGCGTACTTGTTTAACTTTAAGAAGGAGATATACAATGGTAGCCCGTAAAGGCGAAGAGCTGTTCACTGGTGTCGTCCCTATTCTGGTGGAACTGGATGGTGATGTCAACGGTCATAAGTTTTCCGTGCGTGGCGAGGGTGAAGGTGACGCAACTAATGGTAAACTGACGCTGAAGTTCATCTGTACTACTGGTAAACTGCCGGTACCTTGGCCGACTCTGGTAACGACGCTGACTTATGGTGTTCAGTGCTTTGCTCGTTATCCGGACCATATGAAGCAGCATGACTTCTTCAAGTCCGCCATGCCGGAAGGCTATGTGCAGGAACGCACGATTTCCTTTAAGGATGACGGCACGTACAAAACGCGTGCGGAAGTGAAATTTGAAGGCGATACCCTGGTAAACCGCATTGAGCTGAAAGGCATTGACTTTAAAGAAGATGGCAATATCCTGGGCCATAAGCTGGAATACAATTTTAACAGCCACAATGTTTACATCACCGCCGATAAACAAAAAAATGGCATTAAAGCGAATTTTAAAATTCGCCACAACGTGGAGGATGGCAGCGTGCAGCTGGCTGATCACTACCAGCAAAACACTCCAATCGGTGATGGTCCTGTTCTGCTGCCAGACAATCACTATCTGAGCACGCAAAGCGTTCTGTCTAAAGATCCGAACGAGAAACGCGATCATATGGTTCTGCTGGAGTTCGTAACCGCAGCGGGCATCACGCATGGTATGGATGAACTGTACAAAGGTTCGTAACTTCTGACTGAGTTGCACGCTCCTTGGTCAGCGTCTCAGACCTTTCATCGCGACCTACTAGTAGCGGCCGCTGCAGGGAGTTGTCTTCGAAGACTTCGCTCTAGTCTTGGACTCCTGTTGATAGATCCAGTAATGACCTCAGAACTCCATCTGGATTTGTTCAGAACGCTCGGTTGCCGCCGGGCGTTTTTTATTGGTGAGAATCCAGGGGTCCCCAATAATTACGATTTAAATTAGTAGCCCGCCTAATGAGCGGGCTTTTTTTTAATTCCCCTATTTGTTTATTTTTCTAAATACATTCAAATATGTATCCGCTCATGAGACAATAACCCTGATAAATGCTTCAATAATATTGAAAAAGGAAGAGTATGAGCATTCAGCATTTTCGTGTGGCGCTGATTCCGTTTTTTGCGGCGTTTTGCCTGCCGGTGTTTGCGCATCCGGAAACCCTGGTGAAAGTGAAAGATGCGGAAGATCAACTGGGTGCGCGCGTGGGCTATATTGAACTGGATCTGAACAGCGGCAAAATTCTGGAATCTTTTCGTCCGGAAGAACGTTTTCCGATGATGAGCACCTTTAAAGTGCTGCTGTGCGGTGCGGTTCTGAGCCGTGTGGATGCGGGCCAGGAACAACTGGGCCGTCGTATTCATTATAGCCAGAACGATCTGGTGGAATATAGCCCGGTGACCGAAAAACATCTGACCGATGGCATGACCGTGCGTGAACTGTGCAGCGCGGCGATTACCATGAGCGATAACACCGCGGCGAACCTGCTGCTGACGACCATTGGCGGTCCGAAAGAACTGACCGCGTTTCTGCATAACATGGGCGATCATGTGACCCGTCTGGATCGTTGGGAACCGGAACTGAACGAAGCGATTCCGAACGATGAACGTGATACCACCATGCCGGCAGCAATGGCGACCACCCTGCGTAAACTGCTGACGGGTGAGCTGCTGACCCTGGCAAGCCGCCAGCAACTGATTGATTGGATGGAAGCGGATAAAGTGGCGGGTCCGCTGCTGCGTAGCGCGCTGCCGGCTGGCTGGTTTATTGCGGATAAAAGCGGTGCGGGCGAACGTGGCAGCCGTGGCATTATTGCGGCGCTGGGCCCGGATGGTAAACCGAGCCGTATTGTGGTGATTTATACCACCGGCAGCCAGGCGACGATGGATGAACGTAACCGTCAGATTGCGGAAATTGGCGCGAGCCTGATTAAACATTGGTAAACCGATACAATTAAAGGCTCCTTTTGGAGCCTTTTTTTTTGGACGACCCTTGTCCTTTTCCGCTGCATAACCCTGCTTCGGGGTCATTATAGCGATTTTTTCGGTATATCCATCCTTTTTCGCACGATATACAGGATTTTGCCAAAGGGTTCGTGTAGACTTTCCTTGGTGTATCCAACGGCGTCAGCCGGGCAGGATAGGTGAAGTAGGCCCACCCGCGAGCGGGTGTTCCTTCTTCACTGTCCCTTATTCGCACCTGGCGGTGCTCAACGGGAATCCTGCTCTGCGAGGCTGGCCGTAGGCCGGCCCCGTAGAAAAGATCAAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGCTACCAGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGTTCTTTCCTGCGTTATCCCCTGATTCTGTGGATAACCGTATTACCGCCTTTGAGTGAGCTGATACCGCTCGCCGCAGCCGAACGACCGAGCGCAGCGAGTCAGTGAGCGAGGAAGCGGAAGAGCGCCCAATACGCAAACCGCCTCTCCCCGCGCGTTGGCCGATTCATTAATGCAGCTGGCACGACAGGTTTCCCGACTGGAAAGCGGGCAGTGAGCGCAACGCAATTAATGTGAGTTAGCTCACTCATTAGGCAGGCGCGCCCAGCTGTCT'

    #fragments['name'] = fragments['name'].map(mapping)
    fragments['name'] = 'pJ0-' + fragments['name']
    fragments_map = dict(zip(fragments['name'], fragments['ext_sequence']))
    return assemble_lvl_0(fragments_map, uac)

new_parts = generate_lvl_0_map(fragments[['name', 'ext_sequence']])

In [None]:
new_parts

In [None]:
new_promoters = new_parts[new_parts['name'].str.endswith('Pp')]
new_promoters.to_csv('datasets/jump/new_promoters.csv', index=False)

In [None]:
new_cds = new_parts[new_parts['name'].str.endswith('NOC')]
new_cds.to_csv('datasets/jump/new_cds.csv', index=False)

In [None]:
new_cds

#### Master Mix Calculation

In [None]:
phusion_pcr = {
    'H2O': 12.4,
    'HF/GC Buffer': 4,
    'dNTPs': 0.4,
    #'FP': 1,
    #'RP': 1,
    #'DNA': 1,
    'DMSO': 0.6,
    'Phusion': 0.2,
    'Excess': 6
}

num_rxn = len(pcr_rxn)
master_mix = pd.DataFrame.from_dict(phusion_pcr, orient='index').rename(columns={0: 'vol'})[:-1]
master_mix['mix'] = master_mix['vol'] * num_rxn
factor = np.round((np.sum(master_mix)['mix'] + phusion_pcr['Excess'])/np.sum(master_mix)['vol'], 2)
master_mix['mix'] = master_mix['vol'] * factor
master_mix

#### Level 0 Assembly

In [None]:
lvl0_rxn = {
    #'Fragment': 2,
    'UAC': 1,
    'BsmBI': 0.5,
    'T4-DNA-Ligase': 0.25,
    'Buffer': 2,
    'H2O': 14.25,
    'Excess': 6
}

num_rxn = len(pcr_rxn)
master_mix = pd.DataFrame.from_dict(lvl0_rxn, orient='index').rename(columns={0: 'vol'})[:-1]
master_mix['mix'] = master_mix['vol'] * num_rxn
factor = np.round((np.sum(master_mix)['mix'] + lvl0_rxn['Excess'])/np.sum(master_mix)['vol'], 2)
master_mix['mix'] = master_mix['vol'] * factor
master_mix