# Design Primers from Aquarium

## Imports

In [1]:
import sys
import os

path = os.path.abspath('..')
if path not in sys.path:
    sys.path.insert(0, os.path.abspath('..'))
    
sys.path

['/home/justin/Github/aqbuildtools',
 '/home/justin/Github/aqbuildtools/examples',
 '/home/justin/anaconda3/envs/aqbt/lib/python38.zip',
 '/home/justin/anaconda3/envs/aqbt/lib/python3.8',
 '/home/justin/anaconda3/envs/aqbt/lib/python3.8/lib-dynload',
 '',
 '/home/justin/anaconda3/envs/aqbt/lib/python3.8/site-packages',
 '/home/justin/anaconda3/envs/aqbt/lib/python3.8/site-packages/IPython/extensions',
 '/home/justin/.ipython']

In [4]:
from primer3plus.utils import reverse_complement as rc
import primer3
from aqbt.contrib.uwbf import primers

primer_design = primers.PrimerDesign()

template = primers.Templates.eyfp

pairs = primer_design.design_primers(template, fwd_primer='gtgagcaagggcgaggag')


## Load sessions

In [2]:
from aqbt import AquariumBuildTools
aqtools = AquariumBuildTools.from_toml('creds.secret.toml')
aqtools.sessions

aq = aqtools.sessions['local']['aquarium']

### Get Aq Primers

In [3]:
import pydent
from aqbt.contrib.uwbf import primers

df = primers.get_aq_primers_df(aq)
df.head()

  0%|          | 0/34609 [00:00<?, ?it/s]

Unnamed: 0,id,name,anneal,overhang,sequence
0,1,IAA1-Nat-F,ATGGAAGTCACCAATGGGCTTAACCTTAAG,AAAAAGCAGGCTTCAAA,AAAAAGCAGGCTTCAAAATGGAAGTCACCAATGGGCTTAACCTTAAG
1,14,prKL1573,AGTTTATCATTATCAATACTCGCCATTTCAAAGAA,CCTTAACCAGATTCGAAAAGCGGC,CCTTAACCAGATTCGAAAAGCGGCAGTTTATCATTATCAATACTCG...
2,15,prKL744,ATCCACTAGTTCTAGAATCCGTCGAAACTAAGTT,TTTGTAG,TTTGTAGATCCACTAGTTCTAGAATCCGTCGAAACTAAGTT
3,35,prKL1927,AACACCCCTTGTATTACTGTTTATGTAAGCAGACA,,AACACCCCTTGTATTACTGTTTATGTAAGCAGACA
4,36,prKL1928,TTTTTCCCTCCTTACAGTTTCTGATTTGTG,TGAAAAGTTCTTCTCCTTTACGCAT,TGAAAAGTTCTTCTCCTTTACGCATTTTTTCCCTCCTTACAGTTTC...


### Design Primers

In [12]:
from primer3plus.utils import reverse_complement as rc
import primer3

primer_design = primers.PrimerDesign()

template = primers.Templates.eyfp

pairs = primer_design.design_primers(template, fwd_primer='gtgagcaagggcgaggag')
pairs

[{'PENALTY': 5.644685978798116,
  'COMPL_ANY_TH': 23.930054863624093,
  'COMPL_END_TH': 8.325462298707293,
  'PRODUCT_SIZE': 708},
 {'PENALTY': 2.409566417476105,
  'SEQUENCE': 'gtgagcaagggcgaggag',
  'location': (0, 18),
  'TM': 60.12535037557467,
  'GC_PERCENT': 66.66666666666667,
  'SELF_ANY_TH': 0.0,
  'SELF_END_TH': 0.0,
  'HAIRPIN_TH': 47.45849701646665,
  'END_STABILITY': 3.69,
  'OVERHANG': ''},
 {'PENALTY': 3.024497615153428,
  'PROBLEMS': " Hairpin stability too high; Too many GCs at 3' end;",
  'SEQUENCE': 'cagctcgtccatgccgag',
  'location': (707, 18),
  'TM': 60.58441839356533,
  'GC_PERCENT': 66.66666666666667,
  'SELF_ANY_TH': 21.774274837603684,
  'SELF_END_TH': 21.774274837603684,
  'HAIRPIN_TH': 50.93306366967181,
  'END_STABILITY': 4.63,
  'OVERHANG': ''}]

In [38]:
explain = 'considered 12, high tm 8, ok 2'

import re

def parse_explain(explain):
    d = []
    for token in explain.split(', '):
        groups = re.search('(.+?)\s(\d+)', token).groups()
        d.append(groups)
    return dict(d)

new_explain = {}
for k, v in explains.items():
    if isinstance(v, str):
        v = parse_explain(v)
    new_explain[k] = v
    
new_explain

{'PRIMER_LEFT_EXPLAIN': {'considered': '12', 'high tm': '8', 'ok': '2'},
 'PRIMER_RIGHT_EXPLAIN': {'considered': '12', 'high tm': '5', 'ok': '4'},
 'PRIMER_PAIR_EXPLAIN': {'considered': '4', 'ok': '4'},
 'PRIMER_LEFT_NUM_RETURNED': 1,
 'PRIMER_RIGHT_NUM_RETURNED': 1,
 'PRIMER_INTERNAL_NUM_RETURNED': 0,
 'PRIMER_PAIR_NUM_RETURNED': 1}

In [40]:
def sum_dicts(a, b):
    c = dict(a)
    for k, v in a.items():
        if isinstance(v, dict):
            c[k] = sum_dicts(a[k], b[k])
        else:
            if k not in c:
                c[k] = 0
            else:
                c[k] += b[k]
    return c

sum_dicts({'x': 1}, {'y': 1})

KeyError: 'x'

In [33]:
primer3.calcHomodimerTm('gtgagcaagggcgaggag' + rc('gtgagcaagggcgaggag'))

73.40745513532806

In [34]:
primer3.calcHairpinTm('gtgagcaagggcgaggag' + rc('gtgagcaagggcgaggag'))

83.83078811600512

In [47]:
import primer3plus

d = primer3plus.new()

d.PRIMER_PAIR_WT_COMPL_ANY.value = 1.0

In [24]:
bindings = primers.create_anneal_df(template, df.sequence, df.name, min_tm=50)
bindings.head()

  df['left_term'] = _is_left_end_terminal(df)


Unnamed: 0,name,anneal,overhang,primer,start,length,top_strand_slice,strand,tm,left_term,right_term
7,yeGFP-f,TCTAAAGGTGAAGAATTATTCACTGG,ATG,ATGTCTAAAGGTGAAGAATTATTCACTGG,0,26,"(0, 26)",1,51.24,True,False
8,PS-yeGFP-f,TCTAAAGGTGAAGAATTATTCACTGG,cggattctagaactagtggatctacaaaATG,cggattctagaactagtggatctacaaaATGTCTAAAGGTGAAGAA...,0,26,"(0, 26)",1,51.24,True,False
9,kozak-2ndT-yeGFP-f,TCTAAAGGTGAAGAATTATTCACTGG,aactagtgccacactagttctagaatccaaaATG,aactagtgccacactagttctagaatccaaaATGTCTAAAGGTGAA...,0,26,"(0, 26)",1,51.24,True,False
10,2xC3-kozak-yeGFP-f,TCTAAAGGTGAAGAATTATTCACTGG,TAGGATCCTACTGTATGTACccaTAGGATCCTACTGTATGTACaaaATG,TAGGATCCTACTGTATGTACccaTAGGATCCTACTGTATGTACaaa...,0,26,"(0, 26)",1,51.24,True,False
11,2xC7-kozak-yeGFP-f,TCTAAAGGTGAAGAATTATTCACTGG,ATGGCATGCATGTGCTCTGTccaATGGCATGCATGTGCTCTGTaaaATG,ATGGCATGCATGTGCTCTGTccaATGGCATGCATGTGCTCTGTaaa...,0,26,"(0, 26)",1,51.24,True,False


In [71]:
from primer3plus.utils import reverse_complement as rc

# rflank = 'aaaaaaaaaaaaaaaaaaa'
# anneal = 'cgctcgaaggctttaatttgatgtcgtaataaccccgccccg'
rflank = 'aaaaaaaaaaaaaaaaaaaaa'
t = 'gtcccaattttggttgaattagatggtgatgttaatggtcacaaattttctgtctccggtgaaggtgaaggtgatgctacttac' + rflank

primer_rc = 'ctccggtgaaggtgaaggtgatgctacttac' + rflank

rc(primer_rc)

'tttttttttttttttttttttgtaagtagcatcaccttcaccttcaccggag'

### Design from existing left primer

In [59]:
def _resolve_region(template_len, start, length, end):
    if start is not None:
        if length is None and end is not None:
            length = end - start
            region = (start, length)
        elif length is not None:
            if end is not None:
                assert end == start + length
            region = (start, length)
        else:
            raise ValueError
    else:
        region = (0, len(template))

def design_primers(template, start=None, length=None, end=None, lseq=None, rseq=None, lflank=None, rflank=None, n=1, min_tm=50):
    region = _resolve_region(len(template), start, length, end)
    
    if isinstance(rseq, pd.DataFrame):
        df = rseq
        bindings_df = primers.create_anneal_df(template, df.sequence, df.name, min_tm=50)
        bindings_df = bindngs_df[bindings_df['strand'] == -1]
        
    elif isinstance(lseq, list):
        pass
    elif isinstance(lseq, str):
        pass
    
    
design_primers(template)
    

In [62]:
designs = _design_primers(template, region, lseq='cgctcgaaggctttaatttgatgtcgtaataaccccgccccg', rseq=None, right_overhang='aaaaa', n=1)
designs

({0: {'PAIR': {'PENALTY': 31.166181635391467,
    'COMPL_ANY_TH': 0.0,
    'COMPL_END_TH': 0.0,
    'PRODUCT_SIZE': 497},
   'LEFT': {'PENALTY': 26.095868658750078,
    'PROBLEMS': ' Temperature too high; Too long;',
    'SEQUENCE': 'cgctcgaaggctttaatttgatgtcgtaataaccccgccccg',
    'location': [0, 42],
    'TM': 71.09586865875008,
    'GC_PERCENT': 48.57142857142857,
    'SELF_ANY_TH': 0.0,
    'SELF_END_TH': 0.0,
    'HAIRPIN_TH': 0.0,
    'END_STABILITY': 5.73,
    'OVERHANG': ''},
   'RIGHT': {'PENALTY': 5.0703129766413895,
    'SEQUENCE': 'atggtgtttatgcaaagaaaccact',
    'location': (496, 25),
    'TM': 59.92968702335861,
    'GC_PERCENT': 36.0,
    'SELF_ANY_TH': 14.019192410150993,
    'SELF_END_TH': 4.192303735437633,
    'HAIRPIN_TH': 36.436099151708675,
    'END_STABILITY': 4.0,
    'OVERHANG': 'aaaaa'}}},
  'PRIMER_RIGHT_EXPLAIN': 'considered 15, low tm 6, ok 15',
  'PRIMER_PAIR_EXPLAIN': 'considered 1, ok 1',
  'PRIMER_LEFT_NUM_RETURNED': 1,
  'PRIMER_RIGHT_NUM_RETURNED': 1,

In [55]:
import pandas as pd

primers = bindings.primer

template = 'cgctcgaaggctttaatttgatgtcgtaataaccccgccccgtgcaggccttttgaaaagcaagcataaaagatctaaacataaaatctgtaaaataacaagatgtaaagataatgctaaatcatttggctttttgattgattgtacaggaaaatatacatcgcagggggttgacttttaccatttcaccgcaatggaatcaaacttgttgaagagaatgttcacaggcgcatacgctacaatgacccgattcttgctagccttttctcggtcttgcaaacaaccgccggcagcttagtatataaatacacatgtacatacctctctccgtatcctcgtaatcattttcttgtatttatcgtcttttcgctgtaaaaactttatcacacttatctcaaatacacttattaaccgcttttactattatcttctacgctgacagtaatatcaaacagtgacacatattaaacacagtggtttctttgcataaacaccat'
region = (0, len(template))
all_designs = []
for p in bindings.primer:
    try:
        designs = _design_primers(template, region, lseq='cgctcgaaggctttaatttgatgtcgtaataaccccgccccg', rseq=None, n=1)
        if designs[0]:
            all_designs.append(designs)
    except:
        pass
    
pairs = []
for x in all_designs:
    for y in x[0].values():
        pairs.append(y)

pairs = sorted(pairs, key=lambda x: x['PAIR']['PENALTY'])
pairs

[{'PAIR': {'PENALTY': 31.166181635391467,
   'COMPL_ANY_TH': 0.0,
   'COMPL_END_TH': 0.0,
   'PRODUCT_SIZE': 497},
  'LEFT': {'PENALTY': 26.095868658750078,
   'PROBLEMS': ' Temperature too high; Too long;',
   'SEQUENCE': 'cgctcgaaggctttaatttgatgtcgtaataaccccgccccg',
   'location': [0, 42],
   'TM': 71.09586865875008,
   'GC_PERCENT': 48.57142857142857,
   'SELF_ANY_TH': 0.0,
   'SELF_END_TH': 0.0,
   'HAIRPIN_TH': 0.0,
   'END_STABILITY': 5.73,
   'OVERHANG': ''},
  'RIGHT': {'PENALTY': 5.0703129766413895,
   'SEQUENCE': 'atggtgtttatgcaaagaaaccact',
   'location': (496, 25),
   'TM': 59.92968702335861,
   'GC_PERCENT': 36.0,
   'SELF_ANY_TH': 14.019192410150993,
   'SELF_END_TH': 4.192303735437633,
   'HAIRPIN_TH': 36.436099151708675,
   'END_STABILITY': 4.0,
   'OVERHANG': ''}},
 {'PAIR': {'PENALTY': 31.166181635391467,
   'COMPL_ANY_TH': 0.0,
   'COMPL_END_TH': 0.0,
   'PRODUCT_SIZE': 497},
  'LEFT': {'PENALTY': 26.095868658750078,
   'PROBLEMS': ' Temperature too high; Too long;',

In [43]:
"considered 15, low tm 4, ok 15".split(', ')

['considered 15', 'low tm 4', 'ok 15']

In [41]:
all_designs

[({0: {'PAIR': {'PENALTY': 13.075533082321613,
     'COMPL_ANY_TH': 0.0,
     'COMPL_END_TH': 0.0,
     'PRODUCT_SIZE': 200},
    'LEFT': {'PENALTY': 8.7928302941678,
     'PROBLEMS': ' Hairpin stability too high;',
     'SEQUENCE': 'tctaaaggtgaagaattattcactgg',
     'location': (0, 26),
     'TM': 57.2071697058322,
     'GC_PERCENT': 34.61538461538461,
     'SELF_ANY_TH': 33.16317392155554,
     'SELF_END_TH': 21.30781055401809,
     'HAIRPIN_TH': 51.56381098775216,
     'END_STABILITY': 4.0,
     'OVERHANG': 'ATG'},
    'RIGHT': {'PENALTY': 4.282702788153813,
     'SEQUENCE': 'acaccataaccgaaagtagtgact',
     'location': (199, 24),
     'TM': 59.71729721184619,
     'GC_PERCENT': 41.666666666666664,
     'SELF_ANY_TH': 0.0,
     'SELF_END_TH': 0.0,
     'HAIRPIN_TH': 37.594332539309505,
     'END_STABILITY': 3.41,
     'OVERHANG': ''}}},
  {'PRIMER_RIGHT_EXPLAIN': 'considered 15, low tm 4, ok 15',
   'PRIMER_PAIR_EXPLAIN': 'considered 1, ok 1',
   'PRIMER_LEFT_NUM_RETURNED': 1,
   'PR

TomlDecodeError: Found invalid character in key name: '#'. Try quoting the key name. (line 2 column 5 char 5)