In [115]:
import random
import math

## The init method initializes all parameters to zero. 

### If any parameter needs to have a default value, you can set it here. 

### All penalty parameters are set to a default value of 10

In [116]:
class PrimerDesign(object):
    
    def __init__ (self):
        
        '''parameters for the length criterion'''
        self.max_length = 18 #long enough for adequate specificity
        self.min_length = 22 #easily bind to template at annealing temperature
        self.penalty_length = 10 
        
        '''parameters for the temperature difference criterion'''
        self.max_tdiff = 5 #if annealing temperature of forward and reverse primer is too large, primers will not anneal properly
        self.min_tdiff = -5
        self.penalty_tdiff = 10
        
        '''parameters for the cg content criterion'''
        self.max_cg = 0.6 #GC-rich tracts tend to form inverted repeats, or hairpin structures, thus will not anneal properly to the target DNA template
        self.min_cg = 0.4 #AT-rich strectches are hard to amplify under standard reaction conditions
        self.penalty_cg = 10
        
        '''parameters for the annealing temperature criterion'''
        self.max_temp = 65 #if too high, primer might not bind because bonds are broken as they are formed
        self.min_temp = 50 #if too low, primer could bind imperfectly
        self.penalty_temp = 10
        
        '''parameters for the run criterion'''
        self.run_threshold = 4 #likely to have errors in replication
        self.penalty_runs = 10
        
        '''parameters for the repeat criterion'''
        self.repeat_threshold = 2 #likely to have errors in replication
        self.penalty_repeats = 10
        
        '''parameters for the specificity criterion'''
        self.penalty_specificity = 10 
        
        '''locations where the forward primer should be chosen from'''
        self.fp_start = 100
        self.fp_end = 200
        
        '''locations where the reverse primer should be chosen from'''
        self.rp_start = 300
        self.rp_end = 400
        
        ''' parameters for the simulated annealing portion'''
        self.initial_temperature = 200
        self.stopping_temperature = 0.01
        self.drop_fraction = 0.999
        

### Task 2 

In [1]:
class PrimerDesign(PrimerDesign): 
    
    def set_dna_sequence(self, dna_sequence):
        nucleotides = ["a", "t", "c", "g"]
        output_list = []
        for char in dna_sequence: 
            if char in nucleotides: 
                output_list.append(char)
#         print("output", output_list)
        dna_sequence = "".join(output_list)
        self.dna_sequence = dna_sequence
        return self.dna_sequence

NameError: name 'PrimerDesign' is not defined

In [118]:
test_sq = '3721 cccactgggc ccagaaaggc agccaccaaa ttagcctgga caaccctgac taccagcagg 3781 acttctttcc caaggaagcc aagccaaatg gcatctttaa gggctccaca gctgaaaatg 3841 cagaatacct aagggtcgcg ccacaaagca gtgaatttat tggagcatga ccacggagga 3901 tagtatgagc cctaaaaatc cagactcttt cgatacccag gaccaagcca cagcaggtcc 3961 tccatcccaa cagccatgcc cgcattagct cttagaccca cagactggtt ttgcaacgtt 4021 tacaccgact agccaggaag tacttccacc tcgggcacat tttgggaagt tgcattcctt 4081 tgtcttcaaa ctgtgaagca tttacagaaa cgcatccagc aagaatattg tccctttgag 4141 cagaaattta tctttcaaag aggtatattt gaaaaaaaaa aaaagtatat gtgaggattt 4201 ttattgattg gggatcttgg agtttttcat tgtcgctatt gatttttact tcaatgggct 4261 cttccaacaa ggaagaagct tgctggtagc acttgctacc ctgagttcat ccaggcccaa 4321 ctgtgagcaa ggagcacaag ccacaagtct tccagaggat gcttgattcc agtggttctg 4381 cttcaaggct tccactgcaa aacactaaag atccaagaag gccttcatgg ccccagcagg'
test = PrimerDesign()
test.set_dna_sequence(test_sq)
print(test.dna_sequence, len(test.dna_sequence))

cccactgggcccagaaaggcagccaccaaattagcctggacaaccctgactaccagcaggacttctttcccaaggaagccaagccaaatggcatctttaagggctccacagctgaaaatgcagaatacctaagggtcgcgccacaaagcagtgaatttattggagcatgaccacggaggatagtatgagccctaaaaatccagactctttcgatacccaggaccaagccacagcaggtcctccatcccaacagccatgcccgcattagctcttagacccacagactggttttgcaacgtttacaccgactagccaggaagtacttccacctcgggcacattttgggaagttgcattcctttgtcttcaaactgtgaagcatttacagaaacgcatccagcaagaatattgtccctttgagcagaaatttatctttcaaagaggtatatttgaaaaaaaaaaaaagtatatgtgaggatttttattgattggggatcttggagtttttcattgtcgctattgatttttacttcaatgggctcttccaacaaggaagaagcttgctggtagcacttgctaccctgagttcatccaggcccaactgtgagcaaggagcacaagccacaagtcttccagaggatgcttgattccagtggttctgcttcaaggcttccactgcaaaacactaaagatccaagaaggccttcatggccccagcagg 720


### Task 3

In [119]:
import random 

class PrimerDesign(PrimerDesign):
    
    def func_select_random(self, sqtype='forward', length = 20 ):
        
        '''the length has to be a positive number'''
        
        if(sqtype == 'forward'):
            output_list = []
            start_limit = self.fp_start 
            end_limit = self.fp_end 
            start_pos = random.randint(start_limit, end_limit-length)
            self.forward_primer = self.dna_sequence[start_pos:start_pos+length]
            return self.forward_primer 
        
        elif(sqtype == 'reverse'):
            start_limit = self.rp_start 
            end_limit = self.rp_end
            start_pos= random.randint(start_limit, end_limit-length)
            self.reverse_primer = self.dna_sequence[start_pos: start_pos+length]
            return self.reverse_primer
        
        else: 
            return None

In [120]:
test_sq = '3721 cccactgggc ccagaaaggc agccaccaaa ttagcctgga caaccctgac taccagcagg 3781 acttctttcc caaggaagcc aagccaaatg gcatctttaa gggctccaca gctgaaaatg 3841 cagaatacct aagggtcgcg ccacaaagca gtgaatttat tggagcatga ccacggagga 3901 tagtatgagc cctaaaaatc cagactcttt cgatacccag gaccaagcca cagcaggtcc 3961 tccatcccaa cagccatgcc cgcattagct cttagaccca cagactggtt ttgcaacgtt 4021 tacaccgact agccaggaag tacttccacc tcgggcacat tttgggaagt tgcattcctt 4081 tgtcttcaaa ctgtgaagca tttacagaaa cgcatccagc aagaatattg tccctttgag 4141 cagaaattta tctttcaaag aggtatattt gaaaaaaaaa aaaagtatat gtgaggattt 4201 ttattgattg gggatcttgg agtttttcat tgtcgctatt gatttttact tcaatgggct 4261 cttccaacaa ggaagaagct tgctggtagc acttgctacc ctgagttcat ccaggcccaa 4321 ctgtgagcaa ggagcacaag ccacaagtct tccagaggat gcttgattcc agtggttctg 4381 cttcaaggct tccactgcaa aacactaaag atccaagaag gccttcatgg ccccagcagg'
test = PrimerDesign()
test.set_dna_sequence(test_sq)
print(test.func_select_random('forward'))
print(test.func_select_random('reverse'))

atgaccacggaggatagtat
gaagttgcattcctttgtct


### Task 4

In [121]:
class PrimerDesign(PrimerDesign): 
    
    def func_length(self, sq):
        return len(sq)
    
    def func_cg_fraction(self, sq):
        count = 0
        for base in sq:
            if base == 'c' or base == 'g':
                count+=1
        return count/len(sq)
    
    def func_temperature(self,sq):
        optimal_temp = 4*(sq.count('c') + sq.count('g')) + 2*(sq.count('a') + sq.count('t'))
        return optimal_temp

In [122]:
test_sq = '3721 cccactgggc ccagaaaggc agccaccaaa ttagcctgga caaccctgac taccagcagg 3781 acttctttcc caaggaagcc aagccaaatg gcatctttaa gggctccaca gctgaaaatg 3841 cagaatacct aagggtcgcg ccacaaagca gtgaatttat tggagcatga ccacggagga 3901 tagtatgagc cctaaaaatc cagactcttt cgatacccag gaccaagcca cagcaggtcc 3961 tccatcccaa cagccatgcc cgcattagct cttagaccca cagactggtt ttgcaacgtt 4021 tacaccgact agccaggaag tacttccacc tcgggcacat tttgggaagt tgcattcctt 4081 tgtcttcaaa ctgtgaagca tttacagaaa cgcatccagc aagaatattg tccctttgag 4141 cagaaattta tctttcaaag aggtatattt gaaaaaaaaa aaaagtatat gtgaggattt 4201 ttattgattg gggatcttgg agtttttcat tgtcgctatt gatttttact tcaatgggct 4261 cttccaacaa ggaagaagct tgctggtagc acttgctacc ctgagttcat ccaggcccaa 4321 ctgtgagcaa ggagcacaag ccacaagtct tccagaggat gcttgattcc agtggttctg 4381 cttcaaggct tccactgcaa aacactaaag atccaagaag gccttcatgg ccccagcagg'
test = PrimerDesign()
test.set_dna_sequence(test_sq)
sq = test.func_select_random('forward')
print(sq)
print(test.func_length(sq))
print(test.func_cg_fraction(sq))
print(test.func_temperature(sq))

cggaggatagtatgagccct
20
0.55
62


In [123]:
class PrimerDesign(PrimerDesign):

    def func_count_runs(self,sq):
        sq_list = [i for i in sq]
        runs = 0
        count = 0
        base = ' '
        for i in sq_list:
            if i == base:
                count += 1
            else:
                if count >= self.run_threshold:
                    runs += 1
                count = 0
                base = i
        if count >= self.run_threshold:
            runs += 1
        return runs

In [124]:
test_sq = '3721 cccactgggc ccagaaaggc agccaccaaa ttagcctgga caaccctgac taccagcagg 3781 acttctttcc caaggaagcc aagccaaatg gcatctttaa gggctccaca gctgaaaatg 3841 cagaatacct aagggtcgcg ccacaaagca gtgaatttat tggagcatga ccacggagga 3901 tagtatgagc cctaaaaatc cagactcttt cgatacccag gaccaagcca cagcaggtcc 3961 tccatcccaa cagccatgcc cgcattagct cttagaccca cagactggtt ttgcaacgtt 4021 tacaccgact agccaggaag tacttccacc tcgggcacat tttgggaagt tgcattcctt 4081 tgtcttcaaa ctgtgaagca tttacagaaa cgcatccagc aagaatattg tccctttgag 4141 cagaaattta tctttcaaag aggtatattt gaaaaaaaaa aaaagtatat gtgaggattt 4201 ttattgattg gggatcttgg agtttttcat tgtcgctatt gatttttact tcaatgggct 4261 cttccaacaa ggaagaagct tgctggtagc acttgctacc ctgagttcat ccaggcccaa 4321 ctgtgagcaa ggagcacaag ccacaagtct tccagaggat gcttgattcc agtggttctg 4381 cttcaaggct tccactgcaa aacactaaag atccaagaag gccttcatgg ccccagcagg'
test = PrimerDesign()
test.set_dna_sequence(test_sq)
sq = test.func_select_random('forward')
print(sq)
print(test.func_count_runs(sq))
x = 'aaaaabaa'
print(x)
print(test.func_count_runs(x))
y = 'aattttcccccggggg'
print(y)
print(test.func_count_runs(y))

tttattggagcatgaccacg
0
aaaaabaa
1
aattttcccccggggg
2


In [125]:
class PrimerDesign(PrimerDesign):
    def func_count_repeats(self,sq):
        di_repeats = ['at','ac','ag','ca','ct','cg','ga','gt','gc','ta','tc','tg']
        repeats = 0
        count = 0

        for i in di_repeats:
            x = sq[:]
            while len(x)>= 2:
                if x[0] + x[1] == i:
                    while len(x) >= 2 and x[0] + x[1] == i:
                        count += 1
                        x = x[2:]
                    if count > 1:
                        repeats += count - 1
                    count = 0

                    while len(x) >= 2 and x[0] + x[1] != i:
                        x = x[1:]
                else:
                    x = x[1:]
            if count > 1:
                repeats += count -1
            count = 0
        return repeats

In [126]:
class PrimerDesign(PrimerDesign):
    def func_count_repeats(self,sq):
        
        di_repeats = ['at','ac','ag','ca','ct','cg','ga','gt','gc','ta','tc','tg']
        sequence_version1 = []
        sequence_version2= []

        for i in range(0, len(sq), 2):
            sequence_version1.append(sq[i:i+2])

        for i in range(1, len(sq), 2):
            sequence_version2.append(sq[i:i+2])

        count = 0
        count1 = 0
        total_repeats = 0
        
        for i in range(len(sequence_version1)):
            try:
                if sequence_version1[i+1] in di_repeats and sequence_version1[i+1] == sequence_version1[i]:
                    count +=1
#                     print("match", sequence_version1[i+1], sequence_version2[i])
                else:
                    total_repeats += count
                    count = 0
            except:
                pass

        for i in range(len(sequence_version2)):
            try:
                if sequence_version2[i+1] in di_repeats and sequence_version2[i+1] == sequence_version2[i]:
                    count +=1
                else:
                    total_repeats += count
                    count = 0
        
            except:
                pass
            
        return total_repeats

In [127]:
x = 'atatatcgtata'
y = 'acacacttcgcgcgcg'
z = 'gtacacacttacacacag'
test = PrimerDesign()
print(test.func_count_repeats(x))
print(test.func_count_repeats(y))
print(test.func_count_repeats(z))

4
8
7


### Task 5

In [173]:
class PrimerDesign(PrimerDesign):
    
    def cost_length(self, sq):
        '''This is given to you as an example '''
        sq_len = len(sq)
        if(sq_len > self.max_length):
            return (sq_len - self.max_length)*self.penalty_length
        elif(sq_len > self.min_length):
            return 0
        else:
            return (self.min_length - sq_len)*self.penalty_length 
    
    def cost_temperature(self, sq):
        temp = self.func_temperature(sq)
        if temp > self.max_temp:
            cost_temp = self.penalty_temp*(temp-self.max_temp)
        elif temp > self.min_temp:
            cost_temp = 0
        else:
            cost_temp = self.penalty_temp*(self.min_temp-temp)
        return cost_temp
        
    def cost_cgcontent(self,sq):
        cg_content = self.func_cg_fraction(sq)
        if cg_content > 0.6:
            cost_cgcontent = self.penalty_cg*(cg_content-0.6)
        elif cg_content >= 0.4:
            cost_cgcontent = 0
        else:
            cost_cgcontent = self.penalty_cg*(0.4-cg_content)
        return cost_cgcontent
        
    def cost_temperature_difference(self, fp, rp):
        temp_fp = self.func_temperature(fp)
        temp_rp = self.func_temperature(rp)
        temp_diff = abs(temp_fp-temp_rp)
        if temp_diff > self.max_tdiff:
            cost_temp_diff = self.penalty_tdiff*(temp_diff-self.max_tdiff)
        else:
            cost_temp_diff = 0
        return cost_temp_diff
    
    def cost_specificity(self, sq):
        return self.penalty_specificity*(self.dna_sequence.count(sq)-1)
    
    def cost_runs(self, sq):
        return self.func_count_runs(sq) * self.penalty_runs
    
    def cost_repeats(self,sq):
        return self.func_count_repeats(sq) * self.penalty_repeats

### Task 6

In [168]:
class PrimerDesign(PrimerDesign):
    
    def cost_objective_function(self, fp, rp):
        '''complete the calculation of the cost'''
        
        cost_fp = self.cost_length(fp) + self.cost_temperature(fp) + self.cost_cgcontent(fp) + self.cost_specificity(fp) + self.cost_runs(fp) + self.cost_repeats(fp)
        cost_rp = self.cost_length(rp) + self.cost_temperature(rp) + self.cost_cgcontent(rp) + self.cost_specificity(rp) + self.cost_runs(rp) + self.cost_repeats(rp)
        total_cost = cost_fp + cost_rp + self.cost_temperature_difference(fp,rp)
        return total_cost 

In [169]:
test_sq = '3721 cccactgggc ccagaaaggc agccaccaaa ttagcctgga caaccctgac taccagcagg 3781 acttctttcc caaggaagcc aagccaaatg gcatctttaa gggctccaca gctgaaaatg 3841 cagaatacct aagggtcgcg ccacaaagca gtgaatttat tggagcatga ccacggagga 3901 tagtatgagc cctaaaaatc cagactcttt cgatacccag gaccaagcca cagcaggtcc 3961 tccatcccaa cagccatgcc cgcattagct cttagaccca cagactggtt ttgcaacgtt 4021 tacaccgact agccaggaag tacttccacc tcgggcacat tttgggaagt tgcattcctt 4081 tgtcttcaaa ctgtgaagca tttacagaaa cgcatccagc aagaatattg tccctttgag 4141 cagaaattta tctttcaaag aggtatattt gaaaaaaaaa aaaagtatat gtgaggattt 4201 ttattgattg gggatcttgg agtttttcat tgtcgctatt gatttttact tcaatgggct 4261 cttccaacaa ggaagaagct tgctggtagc acttgctacc ctgagttcat ccaggcccaa 4321 ctgtgagcaa ggagcacaag ccacaagtct tccagaggat gcttgattcc agtggttctg 4381 cttcaaggct tccactgcaa aacactaaag atccaagaag gccttcatgg ccccagcagg'
test = PrimerDesign()
test.set_dna_sequence(test_sq)
fp = test.func_select_random('forward')
rp = test.func_select_random('reverse')
print(test.cost_objective_function(fp,rp))

1091.0


### Task 7

In [174]:
class PrimerDesign(PrimerDesign):
    
    def cost_objective_function_info(self, fp, rp):
        line = '{:<80}'.format('===Forward Primer=== {}'.format(fp)) + '\n' 
        line += '{:<25}'.format('Criterion') + '{:>25}'.format('Cost Function Score') + '{:>30}'.format('Criteria Met') + '\n' 
        line += '-'*80 + '\n'
        line += '{:<25}'.format('length') + '{:>25}'.format('{0:.3f}'.format(self.func_length(fp))) + '\n'
        line += '{:<25}'.format('annealing temperature') + '{:>25}'.format('{0:.3f}'.format(self.func_temperature(fp))) + '\n'
        line += '{:<25}'.format('%cg_content') + '{:>25}'.format('{0:.3f}'.format(self.cost_cgcontent(fp))) + '\n'
        line += '{:<25}'.format('specificty') + '{:>25}'.format('{0:.3f}'.format(self.cost_specificity(fp))) + '\n'
        line += '{:<25}'.format('runs') + '{:>25}'.format('{0:.3f}'.format(self.cost_runs(fp))) + '\n'
        line += '{:<25}'.format('repeats') + '{:>25}'.format('{0:.3f}'.format(self.cost_repeats(fp))) + '\n'
        line += '\n'

        line += '{:<50}'.format('===Reverse Primer=== {}'.format(rp)) + '\n' 
        line += '{:<25}'.format('Criterion') + '{:>25}'.format('Cost Function Score') + '\n' 
        line += '-'*80 + '\n'
        line += '{:<25}'.format('length') + '{:>25}'.format('{0:.3f}'.format(self.func_length(rp))) + '\n'
        line += '{:<25}'.format('annealing temperature') + '{:>25}'.format('{0:.3f}'.format(self.func_temperature(rp))) + '\n'
        line += '{:<25}'.format('%cg_content') + '{:>25}'.format('{0:.3f}'.format(self.cost_cgcontent(rp))) + '\n'
        line += '{:<25}'.format('specificty') + '{:>25}'.format('{0:.3f}'.format(self.cost_specificity(rp))) + '\n'
        line += '{:<25}'.format('runs') + '{:>25}'.format('{0:.3f}'.format(self.cost_runs(rp))) + '\n'
        line += '{:<25}'.format('repeats') + '{:>25}'.format('{0:.3f}'.format(self.cost_repeats(rp))) + '\n'
        line += '\n'

        line += '{:<25}'.format('Temperature Difference') + '{:>25}'.format('{0:.3f}'.format(self.cost_objective_function(fp,rp))) + '\n'
        
        return line

test_sq = '3721 cccactgggc ccagaaaggc agccaccaaa ttagcctgga caaccctgac taccagcagg 3781 acttctttcc caaggaagcc aagccaaatg gcatctttaa gggctccaca gctgaaaatg 3841 cagaatacct aagggtcgcg ccacaaagca gtgaatttat tggagcatga ccacggagga 3901 tagtatgagc cctaaaaatc cagactcttt cgatacccag gaccaagcca cagcaggtcc 3961 tccatcccaa cagccatgcc cgcattagct cttagaccca cagactggtt ttgcaacgtt 4021 tacaccgact agccaggaag tacttccacc tcgggcacat tttgggaagt tgcattcctt 4081 tgtcttcaaa ctgtgaagca tttacagaaa cgcatccagc aagaatattg tccctttgag 4141 cagaaattta tctttcaaag aggtatattt gaaaaaaaaa aaaagtatat gtgaggattt 4201 ttattgattg gggatcttgg agtttttcat tgtcgctatt gatttttact tcaatgggct 4261 cttccaacaa ggaagaagct tgctggtagc acttgctacc ctgagttcat ccaggcccaa 4321 ctgtgagcaa ggagcacaag ccacaagtct tccagaggat gcttgattcc agtggttctg 4381 cttcaaggct tccactgcaa aacactaaag atccaagaag gccttcatgg ccccagcagg'
test = PrimerDesign()
test.set_dna_sequence(test_sq)
sq = test.func_select_random('forward')
fp = test.func_select_random('forward')
rp = test.func_select_random('reverse')
info = test.cost_objective_function_info(fp,rp)
print(info)

===Forward Primer=== gaatacctaagggtcgcgcc                                       
Criterion                      Cost Function Score                  Criteria Met
--------------------------------------------------------------------------------
length                                      20.000
annealing temperature                       64.000
%cg_content                                  0.000
specificty                                   0.000
runs                                         0.000
repeats                                     20.000

===Reverse Primer=== cacattttgggaagttgcat         
Criterion                      Cost Function Score
--------------------------------------------------------------------------------
length                                      20.000
annealing temperature                       56.000
%cg_content                                  0.000
specificty                                   0.000
runs                                         0.000
repeats     

In [161]:
import pandas as pd
data2 = [{'a': 1, 'b': 2}, {'a': 5, 'b': 10, 'c': 20}]
pd.DataFrame(data2)

Unnamed: 0,a,b,c
0,1,2,
1,5,10,20.0


In [177]:
d = {'one' : pd.Series([1., 2., 3.], index=['a', 'b', 'c']),
     'two' : pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd'])}
pd.DataFrame(d)

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,,4.0


In [179]:
import pandas as pd
Criterion = ['length','annealing temperature', '%cg_content','specificity','runs','repeats']
F_Cost = [80,210,34,50,30,60]
R_Cost = [240,12,40,30,60,40]
F_Primer = {'Criterion': pd.Series(Criterion, index=[i for i in range(1,7)]), 
            'Cost Function Score': pd.Series(['{0:.3f}'.format(i) for i in F_Cost], index=[i for i in range(1,7)])}
pd.DataFrame(F_Primer)

Unnamed: 0,Cost Function Score,Criterion
1,80.0,length
2,210.0,annealing temperature
3,34.0,%cg_content
4,50.0,specificity
5,30.0,runs
6,60.0,repeats


### Task 10

In [None]:
class PrimerDesign(PrimerDesign): 
    
    def func_simulated_annealing(self):
        
        temperature = self.initial_temperature
        stopping_temperature = self.stopping_temperature
        drop = self.drop_fractionfunc
		
			
        
        pass 


### Store the DNA sequence given to you in the variable below 

In [None]:
dna_sequence = ''' '''

### Instantiate your class and read in the DNA sequence

### If you need to adjust any parameter from their default values in the init method, do it here

### Show the outcome of your testing and the functions in the subsequent cells 