In [None]:
import random
import math

### The init method initializes all parameters to zero. 

#If any parameter needs to have a default value, you can set it here.
#All penalty parameters are set to a default value of 10

In [2]:
class PrimerDesign(object):
    
    def __init__ (self):
        
        '''parameters for the length criterion'''
        self.max_length = 18 #long enough for adequate specificity
        self.min_length = 22 #easily bind to template at annealing temperature
        self.penalty_length = 10 
        
        '''parameters for the temperature difference criterion'''
        self.max_tdiff = 15 #if annealing temperature of forward and reverse primer is too large, primers will not anneal properly
        self.min_tdiff = -5
        self.penalty_tdiff = 10
        
        '''parameters for the cg content criterion'''
        self.max_cg = 0.6 #GC-rich tracts tend to form inverted repeats, or hairpin structures, thus will not anneal properly to the target DNA template
        self.min_cg = 0.4 #AT-rich strectches are hard to amplify under standard reaction conditions
        self.penalty_cg = 10
        
        '''parameters for the annealing temperature criterion'''
        self.max_temp = 65 #if too high, primer might not bind because bonds are broken as they are formed
        self.min_temp = 50 #if too low, primer could bind imperfectly
        self.penalty_temp = 10
        
        '''parameters for the run criterion'''
        self.run_threshold = 4 #likely to have errors in replication
        self.penalty_runs = 10
        
        '''parameters for the repeat criterion'''
        self.repeat_threshold = 2 #likely to have errors in replication
        self.penalty_repeats = 10
        
        '''parameters for the specificity criterion'''
        self.penalty_specificity = 10 
        
        '''locations where the forward primer should be chosen from'''
        self.fp_start = 100
        self.fp_end = 200
        
        '''locations where the reverse primer should be chosen from'''
        self.rp_start = 300
        self.rp_end = 400
        
        ''' parameters for the simulated annealing portion'''
        self.initial_temperature = 200
        self.stopping_temperature = 0.01
        self.drop_fraction = 0.999
        

### Task 2 

### This method cleans a DNA sequence

#All spaces, numbers and capital letters are removed

In [3]:
class PrimerDesign(PrimerDesign): 
    
    def set_dna_sequence(self, dna_sequence):
        #list of bases
        nucleotides = ["a", "t", "c", "g"]
        output_list = []
        
        #checks input DNA sequence for bases and append them to a list
        for char in dna_sequence: 
            if char in nucleotides: 
                output_list.append(char)
        
        #converts list into single line string
        dna_sequence = "".join(output_list)
        self.dna_sequence = dna_sequence
        return self.dna_sequence

#Test Case
#Run this again after running all cells to instantiate test again

In [52]:
test_sq = '3721 cccactgggc ccagaaaggc agccaccaaa ttagcctgga caaccctgac taccagcagg 3781 acttctttcc caaggaagcc aagccaaatg gcatctttaa gggctccaca gctgaaaatg 3841 cagaatacct aagggtcgcg ccacaaagca gtgaatttat tggagcatga ccacggagga 3901 tagtatgagc cctaaaaatc cagactcttt cgatacccag gaccaagcca cagcaggtcc 3961 tccatcccaa cagccatgcc cgcattagct cttagaccca cagactggtt ttgcaacgtt 4021 tacaccgact agccaggaag tacttccacc tcgggcacat tttgggaagt tgcattcctt 4081 tgtcttcaaa ctgtgaagca tttacagaaa cgcatccagc aagaatattg tccctttgag 4141 cagaaattta tctttcaaag aggtatattt gaaaaaaaaa aaaagtatat gtgaggattt 4201 ttattgattg gggatcttgg agtttttcat tgtcgctatt gatttttact tcaatgggct 4261 cttccaacaa ggaagaagct tgctggtagc acttgctacc ctgagttcat ccaggcccaa 4321 ctgtgagcaa ggagcacaag ccacaagtct tccagaggat gcttgattcc agtggttctg 4381 cttcaaggct tccactgcaa aacactaaag atccaagaag gccttcatgg ccccagcagg'
test = PrimerDesign()
test.set_dna_sequence(test_sq)
print(test.dna_sequence, len(test.dna_sequence))

cccactgggcccagaaaggcagccaccaaattagcctggacaaccctgactaccagcaggacttctttcccaaggaagccaagccaaatggcatctttaagggctccacagctgaaaatgcagaatacctaagggtcgcgccacaaagcagtgaatttattggagcatgaccacggaggatagtatgagccctaaaaatccagactctttcgatacccaggaccaagccacagcaggtcctccatcccaacagccatgcccgcattagctcttagacccacagactggttttgcaacgtttacaccgactagccaggaagtacttccacctcgggcacattttgggaagttgcattcctttgtcttcaaactgtgaagcatttacagaaacgcatccagcaagaatattgtccctttgagcagaaatttatctttcaaagaggtatatttgaaaaaaaaaaaaagtatatgtgaggatttttattgattggggatcttggagtttttcattgtcgctattgatttttacttcaatgggctcttccaacaaggaagaagcttgctggtagcacttgctaccctgagttcatccaggcccaactgtgagcaaggagcacaagccacaagtcttccagaggatgcttgattccagtggttctgcttcaaggcttccactgcaaaacactaaagatccaagaaggccttcatggccccagcagg 720


### Task 3

### This method selects a forward and reverse primer at random with a specified length

In [5]:
import random 

class PrimerDesign(PrimerDesign):
    
    def func_select_random(self, sqtype='forward', length = 20 ):
        
        '''the length has to be a positive number'''
        
        if sqtype == 'forward':
            output_list = []
            start_limit = self.fp_start 
            end_limit = self.fp_end
            #randomly generate a starting position for the primer
            #the length of the primer is accounted for in the end limit
            start_pos = random.randint(start_limit, end_limit-length)
            #generate primer by slicing the dna sequence
            self.forward_primer = self.dna_sequence[start_pos:start_pos+length]
            return self.forward_primer 
        
        elif sqtype == 'reverse':
            start_limit = self.rp_start 
            end_limit = self.rp_end
            start_pos= random.randint(start_limit, end_limit-length)
            self.reverse_primer = self.dna_sequence[start_pos: start_pos+length]
            return self.reverse_primer

#Test Case

In [11]:
fp = test.func_select_random('forward')
rp = test.func_select_random('reverse')
print(fp)
print(rp)

acggaggatagtatgagccc
gaagtacttccacctcgggc


### Task 4

### This method calculates properties for a given primer string
#They are defined in the following order:
length, fraction of cg bases, annealing temperature, number of runs and number of repeats

In [12]:
class PrimerDesign(PrimerDesign): 
    
    def func_length(self, sq):
        return len(sq)
    
    def func_cg_fraction(self, sq):
        count = 0
        #checks primer for c & g and adds to the counter
        for base in sq:
            if base == 'c' or base == 'g':
                count+=1
        return count/len(sq)
    
    def func_temperature(self,sq):
        #calculates optimal temperature based on number of each base
        optimal_temp = 4*(sq.count('c') + sq.count('g')) + 2*(sq.count('a') + sq.count('t'))
        return optimal_temp

#Test Case

In [15]:
print(fp)
print(test.func_length(fp))
print(test.func_cg_fraction(fp))
print(test.func_temperature(fp))

acggaggatagtatgagccc
20
0.55
62


In [16]:
class PrimerDesign(PrimerDesign):

    def func_count_runs(self,sq):
        #converts primer string to list
        sq_list = [i for i in sq]
        runs = 0
        count = 0
        base = ' '
        for i in sq_list:
            #adds to count if base is same as previous
            if i == base:
                count += 1
                
            else:
                if count >= self.run_threshold: #checks if count of previous base run exceeds threshold
                    runs += 1 #update number of runs
                count = 0 #resets count for new base
                base = i #if base different from previous, variable base is changed to current base
        if count >= self.run_threshold:
            runs += 1
        return runs

#Test Case

In [19]:
print(fp)
print(test.func_count_runs(fp))
x = 'aaaaabaa'
print(x)
print(test.func_count_runs(x))
y = 'aattttcccccggggg'
print(y)
print(test.func_count_runs(y))

acggaggatagtatgagccc
0
aaaaabaa
1
aattttcccccggggg
2


In [20]:
class PrimerDesign(PrimerDesign):
    def func_count_repeats(self,sq):
        di_repeats = ['at','ac','ag','ca','ct','cg','ga','gt','gc','ta','tc','tg']
        repeats = 0
        count = 0

        for i in di_repeats:
            #makes a copy of primer sequence
            x = sq[:]
            while len(x)>= 2:
                #if first 2 characters of sequence is same as di_repeat
                if x[0] + x[1] == i:
                    while len(x) >= 2 and x[0] + x[1] == i:
                        count += 1 #adds to count
                        x = x[2:] #removes first 2 characters
                    
                    #when encounter something that is not the di_repeat checked for, checks if theres a repeat
                    if count > 1:
                        repeats += count - 1
                    count = 0

                    #after encountering the first di_repeat in sequence if not di_repeat, remove first 1 character
                    while len(x) >= 2 and x[0] + x[1] != i:
                        x = x[1:]
                
                #before encountering first di_repeat
                else:
                    x = x[1:]
                    
            #when finish checking for sequence for a di_rpeeat, checks if last part has repeats        
            if count > 1:
                repeats += count -1
            count = 0
            
        return repeats

#Test Case

In [21]:
x = 'atatatcgtata'
y = 'acacacttcgcgcgcg'
z = 'gtacacacttacacacag'
test = PrimerDesign()
print(test.func_count_repeats(x))
print(test.func_count_repeats(y))
print(test.func_count_repeats(z))

4
8
7


### Task 5

### This method calculates the cost for each criteria

In [23]:
class PrimerDesign(PrimerDesign):
    
    def cost_length(self, sq):
        '''This is given to you as an example '''
        sq_len = len(sq)
        if(sq_len > self.max_length):
            return (sq_len - self.max_length)*self.penalty_length
        elif(sq_len > self.min_length):
            return 0
        else:
            return (self.min_length - sq_len)*self.penalty_length 
    
    def cost_temperature(self, sq):
        temp = self.func_temperature(sq)
        if temp > self.max_temp:
            cost_temp = self.penalty_temp*(temp-self.max_temp)
        elif temp > self.min_temp:
            cost_temp = 0
        else:
            cost_temp = self.penalty_temp*(self.min_temp-temp)
        return cost_temp
        
    def cost_cgcontent(self,sq):
        cg_content = self.func_cg_fraction(sq)
        if cg_content > 0.6:
            cost_cgcontent = self.penalty_cg*(cg_content-0.6)
        elif cg_content >= 0.4:
            cost_cgcontent = 0
        else:
            cost_cgcontent = self.penalty_cg*(0.4-cg_content)
        return cost_cgcontent
        
    def cost_temperature_difference(self, fp, rp):
        temp_fp = self.func_temperature(fp)
        temp_rp = self.func_temperature(rp)
        temp_diff = abs(temp_fp-temp_rp)
        if temp_diff > self.max_tdiff:
            cost_temp_diff = self.penalty_tdiff*(temp_diff-self.max_tdiff)
        else:
            cost_temp_diff = 0
        return cost_temp_diff
    
    def cost_specificity(self, sq):
        return self.penalty_specificity*(self.dna_sequence.count(sq)-1)
    
    def cost_runs(self, sq):
        return self.func_count_runs(sq) * self.penalty_runs
    
    def cost_repeats(self,sq):
        return self.func_count_repeats(sq) * self.penalty_repeats

### Task 6

### This method sums the cost for each criteria for both primers

In [24]:
class PrimerDesign(PrimerDesign):
    
    def cost_objective_function(self, fp, rp):
        '''complete the calculation of the cost'''
        
        cost_fp = self.cost_length(fp) + self.cost_temperature(fp) + self.cost_cgcontent(fp) + self.cost_specificity(fp) + self.cost_runs(fp) + self.cost_repeats(fp)
        cost_rp = self.cost_length(rp) + self.cost_temperature(rp) + self.cost_cgcontent(rp) + self.cost_specificity(rp) + self.cost_runs(rp) + self.cost_repeats(rp)
        total_cost = cost_fp + cost_rp + self.cost_temperature_difference(fp,rp)
        return total_cost 

#Test Case

In [28]:
print(test.cost_objective_function(fp,rp))

40


### Task 7

### This method prints a receipt of the cost of producing the randomly selected forward and reverse primers

In [42]:
class PrimerDesign(PrimerDesign):
    
    def cost_objective_function_info(self, fp, rp):
        Criterion = ['length','annealing temperature', '%cg_content','specificity','runs','repeats']
        F_Cost = [self.func_length(fp), self.func_temperature(fp), self.cost_cgcontent(fp), self.cost_specificity(fp), self.cost_runs(fp), self.cost_repeats(fp)]
        F_Criterion_Met = ['True' if i==0 else 'False' for i in F_Cost]
        
        R_Cost = [self.func_length(rp), self.func_temperature(rp), self.cost_cgcontent(rp), self.cost_specificity(rp), self.cost_runs(rp), self.cost_repeats(rp)]
        R_Criterion_Met = ['True' if i==0 else 'False' for i in R_Cost]

        line = '{:<80}'.format('===Forward Primer=== {}'.format(fp)) + '\n' 
        line += '{:<25}'.format('Criterion') + '{:>25}'.format('Cost Function Score') + '{:>30}'.format('Criteria Met') + '\n' 
        line += '-'*80 + '\n'
        line += '{:<25}'.format(Criterion[0]) + '{:>25}'.format('{0:.3f}'.format(F_Cost[0])) + '{:>30}'.format(F_Criterion_Met[0]) + '\n'
        line += '{:<25}'.format(Criterion[1]) + '{:>25}'.format('{0:.3f}'.format(F_Cost[1])) + '{:>30}'.format(F_Criterion_Met[1]) + '\n'
        line += '{:<25}'.format(Criterion[2]) + '{:>25}'.format('{0:.3f}'.format(F_Cost[2])) + '{:>30}'.format(F_Criterion_Met[2]) + '\n'
        line += '{:<25}'.format(Criterion[3]) + '{:>25}'.format('{0:.3f}'.format(F_Cost[3])) + '{:>30}'.format(F_Criterion_Met[3]) + '\n'
        line += '{:<25}'.format(Criterion[4]) + '{:>25}'.format('{0:.3f}'.format(F_Cost[4])) + '{:>30}'.format(F_Criterion_Met[4]) + '\n'
        line += '{:<25}'.format(Criterion[5]) + '{:>25}'.format('{0:.3f}'.format(F_Cost[5])) + '{:>30}'.format(F_Criterion_Met[5]) + '\n'
        line += '\n'

        line += '{:<50}'.format('===Reverse Primer=== {}'.format(rp)) + '\n' 
        line += '{:<25}'.format('Criterion') + '{:>25}'.format('Cost Function Score') + '{:>30}'.format('Criteria Met') + '\n' 
        line += '-'*80 + '\n'
        line += '{:<25}'.format(Criterion[0]) + '{:>25}'.format('{0:.3f}'.format(R_Cost[0])) + '{:>30}'.format(R_Criterion_Met[0]) + '\n'
        line += '{:<25}'.format(Criterion[1]) + '{:>25}'.format('{0:.3f}'.format(R_Cost[1])) + '{:>30}'.format(R_Criterion_Met[1]) + '\n'
        line += '{:<25}'.format(Criterion[2]) + '{:>25}'.format('{0:.3f}'.format(R_Cost[2])) + '{:>30}'.format(R_Criterion_Met[2]) + '\n'
        line += '{:<25}'.format(Criterion[3]) + '{:>25}'.format('{0:.3f}'.format(R_Cost[3])) + '{:>30}'.format(R_Criterion_Met[3]) + '\n'
        line += '{:<25}'.format(Criterion[4]) + '{:>25}'.format('{0:.3f}'.format(R_Cost[4])) + '{:>30}'.format(R_Criterion_Met[4]) + '\n'
        line += '{:<25}'.format(Criterion[5]) + '{:>25}'.format('{0:.3f}'.format(R_Cost[5])) + '{:>30}'.format(R_Criterion_Met[5]) + '\n'
        line += '\n'

        line += '{:<25}'.format('Temperature Difference') + '{:>25}'.format('{0:.3f}'.format(self.cost_objective_function(fp,rp))) + '\n'
        
        print (line)

In [45]:
test.cost_objective_function_info(fp,rp)

===Forward Primer=== acggaggatagtatgagccc                                       
Criterion                      Cost Function Score                  Criteria Met
--------------------------------------------------------------------------------
length                                      20.000                         False
annealing temperature                       62.000                         False
%cg_content                                  0.000                          True
specificity                                  0.000                          True
runs                                         0.000                          True
repeats                                      0.000                          True

===Reverse Primer=== gaagtacttccacctcgggc         
Criterion                      Cost Function Score                  Criteria Met
--------------------------------------------------------------------------------
length                                      20.000       

### This method uses pandas to print a similar receipt

In [48]:
def color_false(s):
    if s == 'False':
        color = 'red'
    elif s == 'True':
        color = 'green'
    else:
        color = 'black'
    return 'color: %s' % color

In [51]:
import pandas as pd

class PrimerDesign(PrimerDesign):
    
    def cost_objective_function_info(self, fp, rp):
        Criterion = ['length','annealing temperature', '%cg_content','specificity','runs','repeats']
        F_Cost = [self.func_length(fp),
                  self.func_temperature(fp),
                  self.cost_cgcontent(fp),
                  self.cost_specificity(fp),
                  self.cost_runs(fp),
                  self.cost_repeats(fp)]
        F_Criterion_Met = ['True' if i==0 else 'False' for i in F_Cost]
        F_Primer = {'Criterion': Criterion, 
                    'Cost Function Score': pd.Series(['{0:.3f}'.format(i) for i in F_Cost], index=[i for i in range(1,7)]), 
                    'Criterion Met':F_Criterion_Met}
        
        R_Cost = [self.func_length(rp),
                  self.func_temperature(rp),
                  self.cost_cgcontent(rp),
                  self.cost_specificity(rp),
                  self.cost_runs(rp),
                  self.cost_repeats(rp)]
        R_Criterion_Met = ['True' if i==0 else 'False' for i in R_Cost]
        R_Primer = {'Criterion': Criterion, 
                    'Cost Function Score': pd.Series(['{0:.3f}'.format(i) for i in R_Cost], index=[i for i in range(1,7)]), 
                    'Criterion Met':R_Criterion_Met}
        return (F_Primer, R_Primer)

In [53]:
print('Forward Primer')
pd.DataFrame.from_dict(test.cost_objective_function_info(fp,rp)[0])[['Criterion', 'Cost Function Score', 'Criterion Met']].style.applymap(color_false)

Forward Primer


Unnamed: 0,Criterion,Cost Function Score,Criterion Met
1,length,20.0,False
2,annealing temperature,62.0,False
3,%cg_content,0.0,True
4,specificity,0.0,True
5,runs,0.0,True
6,repeats,0.0,True


In [55]:
print('Reverse Primer')
pd.DataFrame.from_dict(test.cost_objective_function_info(fp,rp)[1])[['Criterion', 'Cost Function Score', 'Criterion Met']].style.applymap(color_false)

Reverse Primer


Unnamed: 0,Criterion,Cost Function Score,Criterion Met
1,length,20.0,False
2,annealing temperature,64.0,False
3,%cg_content,0.0,True
4,specificity,0.0,True
5,runs,0.0,True
6,repeats,0.0,True


### Task 10

In [None]:
class PrimerDesign(PrimerDesign): 
    
    def func_simulated_annealing(self):
        
        temperature = self.initial_temperature
        stopping_temperature = self.stopping_temperature
        drop = self.drop_fractionfunc
        
        pass 


### Store the DNA sequence given to you in the variable below 

In [None]:
dna_sequence = ''' '''

### Instantiate your class and read in the DNA sequence

### If you need to adjust any parameter from their default values in the init method, do it here

### Show the outcome of your testing and the functions in the subsequent cells 