Rosalind_16.ipynb problem16 Nicholas Rose BME 205

Peptide Encoding Problem:
Find substrings of a genome encoding a given amino acid sequence.
Given: A DNA string Text and an amino acid string Peptide.
Return: All substrings of Text encoding Peptide (if any such substrings exist).

In [61]:
class DNASeq():
    '''
    Wraps a DNA string and provides helpers to reverse-complement,
    transcribe and translate sequences, and to locate substrings that
    encode a given peptide.

    Class attributes:
        table       -- RNA codon -> one-letter amino acid ('_' marks a stop codon).
        complements -- DNA nucleotide -> complementary nucleotide.
    '''

    # Standard genetic code, all 64 RNA codons.
    table = {'UUU':'F', 'UUC':'F', 'UUA':'L', 'UUG':'L', 'CUU':'L', 'CUC':'L', 'CUA':'L',
            'CUG':'L', 'AUU':'I', 'AUC':'I', 'AUA':'I', 'AUG':'M', 'GUU':'V', 'GUC':'V',
            'GUA':'V', 'GUG':'V', 'UCU':'S', 'UCC':'S', 'UCA':'S', 'UCG':'S', 'CCU':'P',
            'CCC':'P', 'CCA':'P', 'CCG':'P', 'ACU':'T', 'ACC':'T', 'ACA':'T', 'ACG':'T',
            'GCU':'A', 'GCC':'A', 'GCA':'A', 'GCG':'A', 'UAU':'Y', 'UAC':'Y', 'UAA':'_',
            'UAG':'_', 'CAU':'H', 'CAC':'H', 'CAA':'Q', 'CAG':'Q', 'AAU':'N', 'AAC':'N',
            'AAA':'K', 'AAG':'K', 'GAU':'D', 'GAC':'D', 'GAA':'E', 'GAG':'E', 'UGU':'C',
            'UGC':'C', 'UGA':'_', 'UGG':'W', 'CGU':'R', 'CGC':'R', 'CGA':'R', 'CGG':'R',
            'AGU':'S', 'AGC':'S', 'AGA':'R', 'AGG':'R', 'GGU':'G', 'GGC':'G', 'GGA':'G',
            'GGG':'G'}

    complements = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}

    def __init__ (self, DNASeq):
        '''
        Stores the given DNA string on the instance as self.DNASeq.
        '''
        self.DNASeq = DNASeq

    def reverseCompliment(self, seq):
        '''
        Returns the reverse complement of a given DNA sequence.
        Characters other than A, T, C and G are dropped from the result.
        '''
        # Walking the sequence backwards while complementing is the same
        # as complementing first and reversing afterwards.
        return ''.join(self.complements.get(nucleotide, '')
                       for nucleotide in reversed(seq))

    def transcribe(self, seq):
        '''
        Returns the RNA transcription of a given DNA sequence
        (every 'T' replaced by 'U').
        '''
        return seq.replace('T', 'U')

    def translate(self, seq):
        '''
        Returns the peptide translation of a given RNA sequence, read in
        non-overlapping codons starting at index 0.
        Chunks that are not in the codon table (unknown codons, or a
        trailing fragment shorter than three bases) contribute nothing.
        '''
        return ''.join(self.table.get(seq[i:i+3], '')
                       for i in range(0, len(seq), 3))

    def findPeptide(self, seq, compliment, peptide):
        '''
        Identifies DNA substrings of seq that encode a given peptide.

        seq        -- DNA sequence to scan, in every reading frame.
        compliment -- Boolean telling whether seq is a reverse complement.
                      If False, the matching substrings are returned.
                      If True, the reverse complements of the matching
                      substrings are returned.
        peptide    -- amino acid string to search for.

        Returns a list of substrings in the order they occur in seq.
        An empty peptide yields an empty list.
        '''
        lenWindow = len(peptide) * 3
        if lenWindow == 0:
            # Nothing to encode; without this guard every position would
            # "match" with an empty window.
            return []

        seqs = []
        # The +1 keeps the final window, which ends on the last base.
        # The range is empty when seq is shorter than one window.
        for i in range(len(seq) - lenWindow + 1):
            window = seq[i:i+lenWindow]
            if self.translate(self.transcribe(window)) == peptide:
                seqs.append(self.reverseCompliment(window) if compliment else window)

        return seqs

In [63]:
def main(infile):
    '''
    The main method. This method takes a file whose first row is a
    DNA sequence string and whose second row is a peptide.
    Both strands of the DNA are searched for substrings encoding the
    peptide. The substrings are sorted and written, one per row, into
    a file named 'rosalind_16.txt.out' in the current working directory.
    '''

    with open(infile) as f:
        DNA = f.readline().strip()
        Peptide = f.readline().strip()

    seqObject = DNASeq(DNA)

    # Scanning the reverse complement covers the opposite strand; hits
    # found there are mapped back to substrings of DNA by findPeptide.
    complimentDNA = seqObject.reverseCompliment(DNA)

    substrings = seqObject.findPeptide(DNA, False, Peptide)
    substrings += seqObject.findPeptide(complimentDNA, True, Peptide)
    # Must be called: a bare `substrings.sort` only names the method.
    substrings.sort()

    with open('rosalind_16.txt.out', 'w') as out:
        for substring in substrings:
            print(substring, file=out)
    
if __name__ == "__main__":
    # Hard-coded location of the downloaded Rosalind dataset; edit this
    # path to run against a different input file.
    _inputPath = '/home/nick_rose/Downloads/rosalind_ba4b.txt'
    main(_inputPath)