# Translating RNA into Protein
**Given:** An RNA string *s* corresponding to a strand of mRNA (of length at most 10 kbp).

**Return:** The protein string encoded by *s*.

## RNA Codon Table

In [1]:
%%file RNA_Codon_Table.txt
UUU F      CUU L      AUU I      GUU V
UUC F      CUC L      AUC I      GUC V
UUA L      CUA L      AUA I      GUA V
UUG L      CUG L      AUG M      GUG V
UCU S      CCU P      ACU T      GCU A
UCC S      CCC P      ACC T      GCC A
UCA S      CCA P      ACA T      GCA A
UCG S      CCG P      ACG T      GCG A
UAU Y      CAU H      AAU N      GAU D
UAC Y      CAC H      AAC N      GAC D
UAA Stop   CAA Q      AAA K      GAA E
UAG Stop   CAG Q      AAG K      GAG E
UGU C      CGU R      AGU S      GGU G
UGC C      CGC R      AGC S      GGC G
UGA Stop   CGA R      AGA R      GGA G
UGG W      CGG R      AGG R      GGG G 

Overwriting RNA_Codon_Table.txt


# Sample Dataset

In [2]:
%%file Sample_Dataset.txt
AUGGCCAUGGCGCCCAGAACUGAGAUCAAUAGUACCCGUAUUAACGGGUGA



Overwriting Sample_Dataset.txt


# Sample Output

In [3]:
%%file Sample_Output.txt
MAMAPRTEINSTRING



Overwriting Sample_Output.txt


# Solution

In [4]:
def parseCodonTable(codon_table_file_path = "RNA_Codon_Table.txt"):
    """Parse a codon table text file into a codon -> translation dict.

    The file is read as one flat sequence of whitespace-separated tokens that
    alternate between a codon and its translation (an amino-acid letter or the
    word "Stop"), so the number of columns per line does not matter.

    Args:
        codon_table_file_path: path of the codon table file to read.

    Returns:
        dict mapping each codon token to the token that follows it. A trailing
        unpaired token, if any, is ignored.
    """
    # The context manager closes the file even if reading fails.
    with open(codon_table_file_path, 'r') as codon_table_file:
        codon_table_file_tokens = codon_table_file.read().split()

    # Even positions hold codons, odd positions hold their translations.
    codons = codon_table_file_tokens[0::2]
    amino_acids = codon_table_file_tokens[1::2]
    return dict(zip(codons, amino_acids))


In [5]:
def translateRNA(input_string, codon_table = None, start_codon = "AUG"):
    """Given an RNA string s corresponding to a strand of mRNA, return the protein string encoded by s.

    Translation begins at the first occurrence of start_codon and proceeds
    three bases at a time until a codon that maps to "Stop" is reached or the
    string runs out of complete codons.

    Args:
        input_string: the RNA string to translate.
        codon_table: dict mapping codons to amino-acid letters or "Stop".
            When None (the default) the table is loaded with
            parseCodonTable() at call time.
        start_codon: codon at which translation begins.

    Returns:
        The protein string, without the stop marker. An empty string is
        returned when start_codon does not occur in input_string.

    Raises:
        KeyError: if a complete codon before the stop codon is missing from
            codon_table.
    """
    # Load the default table lazily: a call in the signature would run once at
    # definition time and fail if the table file did not exist yet.
    if codon_table is None:
        codon_table = parseCodonTable()

    start_index = input_string.find(start_codon)
    if start_index < 0:
        # find() returns -1 when absent; slicing from -1 would wrongly
        # translate the last character of the string.
        return ""

    amino_acids = []
    # Stopping at len - 2 skips a trailing fragment shorter than one codon.
    for codon_start in range(start_index, len(input_string) - 2, 3):
        amino_acid = codon_table[input_string[codon_start:codon_start + 3]]
        if amino_acid == "Stop":
            break
        amino_acids.append(amino_acid)

    return "".join(amino_acids)

def translateRNAFromFileToFile(input_file_path, output_file_path, codon_table_file_path = "RNA_Codon_Table.txt", start_codon = "AUG"):
    """Wraps translateRNA to read from input_file_path and write to output_file_path.

    Args:
        input_file_path: path of the text file holding the RNA string;
            surrounding whitespace is stripped before translation.
        output_file_path: path of the file to write; it receives the protein
            string followed by a newline.
        codon_table_file_path: path of the codon table file passed to
            parseCodonTable.
        start_codon: codon at which translation begins.

    Returns:
        None.
    """
    with open(input_file_path, 'r') as input_file:
        input_string = input_file.read().strip()

    # Translate before opening the output file so a failure while reading or
    # translating does not leave behind an empty, truncated output file.
    output_string = translateRNA(input_string, parseCodonTable(codon_table_file_path), start_codon)

    with open(output_file_path, 'w') as output_file:
        output_file.write("%s\n" % output_string)

    return


# Test Solution

In [6]:
# Translate the sample dataset and write the protein string to Test_Output.txt,
# which the cells below compare against Sample_Output.txt.
translateRNAFromFileToFile("Sample_Dataset.txt", "Test_Output.txt")

In [7]:
%%bash
# Print the file name, MD5 checksum and contents of the expected output.
echo Sample_Output.txt
md5sum Sample_Output.txt
cat Sample_Output.txt

Sample_Output.txt
8a609552ef070befd6f7ee06f7ba0405  Sample_Output.txt
MAMAPRTEINSTRING


In [8]:
%%bash
# Print the file name, MD5 checksum and contents of the generated output.
echo Test_Output.txt
md5sum Test_Output.txt
cat Test_Output.txt

Test_Output.txt
8a609552ef070befd6f7ee06f7ba0405  Test_Output.txt
MAMAPRTEINSTRING


In [9]:
%%bash
# Compare the two files byte-for-byte. cmp -s prints nothing and exits non-zero
# when the files differ or either one is missing, so a missing file is reported
# as a mismatch instead of breaking the test expression.
if cmp -s Sample_Output.txt Test_Output.txt
then
    echo Sample output matches test output.
else
    echo Sample output does not Match test output.
fi

Sample output matches test output.


# Downloaded Dataset

In [10]:
%%bash
# Copy the downloaded dataset from ~/Downloads into the working directory and
# print it. This cell only works if rosalind_prot.txt is present in ~/Downloads.
cp ~/Downloads/rosalind_prot.txt ./
cat rosalind_prot.txt

AUGCUUGCGUCGGCAGGAUUACAAUCGAACCUCCAUUUACGCUAUAACAUUUCUGAAGUCCGUCUUAUCCGUCUCGCUCCCUUUACGUGUCGCCAGCCCGGAAUGAUCUUAGCGCCAGGAAAUUCAGAUAAAAGAACGAGUAGGAUCACCCACGGCCACGAUUCUCUCCGAGGUCUCAAUCCAUUGUGGAGUGAGCACGGUUGGGCCGCAUUGAUGCGGGGCGAUAUGUUUUGCCCUAUACCGACCUCGGUUGGUGUCACGCAGGUCUCAAGAGUCAAUCAAUUCAAAGGAGUGAAAAAUAGCACUCUCAGACAACAUCAGGGGCAAUCUCGGUCGCUGUAUCGAAGAAUAACUGAUAUUUCUGUUUCGGGUCGGACGUUAUGGAGUGGUCACCAUAGAGGUCGUUUUGAAUCCGUAUCCAUUAUAUGGAAUGUCGUUUUUGAACGUGUGAAUUAUGGCGGCGGCGCCCUACCACCGGAGCUGGGGCAGCCACCGGUAUCUCUCUGGGGUACCGGAGUAUCAUCGAGGCCGUUUACAUUCCAAGUACAGGCCAAAUUCCAAUUGUUCUGCCCAAGAAGCCGGCCAGUUAGUCCCAGUGCAAAGAUGCUAGACCGAUUUGCUAGUAGCUACAAACUCGAGGAACAAGGAUCGCCGUGCCUCAGUACAUCCUGUCCUGGGGUUCCCUUUAACCAAGGUGCUUGGAGCUCGAGGCGGCAAGCAGGUGUCCUACUUUCUAUUGCUGAUAACUCUUUCGACCAGUAUGCCCCACAAGGUAAGCCACAUCCCACUAAGGAGGAUAAUACCGUAGUGGUAGCUGCCAUUAUGACAUUACGGCGACGUCGGUCUAAAUACUACCACUGCGGUGCAGGAGGUGUGAAAACUGGCCGAUUCACUAGCGUAGUUUUUGGUCUACUGCGACUGUUAGAAAUGGGCCAGCUCGUUCGUCCCGAGCAACGAGUAUAUCACAGAGAUCAUCGGGAGGUAAGGAGGGACCACAACC

# Solution to Downloaded Dataset

In [11]:
# Translate the downloaded dataset and write the protein string to Solution_Output.txt.
translateRNAFromFileToFile("rosalind_prot.txt", "Solution_Output.txt")

In [12]:
%%bash
# Print the protein string produced for the downloaded dataset.
cat Solution_Output.txt

MLASAGLQSNLHLRYNISEVRLIRLAPFTCRQPGMILAPGNSDKRTSRITHGHDSLRGLNPLWSEHGWAALMRGDMFCPIPTSVGVTQVSRVNQFKGVKNSTLRQHQGQSRSLYRRITDISVSGRTLWSGHHRGRFESVSIIWNVVFERVNYGGGALPPELGQPPVSLWGTGVSSRPFTFQVQAKFQLFCPRSRPVSPSAKMLDRFASSYKLEEQGSPCLSTSCPGVPFNQGAWSSRRQAGVLLSIADNSFDQYAPQGKPHPTKEDNTVVVAAIMTLRRRRSKYYHCGAGGVKTGRFTSVVFGLLRLLEMGQLVRPEQRVYHRDHREVRRDHNLSSSELVLTGSTSWRKQASSTYAKGMHHAPKSCLRLPLERSSFNALHHHLADLLRVASSLVGDCLIESRMSRTSKLIRKTSRLNSRCIILYVIGTTCGIGPLRSNCLEPNRIPVLVIPRRPLPRRLETPRSAIIRCSLLARSELWPTISSSHQCRSSSYCGNTERPRLIPDADLDDRVCAVKSPTATTTLPCDNQHLRTHLQYVGAIQMHDGNWRKISYWRLHTGIRGPLHEILLIAGDGTLTGSYSGRAVSPLLCPSASRRGLCGDFASFLDAADRLGQDYPCFVCYYRRLLASRQTTGLNFLKRYSCHATPRNPIYDIVRLIQRVKWPSQENVSYSVVRECIRLEASINNSLLTCAGFCPMQLSWGRCGAASLAGLPQVRNGGYPPYNVTNYGIVRGTYVPSTQNRSGARRWSYALQWEITAHARIREGSLFDILRSKLPDTEVETQVMSPLVMPRPGYARRVHMPANYYSSLEVACGILPHYSRTRLFAKGMTGASILIQILSESPTASYAECTVFTPEASVIPKMYRSPARKQWIHSQKGTSSRKRAVRLWTHSSALHGGDGGSNTRFNTMSHIHIVSECQLRVTTTIVREFRRALTVVRRDAARCHKVPPFMSAKLHEGVLHGQFWDRESAEALAPVLTGPVPICSGLKRDQIIMPLPRYSA