## 'Translating RNA into Protein'

**Given**: An RNA string *s* corresponding to a strand of mRNA (of length at most 10 kbp).

**Return**: The protein string encoded by *s*.



In [27]:
def rna_prot_translator(
    rna_seq: str,
    codon_table_path: str = "datasets/rosalind_RNA_codon_table.txt",
) -> str:
    '''
    Translate an RNA nucleotide string into an amino acid (AA) residue string.

    Step 1: load the RNA codon table
        The table file holds whitespace-separated tokens that alternate
        between a 3-nt RNA codon and its translation (a 1-letter residue,
        or the word 'Stop'), e.g. the data found at
        https://rosalind.info/glossary/rna-codon-table/
        The result is a dictionary keyed by codon.
    Step 2: translate
        Read rna_seq codon by codon from index 0. Translation ends at the
        first codon that maps to 'Stop'; 'Stop' itself is never written to
        the residue string. One or two leftover nucleotides at the end of
        rna_seq (an incomplete codon) are ignored.

    Parameters:
        rna_seq: the mRNA string to translate.
        codon_table_path: path of the codon table file; defaults to the
            location used throughout this notebook.

    Result: string of AA residues

    Raises:
        OSError: if the codon table file cannot be opened.
        KeyError: if a complete codon of rna_seq is missing from the table.
    '''
    # step 1: load RNA codon table
    # split() with no argument splits on any whitespace run, newlines included.
    with open(codon_table_path, "r") as f:
        tokens = f.read().split()
    # Pair even-indexed tokens (codons) with odd-indexed ones (translations);
    # zip drops an unpaired trailing token instead of raising IndexError.
    codon_dict = dict(zip(tokens[::2], tokens[1::2]))

    # step 2: translate
    residues = []
    # Stopping the range at len - 2 visits complete codons only, so a
    # trailing 1-2 nt remainder is skipped rather than causing a KeyError.
    for start in range(0, len(rna_seq) - 2, 3):
        residue = codon_dict[rna_seq[start:start + 3]]
        if residue == 'Stop':
            # A stop codon terminates the protein; nothing after it is read.
            break
        residues.append(residue)
    return ''.join(residues)



In [29]:
# Sanity check on the Rosalind sample dataset: show the RNA input, then its
# translation.
with open('datasets/rosalind_sample_dataset.txt', 'r') as f:
    text = f.read()
# Drop trailing newline characters so they are not treated as nucleotides.
text = text.rstrip('\n')
print(text)
print(rna_prot_translator(text))
# Remove the temporary name from the notebook namespace.
del text

AUGGCCAUGGCGCCCAGAACUGAGAUCAAUAGUACCCGUAUUAACGGGUGA
MAMAPRTEINSTRING


---

In [30]:
# Translate the contents of the rosalind_prot.txt dataset and print the
# resulting protein string.
with open('datasets/rosalind_prot.txt', 'r') as f:
    text = f.read()
# Drop trailing newline characters so they are not treated as nucleotides.
text = text.rstrip('\n')

print(rna_prot_translator(text))


MSHLKDHNVRRVFSRPVALQVHLTALEQPDSNGFRGLHRFTLDSRAEVPEKKASDLSRIISVSLRGKRDPNRIPIFRRLTWTLCALQFSSLTESEFIHPVFLDWVKKLRSKRDSGLHRVTLPLECSNWSGPLSSTRNASRRTFSPPVNRGKRWPFPVPSEMTLWPCFAQANRTNSLSCEGEGPEPQRFPLTGKLNFSPSLLIVEMRLLRSGTYEPMVKWMTGTPILIIALTTRSTCLKVEVHKTPSIEPAAMQVRFSKFSQVVKARGRNLAITVYYERSSDRKIKPTFQKDLMVRIMRPALILVQAAKYAYVLIVIARRYLRYILFTRSGIAGHAFKVPTKGIAFKASFERASNRISADISRSRGDGYIIVRSGHASWQALHCNHLGFLCCRYAKCAMLSNGESLPRDLDNPNWVGGVPSITHGLFPCPLLSELGHLISSKPHASLPARCSFRALGAPKTEIDRANCDQLVSILWAGGNGCRLDMSGPGLICSIIGILTESHVHVARTLPVVRSSHRISSDIGQGKYIRVEVPALYQKDLYTFARSRTPPQSTLTTYYGRTVQSSGAPAAKGNGLYARCASATFLTQPICQLMHLRLSAKTSLVARPAPTPLALNPDLLEIRVIPETNGLHETRSSKLITYRYLRNTAMCEVCLPLWSAIWFGRVKCEPFFLVFGKVQGWEASRRRRRGIHVHSSCARKRGLSAKSRTIQGFDDRNIRRVERKVYVGPLRHNALMPVCRPSYRPTGYPCAYQNSHFVMPLLCTPHFHKTHSDGADQSNARAFIDLVRCTWRGLLKSRKTRKAREVLLALTALHFESDQLVATECQLPAMLFAGSKRHGFYVRHYLKRYLSSRLQIITLVYCAQACTPDLSASKRLPCQTLECLQFCLILGQCVTQPPHRERYFFLSRLPWDGMCRLGPPLRLHCHSTLGSDGYFAVTCRGILHEVERVYNQYNGACTVGSRHTQNMRRAATYSSEARQQAQQGFWCSRRPCPSGICLPER