In [14]:
'''
Translate a DNA sequence ->  mRNA (using 'U' instead of 'Thr (T) - ') -> into an aminoacid sequence (protein)
PS: The input should be multiple of 3 (example: 3 nucleotides, 6 nucleotides, etc.)
'''

from textwrap import wrap
def translatedDNA(DNA: str):
    
    # complement
    DNA = DNA.upper()
    print("DNA input: ", DNA)
    nucleotides = {'A','T','C','G'}
    if not set(DNA).issubset(nucleotides):
        print('Invalid sequence')
        return
    
    length = len(DNA)
    if not (length % 3) == 0:
        print('Invalid sequence')
        return

    
    compList = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}
    comp = ''.join([compList[base] for base in DNA])
    print("Complement: ", comp)

    mRNA = comp.replace("T", "U")
    print("mRNA: ", mRNA)

    # codon table imported from https://gist.github.com/juanfal/09d7fb53bd367742127e17284b9c47bf
    codontable = {
    'TCA': 'Ser (S) - ',    # Serina
    'TCC': 'Ser (S) - ',    # Serina
    'TCG': 'Ser (S) - ',    # Serina
    'TCT': 'Ser (S) - ',    # Serina
    'TTC': 'Phe (F) - ',    # Fenilalanina
    'TTT': 'Phe (F) - ',    # Fenilalanina
    'TTA': 'Leu (L) - ',    # Leucina
    'TTG': 'Leu (L) - ',    # Leucina
    'TAC': 'Tyr (Y) - ',    # Tirosina
    'TAT': 'Tyr (Y) - ',    # Tirosina
    'TAA': '* - ',    # Stop
    'TAG': '* - ',    # Stop
    'TGC': 'Cys (C) - ',    # Cisteina
    'TGT': 'Cys (C) - ',    # Cisteina
    'TGA': '* - ',    # Stop
    'TGG': 'Trp (W) - ',    # Triptofano
    'CTA': 'Leu (L) - ',    # Leucina
    'CTC': 'Leu (L) - ',    # Leucina
    'CTG': 'Leu (L) - ',    # Leucina
    'CTT': 'Leu (L) - ',    # Leucina
    'CCA': 'Pro (P) - ',    # Prolina
    'CCC': 'Pro (P) - ',    # Prolina
    'CCG': 'Pro (P) - ',    # Prolina
    'CCT': 'Pro (P) - ',    # Prolina
    'CAC': 'His (H) - ',    # Histidina
    'CAT': 'His (H) - ',    # Histidina
    'CAA': 'Gln (Q) - ',    # Glutamina
    'CAG': 'Gln (Q) - ',    # Glutamina
    'CGA': 'Arg (R) - ',    # Arginina
    'CGC': 'Arg (R) - ',    # Arginina
    'CGG': 'Arg (R) - ',    # Arginina
    'CGT': 'Arg (R) - ',    # Arginina
    'ATA': 'Ile (I) - ',    # Isoleucina
    'ATC': 'Ile (I) - ',    # Isoleucina
    'ATT': 'Ile (I) - ',    # Isoleucina
    'ATG': 'Met (M) - ',    # Methionina
    'ACA': 'Thr (T) - ',    # Treonina
    'ACC': 'Thr (T) - ',    # Treonina
    'ACG': 'Thr (T) - ',    # Treonina
    'ACT': 'Thr (T) - ',    # Treonina
    'AAC': 'Asn (N) - ',    # Asparagina
    'AAT': 'Asn (N) - ',    # Asparagina
    'AAA': 'Lys (K) - ',    # Lisina
    'AAG': 'Lys (K) - ',    # Lisina
    'AGC': 'Ser (S) - ',    # Serina
    'AGT': 'Ser (S) - ',    # Serina
    'AGA': 'Arg (R) - ',    # Arginina
    'AGG': 'Arg (R) - ',    # Arginina
    'GTA': 'Val (V) - ',    # Valina
    'GTC': 'Val (V) - ',    # Valina
    'GTG': 'Val (V) - ',    # Valina
    'GTT': 'Val (V) - ',    # Valina
    'GCA': 'Ala (A) - ',    # Alanina
    'GCC': 'Ala (A) - ',    # Alanina
    'GCG': 'Ala (A) - ',    # Alanina
    'GCT': 'Ala (A) - ',    # Alanina
    'GAC': 'Asp (D) - ',    # Acido Aspartico
    'GAT': 'Asp (D) - ',    # Acido Aspartico
    'GAA': 'Glu (E) - ',    # Acido Glutamico
    'GAG': 'Glu (E) - ',    # Acido Glutamico
    'GGA': 'Gly (G) - ',    # Glicina
    'GGC': 'Gly (G) - ',    # Glicina
    'GGG': 'Gly (G) - ',    # Glicina
    'GGT': 'Gly (G) - '   # Glicina
}
    comp = wrap(comp, 3)

    protein = ''.join([codontable[codon] for codon in comp])
    protein = protein[:-3]
    print('Aminoacid: ', protein)

translatedDNA('acgatacgccgacatcggacgttt') # INPUT



DNA input:  ACGATACGCCGACATCGGACGTTT
Complement:  TGCTATGCGGCTGTAGCCTGCAAA
mRNA:  UGCUAUGCGGCUGUAGCCUGCAAA
Aminoacid:  Cys (C) - Tyr (Y) - Ala (A) - Ala (A) - Val (V) - Ala (A) - Cys (C) - Lys (K)


In [15]:
'''
Provide the frequency of each RNA codon encoding a given aminoacid, in a DNA 
sequence
PS: 
• Please provide all possibilities of the codon 
combination.
• Input max. 3 Aminoacids.

'''

import itertools

def mRNA_combinations(protein: str):
    codontable = {
    'A': ['GCU', 'GCC', 'GCA', 'GCG'],  # Alanine
    'C': ['UGU', 'UGC'],                # Cysteine
    'D': ['GAU', 'GAC'],                # Aspartic Acid
    'E': ['GAA', 'GAG'],                # Glutamic Acid
    'F': ['UUU', 'UUC'],                # Phenylalanine
    'G': ['GGU', 'GGC', 'GGA', 'GGG'],  # Glycine
    'H': ['CAU', 'CAC'],                # Histidine
    'I': ['AUU', 'AUC', 'AUA'],         # Isoleucine
    'K': ['AAA', 'AAG'],                # Lysine
    'L': ['UUA', 'UUG', 'CUA', 'CUC', 'CUG', 'CUU'],  # Leucine
    'M': ['AUG'],                       # Methionine
    'N': ['AAU', 'AAC'],                # Asparagine
    'P': ['CCU', 'CCC', 'CCA', 'CCG'],  # Proline
    'Q': ['CAA', 'CAG'],                # Glutamine
    'R': ['CGU', 'CGC', 'CGA', 'CGG', 'AGA', 'AGG'],  # Arginine
    'S': ['UCU', 'UCC', 'UCA', 'UCG', 'AGU', 'AGC'],  # Serine
    'T': ['ACU', 'ACC', 'ACA', 'ACG'],  # Threonine
    'V': ['GUU', 'GUC', 'GUA', 'GUG'],  # Valine
    'W': ['UGG'],                       # Tryptophan
    'Y': ['UAU', 'UAC'],                # Tyrosine
    '*': ['UAA', 'UAG', 'UGA'],         # Stop
}


    codon_combos = [codontable[aa] for aa in protein]  

    # all possible combinations
    all_combos = list(itertools.product(*codon_combos))

     # count codon frequency for all possible combinations
    for combination in all_combos:
        mRNA = "".join(combination)
        codon_count = {}

        for codon in combination:
            if codon in codon_count:
                codon_count[codon] += 1
            else:
                codon_count[codon] = 1

        print("mRNA =", mRNA)
        for codon, count in codon_count.items():
            print(f"{codon} = {count}")
        print("═══════•●•═══════")


aminoAcid = input("Enter amino acids (max 3 letters): ").upper()
if len(aminoAcid) > 3:
    print("Invalid amino acid input")
else:
    mRNA_combinations(aminoAcid)
    

mRNA = UAUUAUUAU
UAU = 3
═══════•●•═══════
mRNA = UAUUAUUAC
UAU = 2
UAC = 1
═══════•●•═══════
mRNA = UAUUACUAU
UAU = 2
UAC = 1
═══════•●•═══════
mRNA = UAUUACUAC
UAU = 1
UAC = 2
═══════•●•═══════
mRNA = UACUAUUAU
UAC = 1
UAU = 2
═══════•●•═══════
mRNA = UACUAUUAC
UAC = 2
UAU = 1
═══════•●•═══════
mRNA = UACUACUAU
UAC = 2
UAU = 1
═══════•●•═══════
mRNA = UACUACUAC
UAC = 3
═══════•●•═══════
