In [13]:
# DNA to mRNA and Amino Acid Translation

# Codon Table
# Maps each of the 64 mRNA codons (RNA alphabet: U, C, A, G) to a display label.
# Amino acids are labelled 'Xxx (X)' (three-letter abbreviation followed by the
# one-letter code); the codons UAA, UAG and UGA map to the literal 'Stop'.
genetic_code = {
    'UUU': 'Phe (F)', 'UUC': 'Phe (F)', 'UUA': 'Leu (L)', 'UUG': 'Leu (L)',
    'UCU': 'Ser (S)', 'UCC': 'Ser (S)', 'UCA': 'Ser (S)', 'UCG': 'Ser (S)',
    'UAU': 'Tyr (Y)', 'UAC': 'Tyr (Y)', 'UAA': 'Stop',    'UAG': 'Stop',
    'UGU': 'Cys (C)', 'UGC': 'Cys (C)', 'UGA': 'Stop',    'UGG': 'Trp (W)',
    'CUU': 'Leu (L)', 'CUC': 'Leu (L)', 'CUA': 'Leu (L)', 'CUG': 'Leu (L)',
    'CCU': 'Pro (P)', 'CCC': 'Pro (P)', 'CCA': 'Pro (P)', 'CCG': 'Pro (P)',
    'CAU': 'His (H)', 'CAC': 'His (H)', 'CAA': 'Gln (Q)', 'CAG': 'Gln (Q)',
    'CGU': 'Arg (R)', 'CGC': 'Arg (R)', 'CGA': 'Arg (R)', 'CGG': 'Arg (R)',
    'AUU': 'Ile (I)', 'AUC': 'Ile (I)', 'AUA': 'Ile (I)', 'AUG': 'Met (M)',
    'ACU': 'Thr (T)', 'ACC': 'Thr (T)', 'ACA': 'Thr (T)', 'ACG': 'Thr (T)',
    'AAU': 'Asn (N)', 'AAC': 'Asn (N)', 'AAA': 'Lys (K)', 'AAG': 'Lys (K)',
    'AGU': 'Ser (S)', 'AGC': 'Ser (S)', 'AGA': 'Arg (R)', 'AGG': 'Arg (R)',
    'GUU': 'Val (V)', 'GUC': 'Val (V)', 'GUA': 'Val (V)', 'GUG': 'Val (V)',
    'GCU': 'Ala (A)', 'GCC': 'Ala (A)', 'GCA': 'Ala (A)', 'GCG': 'Ala (A)',
    'GAU': 'Asp (D)', 'GAC': 'Asp (D)', 'GAA': 'Glu (E)', 'GAG': 'Glu (E)',
    'GGU': 'Gly (G)', 'GGC': 'Gly (G)', 'GGA': 'Gly (G)', 'GGG': 'Gly (G)'
}

def get_complement(dna_sequence):
    """Return the base-by-base complement of a DNA strand.

    Each base is swapped for its pairing partner (A<->T, C<->G). The result
    keeps the order of the input; the strand is not reversed.

    Args:
        dna_sequence: Iterable of single-character bases, normally a string
            made of uppercase 'A', 'T', 'C' and 'G'.

    Returns:
        str: The complemented sequence, same length as the input.

    Raises:
        KeyError: If any element is not one of 'A', 'T', 'C', 'G'
            (lowercase letters are not accepted).
    """
    base_pairs = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}
    paired = []
    for nucleotide in dna_sequence:
        paired.append(base_pairs[nucleotide])
    return ''.join(paired)

def transcribe_dna_to_mrna(dna_sequence):
    """Rewrite a DNA string in the RNA alphabet.

    Every uppercase 'T' becomes 'U'; all other characters (including a
    lowercase 't') pass through unchanged.

    Args:
        dna_sequence: The DNA string to convert.

    Returns:
        str: The same sequence with 'T' replaced by 'U'.
    """
    thymine_to_uracil = str.maketrans('T', 'U')
    return dna_sequence.translate(thymine_to_uracil)

def translate_mrna_to_protein(mrna_sequence):
    """Translate an mRNA string into a list of amino-acid labels.

    The sequence is read from index 0 in consecutive, non-overlapping
    three-letter codons, each looked up in the module-level ``genetic_code``
    table. Reading stops at the first codon labelled 'Stop'; the stop codon
    itself is not part of the result.

    Args:
        mrna_sequence: String of codons written in the table's alphabet
            (uppercase U, C, A, G).

    Returns:
        list[str]: The table's label for each translated codon, in order.
        An empty input yields an empty list.

    Raises:
        ValueError: If a codon reached before any stop codon is missing from
            ``genetic_code`` -- for example it contains 'T' or another
            unexpected letter, or it is an incomplete trailing codon.
    """
    protein = []
    for start in range(0, len(mrna_sequence), 3):
        codon = mrna_sequence[start:start + 3]
        amino_acid = genetic_code.get(codon)
        if amino_acid is None:
            # An empty placeholder in the result would silently corrupt the
            # protein (and any joined display of it), so reject the codon.
            raise ValueError(
                f"Unrecognized or incomplete codon {codon!r} at position {start}"
            )
        if amino_acid == 'Stop':
            break
        protein.append(amino_acid)
    return protein

if __name__ == "__main__":
    # Read the strand, dropping surrounding whitespace and normalising case so
    # that input such as " ttacga " is accepted.
    dna_input = input("Enter a DNA sequence (multiple of 3): ").strip().upper()
    # Echo the normalised entry.
    print("Enter a DNA sequence (multiple of 3):", dna_input)

    # Validate up front so every problem is reported as a message instead of
    # get_complement failing with a bare KeyError on an unexpected character.
    invalid_bases = sorted(set(dna_input) - set("ACGT"))

    if not dna_input:
        print("Error: DNA sequence is empty.")
    elif invalid_bases:
        print(f"Error: DNA sequence contains invalid characters: {', '.join(invalid_bases)}")
    elif len(dna_input) % 3 != 0:
        print("Error: DNA sequence length should be a multiple of 3.")
    else:
        # The input is complemented base by base, then T is rewritten as U,
        # and the resulting mRNA is translated codon by codon.
        complement_sequence = get_complement(dna_input)
        mrna_sequence = transcribe_dna_to_mrna(complement_sequence)
        protein_sequence = translate_mrna_to_protein(mrna_sequence)

        print(f"Complement DNA: {complement_sequence}")
        print(f"mRNA: {mrna_sequence}")
        print(f"Amino Acid Sequence: {' - '.join(protein_sequence)}")


Enter a DNA sequence (multiple of 3): TTACGA
Complement DNA: AATGCT
mRNA: AAUGCU
Amino Acid Sequence: Asn (N) - Ala (A)


In [15]:
# Genetic code dictionary for amino acids to codons
# Reverse lookup: each key is a three-letter amino-acid abbreviation (or 'Stop')
# and each value is the list of mRNA codons (U/C/A/G alphabet) that encode it.
amino_acid_to_codon = {
    'Phe': ['UUU', 'UUC'], 'Leu': ['UUA', 'UUG', 'CUU', 'CUC', 'CUA', 'CUG'],
    'Ile': ['AUU', 'AUC', 'AUA'], 'Met': ['AUG'],
    'Val': ['GUU', 'GUC', 'GUA', 'GUG'], 'Ser': ['UCU', 'UCC', 'UCA', 'UCG', 'AGU', 'AGC'],
    'Pro': ['CCU', 'CCC', 'CCA', 'CCG'], 'Thr': ['ACU', 'ACC', 'ACA', 'ACG'],
    'Ala': ['GCU', 'GCC', 'GCA', 'GCG'], 'Tyr': ['UAU', 'UAC'],
    'His': ['CAU', 'CAC'], 'Gln': ['CAA', 'CAG'],
    'Asn': ['AAU', 'AAC'], 'Lys': ['AAA', 'AAG'],
    'Asp': ['GAU', 'GAC'], 'Glu': ['GAA', 'GAG'],
    'Cys': ['UGU', 'UGC'], 'Trp': ['UGG'],
    'Arg': ['CGU', 'CGC', 'CGA', 'CGG', 'AGA', 'AGG'],
    'Gly': ['GGU', 'GGC', 'GGA', 'GGG'],
    'Stop': ['UAA', 'UAG', 'UGA']
}

# Function to enumerate every mRNA sequence that encodes a given amino acid sequence
def get_codon_frequency(amino_acid_sequence):
    """Return every mRNA string that could encode the given amino acids.

    Despite its name, this function does not count anything: it expands each
    amino acid into its synonymous codons (looked up in the module-level
    ``amino_acid_to_codon`` table) and returns all combinations.

    Args:
        amino_acid_sequence: Iterable of amino-acid names as keyed in
            ``amino_acid_to_codon`` (e.g. ``['Trp', 'Tyr', 'Trp']``). Each
            name is passed through ``str.capitalize`` before the lookup, so
            letter case does not matter.

    Returns:
        list[str]: One concatenated codon string per combination, ordered so
        that the last amino acid's codons vary fastest. An empty input yields
        ``['']``; if any name is missing from the table the result is ``[]``.

    Note:
        The number of results is the product of the per-residue codon counts,
        so it grows exponentially with the sequence length.
    """
    # Local import keeps this function self-contained.
    from itertools import product

    # One list of candidate codons per residue; unknown names contribute an
    # empty list, which makes the overall product empty.
    codon_choices = [
        amino_acid_to_codon.get(amino_acid.capitalize(), [])
        for amino_acid in amino_acid_sequence
    ]

    # product() walks the combinations iteratively, so the sequence length is
    # not limited by Python's recursion depth.
    return [''.join(codons) for codons in product(*codon_choices)]

if __name__ == "__main__":
    # Local import: this script section is the only user of Counter here.
    from collections import Counter

    # Input amino acid sequence as one-letter codes (e.g., 'W-Y-W').
    # Hyphens and any whitespace between the codes are discarded.
    raw_input_text = input("Enter an amino acid sequence (e.g., W-Y-W): ")
    amino_acid_input = "".join(raw_input_text.upper().replace("-", "").split())
    print("Input Aminoacid: ", amino_acid_input)

    # Map from one-letter codes to the three-letter abbreviations used as keys
    # of amino_acid_to_codon ('*' stands for a stop codon).
    amino_acid_map = {
        'F': 'Phe', 'L': 'Leu', 'I': 'Ile', 'M': 'Met', 'V': 'Val',
        'S': 'Ser', 'P': 'Pro', 'T': 'Thr', 'A': 'Ala', 'Y': 'Tyr',
        'H': 'His', 'Q': 'Gln', 'N': 'Asn', 'K': 'Lys', 'D': 'Asp',
        'E': 'Glu', 'C': 'Cys', 'W': 'Trp', 'R': 'Arg', 'G': 'Gly', '*': 'Stop'
    }

    # Collect unsupported codes up front so they are reported as a message
    # rather than surfacing as a KeyError from the mapping below.
    unknown_codes = sorted(set(amino_acid_input) - set(amino_acid_map))

    if not amino_acid_input:
        print("Error: Please provide at least one amino acid.")
    # Validate max 3 amino acids (the number of candidate mRNAs grows
    # multiplicatively with every extra residue).
    elif len(amino_acid_input) > 3:
        print("Error: Please provide a maximum of 3 amino acids.")
    elif unknown_codes:
        print(f"Error: Unknown amino acid code(s): {', '.join(unknown_codes)}")
    else:
        # Convert one-letter codes to three-letter abbreviations
        amino_acid_full_sequence = [amino_acid_map[aa] for aa in amino_acid_input]

        # Get all possible mRNA sequences
        possible_mrna_sequences = get_codon_frequency(amino_acid_full_sequence)

        for mrna_sequence in possible_mrna_sequences:
            print(f"\nmRNA = {mrna_sequence}")

            # Count how often each codon occurs in this mRNA. Counter keeps
            # first-seen order, so codons print in order of appearance.
            codon_frequency = Counter(
                mrna_sequence[i:i+3] for i in range(0, len(mrna_sequence), 3)
            )

            for codon, freq in codon_frequency.items():
                print(f"{codon} = {freq}")


Input Aminoacid:  WYW

mRNA = UGGUAUUGG
UGG = 2
UAU = 1

mRNA = UGGUACUGG
UGG = 2
UAC = 1
