# In [4]:
import numpy as np

# End-to-end script that transcribes the DNA sequence of an exon into RNA and then translates it, predicting the primary structure (amino acid sequence) of the resulting protein.



# Standard genetic code for RNA -> protein translation.
# Keys are RNA codons (three bases from A/C/G/U); values are one-letter amino
# acid codes, except for the three termination codons, which map to "STOP".
# All 64 codons are listed, grouped by first base and then by second base.
CODON_TABLE = {
    # First base U
    "UUU": "F", "UUC": "F", "UUA": "L", "UUG": "L",
    "UCU": "S", "UCC": "S", "UCA": "S", "UCG": "S",
    "UAU": "Y", "UAC": "Y", "UAA": "STOP", "UAG": "STOP",
    "UGU": "C", "UGC": "C", "UGA": "STOP", "UGG": "W",
    # First base C
    "CUU": "L", "CUC": "L", "CUA": "L", "CUG": "L",
    "CCU": "P", "CCC": "P", "CCA": "P", "CCG": "P",
    "CAU": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
    "CGU": "R", "CGC": "R", "CGA": "R", "CGG": "R",
    # First base A
    "AUU": "I", "AUC": "I", "AUA": "I", "AUG": "M",
    "ACU": "T", "ACC": "T", "ACA": "T", "ACG": "T",
    "AAU": "N", "AAC": "N", "AAA": "K", "AAG": "K",
    "AGU": "S", "AGC": "S", "AGA": "R", "AGG": "R",
    # First base G
    "GUU": "V", "GUC": "V", "GUA": "V", "GUG": "V",
    "GCU": "A", "GCC": "A", "GCA": "A", "GCG": "A",
    "GAU": "D", "GAC": "D", "GAA": "E", "GAG": "E",
    "GGU": "G", "GGC": "G", "GGA": "G", "GGG": "G",
}

def convert_dna_to_rna(dna_seq):
    """Rewrite a DNA string in the RNA alphabet.

    Only uppercase "T" is substituted (with "U"); every other character is
    returned unchanged and in the same position.

    Args:
        dna_seq: String of DNA nucleotides, e.g. "ATG".

    Returns:
        The same sequence with each "T" replaced by "U", e.g. "AUG".
    """
    thymine_to_uracil = {ord("T"): "U"}
    return dna_seq.translate(thymine_to_uracil)

def transcribe_rna(rna_seq, strand_type):
    """Derive the mRNA from a strand already written in the RNA alphabet.

    Args:
        rna_seq: Sequence using A/U/C/G, e.g. the output of
            convert_dna_to_rna.
        strand_type: "template" to complement every base, or "coding" to
            keep the sequence as it is.

    Returns:
        For "template", the base-by-base complement (A<->U, C<->G) in the
        same order as the input; the sequence is not reversed. For "coding",
        rna_seq unchanged. Characters other than A/U/C/G are kept as they
        are in both modes, so the reading frame is never shifted and such
        positions stay visible downstream.

    Raises:
        SystemExit: If strand_type is neither "template" nor "coding". The
            message "Invalid input for strand type." is printed first and
            the exit status is 1.
    """
    # Validate before looking at the sequence so that a bad strand type is
    # reported even when rna_seq is empty.
    if strand_type == "coding":
        return rna_seq
    if strand_type != "template":
        print("Invalid input for strand type.")
        # SystemExit is raised directly: the exit() helper is injected by the
        # site module and is not guaranteed to exist.
        raise SystemExit(1)

    # str.translate leaves unmapped characters untouched.
    complement = str.maketrans("AUCG", "UAGC")
    return rna_seq.translate(complement)

def translate_rna_to_protein(rna_seq):
    """Translate an RNA sequence into a chain of one-letter amino acid codes.

    Codons are read from index 0 in consecutive, non-overlapping triplets.
    Reading ends at the first codon that CODON_TABLE maps to "STOP" (which is
    not included in the result) or when fewer than three bases remain. A
    codon that is not in CODON_TABLE contributes "?".

    Args:
        rna_seq: RNA sequence to translate.

    Returns:
        The protein sequence as a string.
    """
    residues = []
    final_start = len(rna_seq) - 3  # last index at which a full codon fits
    start = 0
    while start <= final_start:
        residue = CODON_TABLE.get(rna_seq[start:start + 3], "?")
        if residue == "STOP":
            break
        residues.append(residue)
        start += 3
    return "".join(residues)


def main():
    """Prompt for a DNA sequence and strand type, then print the protein.

    Takes no arguments and returns nothing: the sequence and the strand type
    are read with input(), and the resulting amino acid sequence is printed.
    """
    # Drop every whitespace character: a pasted sequence such as "ATG GCC"
    # would otherwise carry spaces into the codons.
    dna_input = "".join(input("Enter DNA code: ").split()).upper()
    # Strip so that an answer such as "template " is still recognised.
    strand_type = input(
        "Template or coding? (type 'template' or 'coding'): "
    ).strip().lower()

    rna_seq = convert_dna_to_rna(dna_input)
    transcribed_rna = transcribe_rna(rna_seq, strand_type)
    protein_seq = translate_rna_to_protein(transcribed_rna)

    print("\nAmino acid sequence:")
    print(protein_seq)

# Start the interactive prompt only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

# KeyboardInterrupt: Interrupted by user