<a href="https://colab.research.google.com/github/pranathiperii/analyseDNA/blob/main/Variation_to_Amino_Acid_changes_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Report on amino acid changes between two pasted nucleotide sequences (a reference and a sample)

In [30]:
from Bio.Seq import Seq
from tabulate import tabulate


def normalize_seq(seq):
    """Return ``seq`` upper-cased with every whitespace character removed.

    Pasted sequences often carry tabs or carriage returns in addition to
    spaces and newlines, so all whitespace is stripped rather than only
    those two characters.
    """
    # str.split() with no argument splits on any run of whitespace, so
    # re-joining the pieces drops spaces, tabs, "\r" and "\n" alike.
    return "".join(seq.upper().split())

# Prompt for the reference sequence first, then the sample, cleaning each
# one up as soon as it has been read.
ref_seq = input("Enter reference sequence: ")
ref_seq = normalize_seq(ref_seq)
sample_seq = input("Enter sample sequence: ")
sample_seq = normalize_seq(sample_seq)


def safe_translate(seq):
    """Translate ``seq`` into a protein string, ignoring a trailing partial codon.

    Any one or two leftover characters at the end of ``seq`` (when its length
    is not a multiple of three) are dropped before translation, so only
    complete codons are passed to ``Seq.translate``.
    """
    whole_codon_len = len(seq) - len(seq) % 3
    coding_part = Seq(seq[:whole_codon_len])
    return str(coding_part.translate())

# Translate both sequences the same way: reference first, then sample.
ref_prot, sample_prot = map(safe_translate, (ref_seq, sample_seq))

# Lookup table: one-letter residue symbol -> (full name, property summary).
# The "*" entry describes a stop codon rather than an amino acid.
aa_info = {
    symbol: (full_name, properties)
    for symbol, full_name, properties in (
        ("A", "Alanine", "Nonpolar, aliphatic"),
        ("R", "Arginine", "Positively charged, basic"),
        ("N", "Asparagine", "Polar, uncharged"),
        ("D", "Aspartic acid", "Negatively charged, acidic"),
        ("C", "Cysteine", "Polar, forms disulfide bonds"),
        ("E", "Glutamic acid", "Negatively charged, acidic"),
        ("Q", "Glutamine", "Polar, uncharged"),
        ("G", "Glycine", "Nonpolar, smallest"),
        ("H", "Histidine", "Positively charged, aromatic"),
        ("I", "Isoleucine", "Nonpolar, aliphatic"),
        ("L", "Leucine", "Nonpolar, aliphatic"),
        ("K", "Lysine", "Positively charged, basic"),
        ("M", "Methionine", "Nonpolar, sulfur-containing"),
        ("F", "Phenylalanine", "Nonpolar, aromatic"),
        ("P", "Proline", "Nonpolar, rigid"),
        ("S", "Serine", "Polar, uncharged"),
        ("T", "Threonine", "Polar, uncharged"),
        ("W", "Tryptophan", "Nonpolar, aromatic"),
        ("Y", "Tyrosine", "Polar, aromatic"),
        ("V", "Valine", "Nonpolar, aliphatic"),
        ("*", "Stop", "Stop codon"),
    )
}

def _classify_mutation(ref_aa, sample_aa):
    """Name the effect of replacing ``ref_aa`` with ``sample_aa``.

    Intended for positions where the two symbols differ; "*" stands for a
    stop codon.

    Returns:
        "Nonsense" when the sample gains a stop codon, "Nonstop" when the
        reference stop codon is replaced by an amino acid (stop-loss), and
        "Missense" for any other substitution.
    """
    if sample_aa == "*":
        return "Nonsense"
    if ref_aa == "*":
        # A lost stop codon is not an amino-acid-for-amino-acid swap, so it
        # must not be reported as missense.
        return "Nonstop"
    return "Missense"


# Build one report row per position where the two proteins disagree.
# zip() stops at the end of the shorter protein, so only positions present
# in both are compared; positions are reported 1-based.
table = []
for position, (ref_aa, sample_aa) in enumerate(zip(ref_prot, sample_prot), start=1):
    if ref_aa == sample_aa:
        continue

    # Symbols missing from aa_info are reported as "Unknown" instead of
    # raising a KeyError.
    ref_name, ref_prop = aa_info.get(ref_aa, ("Unknown", "Unknown"))
    sample_name, sample_prop = aa_info.get(sample_aa, ("Unknown", "Unknown"))

    table.append([
        position,
        ref_aa,
        sample_aa,
        f"{ref_name} ({ref_prop})",
        f"{sample_name} ({sample_prop})",
        _classify_mutation(ref_aa, sample_aa),
    ])

# Check for length differences: the residues beyond the shorter protein are
# not compared above, so flag the mismatch as a single summary row.
if len(ref_prot) != len(sample_prot):
    table.append([
        "-",
        "-",
        "-",
        f"Protein length {len(ref_prot)}",
        f"Protein length {len(sample_prot)}",
        "Possible frameshift",
    ])

# Print table
if table:
    print("\nAmino acid differences:")
    print(tabulate(table, headers=["Position", "Ref AA", "Sample AA", "Ref Info", "Sample Info", "Mutation Type"], tablefmt="grid"))
else:
    print("✅ No amino acid differences detected.")


Enter reference sequence: ACGG
Enter sample sequence: AGGC

Amino acid differences:
+------------+----------+-------------+------------------------------+--------------------------------------+-----------------+
|   Position | Ref AA   | Sample AA   | Ref Info                     | Sample Info                          | Mutation Type   |
|          1 | T        | R           | Threonine (Polar, uncharged) | Arginine (Positively charged, basic) | Missense        |
+------------+----------+-------------+------------------------------+--------------------------------------+-----------------+
