In [3]:
#This code aligns DNA sequences using the Needleman-Wunsch Algorithm's scoring matrix.
#This can be used when DNA mutations including insertions, deletions, frameshifts etc. are sustained to see if the sequences align regardless of the frame shift

import numpy as np

# Parameters: None (takes input)

# Returns: None (prints scoring)


# Input: 2 DNA sequences, Output: Needleman-Wunsch Aligned Sequences
# Transcribes them into RNA sequences depending on whether they represent the template strand (complementary RNA) or the coding strand
#Aligns the two RNA sequences using  Needleman-Wunsch algo  -- alignment with scoring: match +1, mismatch -1, gap -2.

# The function prints the matrix


def dna_alignment_and_transcription_tool():
    """
    Accepts two DNA sequences and whether it is template or coding strand
    Converts DNA sequences to RNA by replacing 'T' with 'U', Transcribes each RNA sequence according to the strand type:
       - Template strand: produces complementary RNA.
       - Coding strand: produces identical RNA (with Tâ†’U substitution).
    Aligns the two RNA sequences using Needleman-Wunsch scoring:
       - Match: +1
       - Mismatch: -1
       - Gap: -2
    Displays the resulting alignment scoring matrix.
    """

    # Input: DNA sequence 1
    code = input("Enter first DNA sequence: ").upper()

    # Input: DNA sequence 2 (also converted to RNA)
    code1 = input("Enter second DNA sequence: ").upper()


    templateOrCoding = input("Are the sequences 'template' or 'coding' strands? ").lower()


    codon_table = {
        "UUU": "F", "UUC": "F", "UUA": "L", "UUG": "L",
        "CUU": "L", "CUC": "L", "CUA": "L", "CUG": "L",
        "AUU": "I", "AUC": "I", "AUA": "I", "AUG": "M",
        "GUU": "V", "GUC": "V", "GUA": "V", "GUG": "V",
        "GCU": "A", "GCC": "A", "GCA": "A", "GCG": "A",
        "UGU": "C", "UGC": "C", "UGG": "W",
        "UCU": "S", "UCC": "S", "UCA": "S", "UCG": "S",
        "UAU": "Y", "UAC": "Y", "UAA": "STOP", "UAG": "STOP", "UGA": "STOP",
        "GAA": "E", "GAG": "E",
        "GGU": "G", "GGC": "G", "GGA": "G", "GGG": "G",
        "CGU": "R", "CGC": "R", "CGA": "R", "CGG": "R",
        "AGU": "S", "AGC": "S", "AGA": "R", "AGG": "R"
    }


    new_string1 = ""
    new_string2 = ""

    # Step 1: Replace thymine (T) with uracil (U) to simulate transcription
    code = code.replace('T', 'U')
    code1 = code1.replace('T', 'U')

    for i in range(len(code)):
        if templateOrCoding == "template":

            if code[i] == 'A':
                new_string1 += 'U'
            elif code[i] == 'U':
                new_string1 += 'A'
            elif code[i] == 'C':
                new_string1 += 'G'
            elif code[i] == 'G':
                new_string1 += 'C'
        elif templateOrCoding == "coding":
            # Coding strand RNA is identical except T becomes U
            new_string1 += code[i]

    # Step 3: Transcribe second RNA sequence similarly
    for i in range(len(code1)):
        if templateOrCoding == "template":
            if code1[i] == 'A':
                new_string2 += 'U'
            elif code1[i] == 'U':
                new_string2 += 'A'
            elif code1[i] == 'C':
                new_string2 += 'G'
            elif code1[i] == 'G':
                new_string2 += 'C'
        elif templateOrCoding == "coding":
            new_string2 += code1[i]

    # Step 4: Initialize a 2D scoring matrix for sequence alignment

    matrix = np.zeros((len(new_string1), len(new_string2)))

    # Step 5: Initialize gap penalties in first row and column
    for i in range(len(new_string1)):
        for j in range(len(new_string2)):
            if i == 0 and j > 0:
                matrix[i][j] = j * -2  # Gap penalty for columns
            if j == 0 and i > 0:
                matrix[i][j] = i * -2  # Gap penalty for rows

    # Step 6: Fill in the scoring matrix using match, mismatch, and gap logic
    for i in range(1, len(new_string1)):
        for j in range(1, len(new_string2)):
            if new_string1[i] == new_string2[j]:
                match = matrix[i - 1][j - 1] + 1      # Match score
            else:
                match = matrix[i - 1][j - 1] - 1      # Mismatch penalty
            gap1 = matrix[i - 1][j] - 2               # Gap in new_string2
            gap2 = matrix[i][j - 1] - 2               # Gap in new_string1
            matrix[i][j] = max(match, gap1, gap2)     # Choose the best score

    # Step 7: Output the alignment matrix
    print("\nAlignment scoring matrix:")
    print(matrix)


dna_alignment_and_transcription_tool()

Enter first DNA sequence: ATCCGGTCCTA
Enter second DNA sequence: ATCGGTCCTA
Are the sequences 'template' or 'coding' strands? coding

Alignment scoring matrix:
[[  0.  -2.  -4.  -6.  -8. -10. -12. -14. -16. -18.]
 [ -2.   1.  -1.  -3.  -5.  -7.  -9. -11. -13. -15.]
 [ -4.  -1.   2.   0.  -2.  -4.  -6.  -8. -10. -12.]
 [ -6.  -3.   0.   1.  -1.  -3.  -3.  -5.  -7.  -9.]
 [ -8.  -5.  -2.   1.   2.   0.  -2.  -4.  -6.  -8.]
 [-10.  -7.  -4.  -1.   2.   1.  -1.  -3.  -5.  -7.]
 [-12.  -9.  -6.  -3.   0.   3.   1.  -1.  -2.  -4.]
 [-14. -11.  -8.  -5.  -2.   1.   4.   2.   0.  -2.]
 [-16. -13. -10.  -7.  -4.  -1.   2.   5.   3.   1.]
 [-18. -15. -12.  -9.  -6.  -3.   0.   3.   6.   4.]
 [-20. -17. -14. -11.  -8.  -5.  -2.   1.   4.   7.]]
