<a href="https://colab.research.google.com/github/mostafadentist/python-ipynb/blob/main/Sequence_Alignment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [78]:
import numpy as np

def needleman_wunsch(seq1, seq2, match=1, mismatch=-1, gap=-2):
    n, m = len(seq1), len(seq2)
    score = np.zeros((n+1, m+1), dtype=int)

    # Initialization
    for i in range(1, n+1):
        score[i,0] = i*gap
    for j in range(1, m+1):
        score[0,j] = j*gap

    # Matrix filling
    for i in range(1, n+1):
        for j in range(1, m+1):
            diag = score[i-1,j-1] + (match if seq1[i-1]==seq2[j-1] else mismatch)
            delete = score[i-1,j] + gap
            insert = score[i,j-1] + gap
            score[i,j] = max(diag, delete, insert)

    # Traceback
    align1, align2 = "", ""
    i, j = n, m
    while i>0 or j>0:
        current = score[i,j]
        if i>0 and j>0 and current == score[i-1,j-1] + (match if seq1[i-1]==seq2[j-1] else mismatch):
            align1 = seq1[i-1] + align1
            align2 = seq2[j-1] + align2
            i -= 1; j -= 1
        elif i>0 and current == score[i-1,j] + gap:
            align1 = seq1[i-1] + align1
            align2 = "-" + align2
            i -= 1
        else:
            align1 = "-" + align1
            align2 = seq2[j-1] + align2
            j -= 1
    return score, align1, align2

s1, s2 = "ACGT", "ACT"
matrix, a1, a2 = needleman_wunsch(s1, s2)
print("Alignment:\n", a1, "\n", a2)
print("Score Matrix:\n", matrix)

Alignment:
 ACGT 
 AC-T
Score Matrix:
 [[ 0 -2 -4 -6]
 [-2  1 -1 -3]
 [-4 -1  2  0]
 [-6 -3  0  1]
 [-8 -5 -2  1]]


In [79]:
def smith_waterman(seq1, seq2, match=2, mismatch=-1, gap=-1):
    n, m = len(seq1), len(seq2)
    score = np.zeros((n+1, m+1), dtype=int)

    max_score, max_pos = 0, None

    # Matrix filling
    for i in range(1, n+1):
        for j in range(1, m+1):
            diag = score[i-1,j-1] + (match if seq1[i-1]==seq2[j-1] else mismatch)
            delete = score[i-1,j] + gap
            insert = score[i,j-1] + gap
            score[i,j] = max(0, diag, delete, insert)

            if score[i,j] >= max_score:
                max_score = score[i,j]
                max_pos = (i,j)

    # Traceback
    align1, align2 = "", ""
    i, j = max_pos
    while score[i,j] != 0:
        current = score[i,j]
        if current == score[i-1,j-1] + (match if seq1[i-1]==seq2[j-1] else mismatch):
            align1 = seq1[i-1] + align1
            align2 = seq2[j-1] + align2
            i -= 1; j -= 1
        elif current == score[i-1,j] + gap:
            align1 = seq1[i-1] + align1
            align2 = "-" + align2
            i -= 1
        else:
            align1 = "-" + align1
            align2 = seq2[j-1] + align2
            j -= 1
    return score, align1, align2, max_score

s1, s2 = "ACACACTA", "AGCACACA"
matrix, a1, a2, max_score = smith_waterman(s1, s2)
print("Local Alignment:\n", a1, "\n", a2)
print("Best Score:", max_score)

Local Alignment:
 A-CACACTA 
 AGCACAC-A
Best Score: 12


In [80]:
def smith_waterman(seq1, seq2, match=2, mismatch=-1, gap=-1):
    n, m = len(seq1), len(seq2)
    score = np.zeros((n+1, m+1), dtype=int)

    max_score, max_pos = 0, None

    # Matrix filling
    for i in range(1, n+1):
        for j in range(1, m+1):
            diag = score[i-1,j-1] + (match if seq1[i-1]==seq2[j-1] else mismatch)
            delete = score[i-1,j] + gap
            insert = score[i,j-1] + gap
            score[i,j] = max(0, diag, delete, insert)

            if score[i,j] >= max_score:
                max_score = score[i,j]
                max_pos = (i,j)

    # Traceback
    align1, align2 = "", ""
    i, j = max_pos
    while score[i,j] != 0:
        current = score[i,j]
        if current == score[i-1,j-1] + (match if seq1[i-1]==seq2[j-1] else mismatch):
            align1 = seq1[i-1] + align1
            align2 = seq2[j-1] + align2
            i -= 1; j -= 1
        elif current == score[i-1,j] + gap:
            align1 = seq1[i-1] + align1
            align2 = "-" + align2
            i -= 1
        else:
            align1 = "-" + align1
            align2 = seq2[j-1] + align2
            j -= 1
    return score, align1, align2, max_score

s1, s2 = "ACACACTA", "AGCACACA"
matrix, a1, a2, max_score = smith_waterman(s1, s2)
print("Local Alignment:\n", a1, "\n", a2)
print("Best Score:", max_score)

Local Alignment:
 A-CACACTA 
 AGCACAC-A
Best Score: 12


In [81]:
!pip install biopython
from Bio import pairwise2
from Bio.pairwise2 import format_alignment

seq1 = "ACCGT"
seq2 = "ACG"

# Global alignment
alignments = pairwise2.align.globalms(seq1, seq2, 2, -1, -2, -0.5)
for aln in alignments:
    print(format_alignment(*aln))

# Local alignment
alignments = pairwise2.align.localms(seq1, seq2, 2, -1, -2, -0.5)
for aln in alignments:
    print(format_alignment(*aln))

ACCGT
| || 
A-CG-
  Score=2

ACCGT
|| | 
AC-G-
  Score=2

3 CG
  ||
2 CG
  Score=4

1 AC
  ||
1 AC
  Score=4





In [82]:
!pip install biopython
from Bio.Align import MultipleSeqAlignment
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord

alignment = MultipleSeqAlignment([
    SeqRecord(Seq("ACTGCT"), id="Seq1"),
    SeqRecord(Seq("ACT-CT"), id="Seq2"),
    SeqRecord(Seq("A-TGTT"), id="Seq3")
])

print(alignment)

Alignment with 3 rows and 6 columns
ACTGCT Seq1
ACT-CT Seq2
A-TGTT Seq3


In [83]:
def plot_alignment(seq1, seq2, align1, align2):
    matches = ["|" if a==b else " " for a,b in zip(align1,align2)]
    print(seq1 + "\n" + "".join(matches) + "\n" + seq2)

plot_alignment("ACGT", "ACT", a1, a2)

ACGT
| ||||| |
ACT
