In [12]:
# Step 1: Install Biopython
# !pip install biopython

# Step 2: Import necessary modules
from Bio import pairwise2
from Bio.pairwise2 import format_alignment

# Define the query sequence and target sequence
query_sequence = "ATTATATCGCGCGCGCGCGCGCGCGCG"
target_sequence = "ATTATATCGCGCGCGCGCGCGCGCGCG"  # You can change this to any target sequence

# Perform local alignment using Smith-Waterman algorithm
alignments = pairwise2.align.localxx(query_sequence, target_sequence)

# Print all alignments
for alignment in alignments:
    print(format_alignment(*alignment))

# Calculate identity
def calculate_identity(alignment):
    aligned_seq1, aligned_seq2, score, begin, end = alignment
    matches = sum(a == b for a, b in zip(aligned_seq1, aligned_seq2))
    identity = matches / len(aligned_seq1) * 100
    return identity

# Print the identity of each alignment
for alignment in alignments:
    identity = calculate_identity(alignment)
    print(f"Identity: {identity:.2f}%")


ATTATATCGCGCGCGCGCGCGCGCGCG
|||||||||||||||||||||||||||
ATTATATCGCGCGCGCGCGCGCGCGCG
  Score=27

Identity: 100.00%


# SMITH WATERMAN

In [11]:
from Bio import pairwise2
from Bio.pairwise2 import format_alignment

# Define the query sequence and target sequence
query_sequence = "ATTATATCGCGCGCGCGCGCGCGCGCG"
target_sequence = "CGCGCG"  # Replace with your target sequence

# Perform local alignment using Smith-Waterman algorithm with zero gap penalties
alignments = pairwise2.align.localxs(query_sequence, target_sequence, 0, 0)

# Function to calculate the percentage identity of the alignment
def calculate_identity(alignment):
    aligned_seq1, aligned_seq2, score, begin, end = alignment
    
    # Remove gaps from both sequences
    aligned_seq1_no_gaps = ''.join(a for a in aligned_seq1 if a != '-')
    aligned_seq2_no_gaps = ''.join(b for b in aligned_seq2 if b != '-')
    
    # Calculate matches
    matches = sum(a == b for a, b in zip(aligned_seq1_no_gaps, aligned_seq2_no_gaps))
    
    # Calculate identity based on the length of the aligned target sequence
    if len(aligned_seq2_no_gaps) > 0:
        identity = (matches / len(aligned_seq2_no_gaps)) * 100
    else:
        identity = 0.0
    
    return identity

# Extract exact match region and print the percentage identity
for alignment in alignments:
    start = alignment[3]
    end = alignment[4]
    matched_region = query_sequence[start:end]
    
    identity = calculate_identity(alignment)
    
    print(f"Exact Match Region from Query Sequence:")
    print(f"Start Position: {start}")
    print(f"End Position: {end}")
    print(f"Matched Region: {matched_region}")
    print(f"Identity: {identity:.2f}%")
    print(format_alignment(*alignment))


Exact Match Region from Query Sequence:
Start Position: 21
End Position: 27
Matched Region: CGCGCG
Identity: 0.00%
22 CGCGCG
   ||||||
 1 CGCGCG
  Score=6

Exact Match Region from Query Sequence:
Start Position: 19
End Position: 27
Matched Region: CGCGCGCG
Identity: 0.00%
20 CGCGCGCG
   |  |||||
 1 C--GCGCG
  Score=6

Exact Match Region from Query Sequence:
Start Position: 17
End Position: 27
Matched Region: CGCGCGCGCG
Identity: 0.00%
18 CGCGCGCGCG
   |    |||||
 1 C----GCGCG
  Score=6

Exact Match Region from Query Sequence:
Start Position: 15
End Position: 27
Matched Region: CGCGCGCGCGCG
Identity: 0.00%
16 CGCGCGCGCGCG
   |      |||||
 1 C------GCGCG
  Score=6

Exact Match Region from Query Sequence:
Start Position: 13
End Position: 27
Matched Region: CGCGCGCGCGCGCG
Identity: 0.00%
14 CGCGCGCGCGCGCG
   |        |||||
 1 C--------GCGCG
  Score=6

Exact Match Region from Query Sequence:
Start Position: 11
End Position: 27
Matched Region: CGCGCGCGCGCGCGCG
Identity: 0.00%
12 CGCGCGCGCGCG

# GAP PENALTY ISSUES

In [5]:
# Step 1: Install Biopython
# !pip install biopython

# Step 2: Import necessary modules
from Bio import pairwise2
from Bio.pairwise2 import format_alignment

# Define the query sequence and target sequence
query_sequence = "ATTATATCGCGCGCGCGCGCGCGCGCG"
target_sequence = "CGCGCG"  # Replace with your target sequence

# Perform local alignment using Smith-Waterman algorithm with gap penalties
alignments = pairwise2.align.localxs(query_sequence, target_sequence, -1, -1)

# Print all alignments
for alignment in alignments:
    print(format_alignment(*alignment))

# Calculate identity
def calculate_identity(alignment):
    aligned_seq1, aligned_seq2, score, begin, end = alignment
    # Remove gaps before calculating identity
    aligned_seq1_no_gaps = ''.join(a for a in aligned_seq1 if a != '-')
    aligned_seq2_no_gaps = ''.join(b for b in aligned_seq2 if b != '-')
    matches = sum(a == b for a, b in zip(aligned_seq1_no_gaps, aligned_seq2_no_gaps))
    identity = matches / len(aligned_seq1_no_gaps) * 100
    return identity

# Print the identity of each alignment
for alignment in alignments:
    identity = calculate_identity(alignment)
    print(f"Identity: {identity:.2f}%")


22 CGCGCG
   ||||||
 1 CGCGCG
  Score=6

20 CGCGCG
   ||||||
 1 CGCGCG
  Score=6

18 CGCGCG
   ||||||
 1 CGCGCG
  Score=6

16 CGCGCG
   ||||||
 1 CGCGCG
  Score=6

14 CGCGCG
   ||||||
 1 CGCGCG
  Score=6

12 CGCGCG
   ||||||
 1 CGCGCG
  Score=6

10 CGCGCG
   ||||||
 1 CGCGCG
  Score=6

8 CGCGCG
  ||||||
1 CGCGCG
  Score=6

Identity: 0.00%
Identity: 0.00%
Identity: 0.00%
Identity: 0.00%
Identity: 0.00%
Identity: 0.00%
Identity: 0.00%
Identity: 0.00%


In [13]:
from Bio import pairwise2
from Bio.pairwise2 import format_alignment

# Define the query sequence and target sequence
query_sequence = "ATTATATCGCGCGCGCGCGCGCGCGCG"
target_sequence = "CGCGCG"  # Replace with your target sequence

# Perform local alignment using Smith-Waterman algorithm with zero gap penalties
alignments = pairwise2.align.localxs(query_sequence, target_sequence, 0, 0)

# Extract and print the region of the query sequence where the exact match is found
def extract_exact_match_region(alignment):
    aligned_seq1, aligned_seq2, score, begin, end = alignment
    # Extract the region of the query sequence where the match occurs
    match_start = begin
    match_end = end
    
    # Extract aligned segment from the query sequence
    matched_region = query_sequence[match_start:match_end]
    
    return matched_region, match_start, match_end

# Print all alignments and extract exact match regions
for alignment in alignments:
    matched_region, start, end = extract_exact_match_region(alignment)
    print(f"Exact Match Region from Query Sequence:")
    print(f"Start Position: {start}")
    print(f"End Position: {end}")
    print(f"Matched Region: {matched_region}")
    print(format_alignment(*alignment))


Exact Match Region from Query Sequence:
Start Position: 21
End Position: 27
Matched Region: CGCGCG
22 CGCGCG
   ||||||
 1 CGCGCG
  Score=6

Exact Match Region from Query Sequence:
Start Position: 19
End Position: 27
Matched Region: CGCGCGCG
20 CGCGCGCG
   |  |||||
 1 C--GCGCG
  Score=6

Exact Match Region from Query Sequence:
Start Position: 17
End Position: 27
Matched Region: CGCGCGCGCG
18 CGCGCGCGCG
   |    |||||
 1 C----GCGCG
  Score=6

Exact Match Region from Query Sequence:
Start Position: 15
End Position: 27
Matched Region: CGCGCGCGCGCG
16 CGCGCGCGCGCG
   |      |||||
 1 C------GCGCG
  Score=6

Exact Match Region from Query Sequence:
Start Position: 13
End Position: 27
Matched Region: CGCGCGCGCGCGCG
14 CGCGCGCGCGCGCG
   |        |||||
 1 C--------GCGCG
  Score=6

Exact Match Region from Query Sequence:
Start Position: 11
End Position: 27
Matched Region: CGCGCGCGCGCGCGCG
12 CGCGCGCGCGCGCGCG
   |          |||||
 1 C----------GCGCG
  Score=6

Exact Match Region from Query Sequence:


In [14]:
from Bio import pairwise2
from Bio.pairwise2 import format_alignment

# Define the query sequence and target sequence
query_sequence = "ATTATATCGCGCGCGCGCGCGCGCGCG"
target_sequence = "CGCGCG"  # Replace with your target sequence

# Perform local alignment using Smith-Waterman algorithm with zero gap penalties
alignments = pairwise2.align.localxs(query_sequence, target_sequence, 0, 0)

# Extract and print the region of the query sequence where the exact match is found
def extract_exact_match_region(alignment):
    aligned_seq1, aligned_seq2, score, begin, end = alignment
    # Extract the region of the query sequence where the match occurs
    match_start = begin
    match_end = end
    
    # Extract aligned segment from the query sequence
    matched_region = query_sequence[match_start:match_end]
    
    # Calculate the percentage identity of the match
    identity = (aligned_seq2.count('|') / len(aligned_seq1)) * 100

    return matched_region, match_start, match_end, identity

# Print all alignments and extract exact match regions
for alignment in alignments:
    matched_region, start, end, identity = extract_exact_match_region(alignment)
    print(f"Exact Match Region from Query Sequence:")
    print(f"Start Position: {start}")
    print(f"End Position: {end}")
    print(f"Matched Region: {matched_region}")
    print(f"Identity: {identity:.2f}%")
    print(format_alignment(*alignment))
    print()

    # Highlight 100% identity alignments
    if identity == 100.0:
        print("This alignment has 100% identity!")
        print()


Exact Match Region from Query Sequence:
Start Position: 21
End Position: 27
Matched Region: CGCGCG
Identity: 0.00%
22 CGCGCG
   ||||||
 1 CGCGCG
  Score=6


Exact Match Region from Query Sequence:
Start Position: 19
End Position: 27
Matched Region: CGCGCGCG
Identity: 0.00%
20 CGCGCGCG
   |  |||||
 1 C--GCGCG
  Score=6


Exact Match Region from Query Sequence:
Start Position: 17
End Position: 27
Matched Region: CGCGCGCGCG
Identity: 0.00%
18 CGCGCGCGCG
   |    |||||
 1 C----GCGCG
  Score=6


Exact Match Region from Query Sequence:
Start Position: 15
End Position: 27
Matched Region: CGCGCGCGCGCG
Identity: 0.00%
16 CGCGCGCGCGCG
   |      |||||
 1 C------GCGCG
  Score=6


Exact Match Region from Query Sequence:
Start Position: 13
End Position: 27
Matched Region: CGCGCGCGCGCGCG
Identity: 0.00%
14 CGCGCGCGCGCGCG
   |        |||||
 1 C--------GCGCG
  Score=6


Exact Match Region from Query Sequence:
Start Position: 11
End Position: 27
Matched Region: CGCGCGCGCGCGCGCG
Identity: 0.00%
12 CGCGCGC

In [1]:
from Bio import pairwise2
from Bio.pairwise2 import format_alignment

# Define the query sequence and target sequence
query_sequence = "GGGAATTCATGGAGCAGTGGGTGGATCCAG"
target_sequence = "GATAGC36GCTATC"  # Replace with your target sequence

# Perform local alignment using Smith-Waterman algorithm with zero gap penalties
alignments = pairwise2.align.localxs(query_sequence, target_sequence, 0, 0)

# Extract and print the region of the query sequence where the exact match is found
def extract_exact_match_region(alignment):
    aligned_seq1, aligned_seq2, score, begin, end = alignment
    # Extract the region of the query sequence where the match occurs
    match_start = begin
    match_end = end
    
    # Extract aligned segment from the query sequence
    matched_region = query_sequence[match_start:match_end]

    # Calculate percentage identity
    # Alignments provided by Bio.pairwise2 may include gaps and the identity should
    # be computed considering only the non-gap characters.
    match_count = sum(1 for a, b in zip(aligned_seq1, aligned_seq2) if a == b and a != '-' and b != '-')
    total_length = len(aligned_seq2.replace('-', ''))  # Length of non-gap characters in target sequence
    identity = (match_count / total_length) * 100 if total_length > 0 else 0

    return matched_region, match_start, match_end, identity

# Print all alignments and extract exact match regions
for alignment in alignments:
    matched_region, start, end, identity = extract_exact_match_region(alignment)
    print(f"Exact Match Region from Query Sequence:")
    print(f"Start Position: {start}")
    print(f"End Position: {end}")
    print(f"Matched Region: {matched_region}")
    print(f"Identity: {identity:.2f}%")
    print(format_alignment(*alignment))
    print()

    # Highlight 100% identity alignments
    if identity == 100.0:
        print("This alignment has 100% identity!")
        print()


Exact Match Region from Query Sequence:
Start Position: 2
End Position: 31
Matched Region: GAATTCATGGAGCAGTGGGTGGATCCAG
Identity: 78.57%
3 GAATTCATGGAGCAGTGG--G-TGGATCC
  |     ||  |||       | |  || |
1 G-----AT--AGC-----36GCT--AT-C
  Score=11


Exact Match Region from Query Sequence:
Start Position: 1
End Position: 31
Matched Region: GGAATTCATGGAGCAGTGGGTGGATCCAG
Identity: 78.57%
2 GGAATTCATGGAGCAGTGG--G-TGGATCC
  |      ||  |||       | |  || |
1 G------AT--AGC-----36GCT--AT-C
  Score=11


Exact Match Region from Query Sequence:
Start Position: 0
End Position: 31
Matched Region: GGGAATTCATGGAGCAGTGGGTGGATCCAG
Identity: 78.57%
1 GGGAATTCATGGAGCAGTGG--G-TGGATCC
  |       ||  |||       | |  || |
1 G-------AT--AGC-----36GCT--AT-C
  Score=11


Exact Match Region from Query Sequence:
Start Position: 2
End Position: 31
Matched Region: GAATTCATGGAGCAGTGGGTGGATCCAG
Identity: 78.57%
3 GAATTCATGGAGCAGTGG--G-TGGATCC
  | |    |  |||       | |  || |
1 G-A----T--AGC-----36GCT--AT-C
  Score=11


Exac

