# Project 1: Implementation of Sequence Alignment Algorithms
Nikita Kozlov, 317099

In [17]:
# ---------------------------------------------------------------------------
# Needleman-Wunsch settings (consumed by the cell that builds
# NeedlemanWunschInput and writes the results file).
# ---------------------------------------------------------------------------

# File locations: score-matrix CSV to read, text file to write results to.
nw_alignment_score_matrix_csv_path = "data/nw_alignment_score_matrix.csv"
nw_output_path = "data/nw_output.txt"

# Values forwarded as `gap_penalty` and `maximal_final_alignments_count`.
nw_gap_penalty = -2
nw_maximal_final_alignments_count = 100

# Alternative short sequence pairs kept for quick experiments:
#   nw_dna1 = "TGCTCGTA"
#   nw_dna2 = "TTCATA"
#
#   nw_dna1 = "TATA"
#   nw_dna2 = "ATAT"

# Sequence pair currently in use.
nw_dna1 = "ATGGAGTCTCCGCAGGGTCAGGAGTCCCTGAGC"
nw_dna2 = "ATGGAGTATCCGCAGGTCAGGTCCCTGAGC"

In [18]:
import pandas as pd
from utils import AlignmentScoreMatrix
from needleman_wunsch import NeedlemanWunschInput, needleman_wunsch, draw_needleman_wunsch

# Read the score matrix CSV and wrap it in the project's AlignmentScoreMatrix.
nw_alignment_score_matrix = AlignmentScoreMatrix.from_df(
    pd.read_csv(nw_alignment_score_matrix_csv_path)
)

# Named `nw_input` (not `input`) so the builtin `input()` is not shadowed for
# the remainder of the kernel session.
nw_input = NeedlemanWunschInput(
    dna1=nw_dna1,
    dna2=nw_dna2,
    maximal_final_alignments_count=nw_maximal_final_alignments_count,
    alignment_score_matrix=nw_alignment_score_matrix,
    gap_penalty=nw_gap_penalty,
)

# For every alignment produced by needleman_wunsch: echo it tab-separated to
# the cell output and write a numbered record (1-based) to the output file.
# The encoding is given explicitly so the file does not depend on the
# platform's default locale.
with open(nw_output_path, "w", encoding="utf-8") as f:
    for alignment_no, output in enumerate(needleman_wunsch(nw_input), start=1):
        print(f"{output.aligned_dna1}\t{output.aligned_dna2}\t{output.final_score}")
        f.write(f"Global alignment no. {alignment_no}:\n")
        f.write(f"{output.aligned_dna1}\n")
        f.write(f"{output.aligned_dna2}\n")
        f.write(f"Score: {output.final_score}\n")
        f.write("\n")

# To visualize:
# for output in needleman_wunsch(nw_input):
#     print(f"{output.aligned_dna1}\t{output.aligned_dna2}\t{output.final_score}")
#     draw_needleman_wunsch(output)


In [15]:
# ---------------------------------------------------------------------------
# Smith-Waterman settings (consumed by the cell that builds
# SmithWatermanInput and writes the results file).
# ---------------------------------------------------------------------------

# File locations: score-matrix CSV to read, text file to write the result to.
sw_alignment_score_matrix_csv_path = "data/sw_alignment_score_matrix.csv"
sw_output_path = "data/sw_output.txt"

# Value forwarded as `gap_penalty`.
sw_gap_penalty = -2

# Alternative short sequence pair kept for quick experiments:
#   sw_dna1 = "TGCTCGTA"
#   sw_dna2 = "TTCATA"

# Sequence pair currently in use.
sw_dna1 = "ATGGAGTCTCCGCAGGGTCAGGAGTCCCTGAGC"
sw_dna2 = "ATGGAGTATCCGCAGGTCAGGTCCCTGAGC"

In [16]:
# pandas is imported here as well: this cell calls pd.read_csv and previously
# only worked if an earlier cell had already imported pandas into the kernel.
import pandas as pd

from utils import AlignmentScoreMatrix
from smith_waterman import SmithWatermanInput, smith_waterman

# Read the score matrix CSV and wrap it in the project's AlignmentScoreMatrix.
sw_alignment_score_matrix = AlignmentScoreMatrix.from_df(
    pd.read_csv(sw_alignment_score_matrix_csv_path)
)

# Named `sw_input` (not `input`) so the builtin `input()` is not shadowed for
# the remainder of the kernel session.
sw_input = SmithWatermanInput(
    dna1=sw_dna1,
    dna2=sw_dna2,
    alignment_score_matrix=sw_alignment_score_matrix,
    gap_penalty=sw_gap_penalty,
)

# smith_waterman returns a single result object; echo it tab-separated.
output = smith_waterman(sw_input)
print(f"{output.aligned_dna1}\t{output.aligned_dna2}\t{output.final_score}")

# Persist the same result as a small labelled text record. The encoding is
# given explicitly so the file does not depend on the platform's default locale.
with open(sw_output_path, "w", encoding="utf-8") as f:
    f.write("Local alignment:\n")
    f.write(f"{output.aligned_dna1}\n")
    f.write(f"{output.aligned_dna2}\n")
    f.write(f"Score: {output.final_score}\n")