In [2]:
"""
    smith_waterman(seq1, seq2; match_score=2, mismatch_penalty=-1, gap_penalty=-1, k=1)

Compute local alignments of `seq1` and `seq2` with the Smith-Waterman dynamic
program using a linear gap penalty.

Every cell of the score matrix with a positive score is a candidate alignment end
point. Candidates are ranked by score (ties keep row-major discovery order: rows of
`seq1` outermost, columns of `seq2` innermost) and the best `k` are traced back.
Because ranking is done per cell, the returned alignments may overlap or be
prefixes of one another.

# Arguments
- `seq1`, `seq2`: the sequences to align. They are compared character by character,
  so non-ASCII text is handled correctly.

# Keywords
- `match_score`: score added when two characters are equal.
- `mismatch_penalty`: score added when two characters differ.
- `gap_penalty`: score added for each gap position.
- `k`: maximum number of alignments to return.

# Returns
A vector of `(aligned_seq1, aligned_seq2, score)` tuples sorted by descending
score, where gaps are written as `-`. The vector is empty when no cell has a
positive score (for example when either sequence is empty).
"""
function smith_waterman(
    seq1::AbstractString,
    seq2::AbstractString;
    match_score=2,
    mismatch_penalty=-1,
    gap_penalty=-1,
    k=1
)
    # Work on character vectors: `length(::String)` counts characters while
    # `s[i]` indexes bytes, so indexing the strings directly fails on non-ASCII input.
    chars1, chars2 = collect(seq1), collect(seq2)
    m, n = length(chars1), length(chars2)

    # Row 1 and column 1 stay zero and act as the "empty prefix" boundary.
    score_matrix = zeros(Int, m + 1, n + 1)
    candidates = Tuple{Int,Tuple{Int,Int}}[]

    for i in 2:m+1
        for j in 2:n+1
            sub = chars1[i-1] == chars2[j-1] ? match_score : mismatch_penalty
            score_matrix[i, j] = max(
                0,
                score_matrix[i-1, j-1] + sub,
                score_matrix[i-1, j] + gap_penalty,
                score_matrix[i, j-1] + gap_penalty,
            )
            if score_matrix[i, j] > 0
                push!(candidates, (score_matrix[i, j], (i, j)))
            end
        end
    end

    # A stable sort keeps equal-scoring cells in discovery order.
    sort!(candidates; by=first, rev=true, alg=MergeSort)
    top_k = candidates[1:min(k, length(candidates))]

    # Walk back from an end cell until a zero cell or the matrix boundary is hit.
    # Move preference on ties: diagonal, then up (gap in seq2), then left (gap in seq1).
    function traceback(candidate)
        score, (i, j) = candidate
        # Collected back-to-front and reversed once at the end, instead of
        # prepending to a string on every step.
        out1, out2 = Char[], Char[]
        while i > 1 && j > 1 && score_matrix[i, j] > 0
            sub = chars1[i-1] == chars2[j-1] ? match_score : mismatch_penalty
            if score_matrix[i, j] == score_matrix[i-1, j-1] + sub
                push!(out1, chars1[i-1])
                push!(out2, chars2[j-1])
                i -= 1
                j -= 1
            elseif score_matrix[i, j] == score_matrix[i-1, j] + gap_penalty
                push!(out1, chars1[i-1])
                push!(out2, '-')
                i -= 1
            else
                # A positive cell equals one of its three predecessors' scores,
                # so the remaining possibility is the move from the left.
                push!(out1, '-')
                push!(out2, chars2[j-1])
                j -= 1
            end
        end
        return (String(reverse!(out1)), String(reverse!(out2)), score)
    end

    return Tuple{String,String,Int}[traceback(c) for c in top_k]
end



smith_waterman (generic function with 1 method)

In [4]:

# Example usage
seq1 = "ACACACTA"
seq2 = "AGCACACA"

# Ask for the three best-scoring local alignments of the two demo sequences.
alignments = smith_waterman(seq1, seq2; k=3)

println("Top Alignments:")
# Each result unpacks into the two gapped strings and their score.
for (top_row, bottom_row, score) in alignments
    println("Alignment: ", top_row, " - ", bottom_row, " Score: ", score)
end


Top Alignments:
Alignment: A-CACACTA - AGCACAC-A Score: 12
Alignment: A-CACAC - AGCACAC Score: 11
Alignment: ACACA - ACACA Score: 10


In [4]:
"""
    smith_waterman_single_vs_multiple(reference_seq, sequences;
                                      match_score=2, mismatch_penalty=-1,
                                      gap_penalty=-1, k=1)

Align `reference_seq` against every entry of `sequences` by calling
`smith_waterman` once per entry, forwarding all keyword arguments unchanged.

# Arguments
- `reference_seq`: the sequence passed as the first argument of every alignment.
- `sequences`: the sequences to align against the reference, processed in order.

# Keywords
- `match_score`, `mismatch_penalty`, `gap_penalty`, `k`: forwarded to
  `smith_waterman`.

# Returns
A vector with one `(sequence, alignments)` tuple per input sequence, in input
order, where `alignments` is whatever `smith_waterman` returned for that pair.
"""
function smith_waterman_single_vs_multiple(
    reference_seq::AbstractString,
    sequences::AbstractVector{<:AbstractString};
    match_score=2,
    mismatch_penalty=-1,
    gap_penalty=-1,
    k=1
)
    # Converted once so string views (e.g. pieces returned by `split`) are accepted
    # even by a `smith_waterman` method that only takes `String`.
    reference = String(reference_seq)

    # A comprehension lets the element type be inferred instead of collecting
    # into an untyped `Any[]`.
    return [
        (
            seq,
            smith_waterman(
                reference,
                String(seq);
                match_score=match_score,
                mismatch_penalty=mismatch_penalty,
                gap_penalty=gap_penalty,
                k=k,
            ),
        ) for seq in sequences
    ]
end


smith_waterman_single_vs_multiple (generic function with 1 method)

In [5]:
reference_seq = "ACACACTA"
other_sequences = ["AGCACACA", "TACACGT", "GTCAGC"]

# Two best local alignments of the reference against each of the other sequences.
alignments = smith_waterman_single_vs_multiple(reference_seq, other_sequences; k=2)

println("Alignments with reference sequence:")
for (target, hits) in alignments
    println("Alignments with sequence: ", target)
    # Every hit unpacks into the two gapped strings and their score.
    for (top_row, bottom_row, score) in hits
        println(" - Alignment: ", top_row, " - ", bottom_row, " Score: ", score)
    end
end



Alignments with reference sequence:


Alignments with sequence: AGCACACA


 - Alignment: A-CACACTA - AGCACAC-A Score: 12
 - Alignment: A-CACAC - AGCACAC Score: 11
Alignments with sequence: TACACGT
 - Alignment: ACAC-T - ACACGT Score: 9
 - Alignment: ACAC - ACAC Score: 8
Alignments with sequence: GTCAGC
 - Alignment: CA-C - CAGC Score: 5
 - Alignment: CA-C - CAGC Score: 5
