# Sequence alignment

In [2]:
def needleman_wunsch(seq1, seq2, match_score=1, gap_cost=-1, mismatch_cost=-1):
    """Globally align two sequences using a linear (per-character) gap score.

    Args:
        seq1: First sequence; shown on the first alignment row.
        seq2: Second sequence; shown on the second alignment row.
        match_score: Score added when the two aligned characters are equal.
        gap_cost: Score added for every character aligned against "-".
        mismatch_cost: Score added when the two aligned characters differ.

    Returns:
        A tuple ``(align1, align2, score)``: the two gapped rows and the
        value of the bottom-right cell of the scoring table.
    """
    rows, cols = len(seq1), len(seq2)

    # grid[r][c] is the best score for aligning seq1[:r] with seq2[:c].
    grid = [[0] * (cols + 1) for _ in range(rows + 1)]

    # A prefix aligned against nothing is a run of gaps.
    for r in range(1, rows + 1):
        grid[r][0] = grid[r - 1][0] + gap_cost
    for c in range(1, cols + 1):
        grid[0][c] = grid[0][c - 1] + gap_cost

    # Dynamic-programming fill, row by row.
    for r, a in enumerate(seq1, start=1):
        above, here = grid[r - 1], grid[r]
        for c, b in enumerate(seq2, start=1):
            pair = match_score if a == b else mismatch_cost
            here[c] = max(
                above[c - 1] + pair,     # align a with b
                above[c] + gap_cost,     # a against a gap
                here[c - 1] + gap_cost,  # b against a gap
            )

    # Walk back from the bottom-right corner, collecting columns in reverse.
    # Ties are resolved in the order: diagonal, gap in seq2, gap in seq1.
    top, bottom = [], []
    r, c = rows, cols
    while r > 0 and c > 0:
        cell = grid[r][c]
        a, b = seq1[r - 1], seq2[c - 1]
        pair = match_score if a == b else mismatch_cost

        if cell == grid[r - 1][c - 1] + pair:
            top.append(a)
            bottom.append(b)
            r -= 1
            c -= 1
        elif cell == grid[r - 1][c] + gap_cost:
            top.append(a)
            bottom.append("-")
            r -= 1
        elif cell == grid[r][c - 1] + gap_cost:
            top.append("-")
            bottom.append(b)
            c -= 1

    # One index may still be positive: the rest of that sequence pairs with gaps.
    while r > 0:
        top.append(seq1[r - 1])
        bottom.append("-")
        r -= 1
    while c > 0:
        top.append("-")
        bottom.append(seq2[c - 1])
        c -= 1

    # Columns were gathered right-to-left; flip them back.
    align1 = "".join(reversed(top))
    align2 = "".join(reversed(bottom))

    return align1, align2, grid[rows][cols]

# Demo: align two short DNA strings with the linear-gap aligner and print
# both gapped rows followed by the score.
seq1 = "ACGTCATCA"
seq2 = "TAGTGTCA"
alignment = needleman_wunsch(seq1, seq2)
for label, part in zip(("Alignment 1:", "Alignment 2:", "Score:"), alignment):
    print(label, part)


Alignment 1: -ACGTCATCA
Alignment 2: TA-GT-GTCA
Score: 2


In [4]:
def needleman_wunsch_affine(seq1, seq2, match_score=1, gap_opening=-2, gap_extension=-1, mismatch_cost=-1):
    """Globally align two sequences with an affine gap score (three-matrix DP).

    A run of ``k`` consecutive gap characters scores
    ``gap_opening + k * gap_extension``. The same rule is applied to gaps at
    the ends of the alignment and to gaps in the middle.

    Args:
        seq1: First sequence; shown on the first alignment row.
        seq2: Second sequence; shown on the second alignment row.
        match_score: Score added when the two aligned characters are equal.
        gap_opening: Score added once for every run of gaps.
        gap_extension: Score added for every gap character, including the
            first one of a run.
        mismatch_cost: Score added when the two aligned characters differ.

    Returns:
        A tuple ``(align1, align2, score)``. ``score`` is the best of the
        three matrices at the bottom-right cell, i.e. the score of the
        alignment that is actually returned.

    Note:
        Gap states are only entered from the match state or extended from
        themselves, so a gap in one sequence directly followed by a gap in
        the other is not considered.
    """
    n = len(seq1)
    m = len(seq2)
    neg_inf = float('-inf')
    # Score of the first gap character of a run: opening plus one extension.
    gap_start = gap_opening + gap_extension

    # M[i][j]:  best score of seq1[:i] vs seq2[:j] ending with seq1[i-1] paired to seq2[j-1]
    # Ix[i][j]: best score ending with seq1[i-1] paired to a gap
    # Iy[i][j]: best score ending with seq2[j-1] paired to a gap
    # Unreachable states stay at -inf so they can never win a max().
    M = [[neg_inf] * (m + 1) for _ in range(n + 1)]
    Ix = [[neg_inf] * (m + 1) for _ in range(n + 1)]
    Iy = [[neg_inf] * (m + 1) for _ in range(n + 1)]

    # Borders: the empty alignment scores 0; a prefix aligned against nothing
    # is a single gap run, charged exactly like a gap run in the interior.
    M[0][0] = 0
    for i in range(1, n + 1):
        Ix[i][0] = gap_opening + i * gap_extension
    for j in range(1, m + 1):
        Iy[0][j] = gap_opening + j * gap_extension

    # Fill the matrices
    for i in range(1, n + 1):
        for j in range(1, m + 1):
            score = match_score if seq1[i - 1] == seq2[j - 1] else mismatch_cost

            M[i][j] = max(M[i - 1][j - 1] + score,
                          Ix[i - 1][j - 1] + score,
                          Iy[i - 1][j - 1] + score)
            Ix[i][j] = max(M[i - 1][j] + gap_start,
                           Ix[i - 1][j] + gap_extension)
            Iy[i][j] = max(M[i][j - 1] + gap_start,
                           Iy[i][j - 1] + gap_extension)

    # Start the traceback in the best-scoring matrix (ties: M, then Ix, then Iy).
    final_scores = (('M', M[n][m]), ('Ix', Ix[n][m]), ('Iy', Iy[n][m]))
    best_score = max(value for _, value in final_scores)
    current_matrix = next(name for name, value in final_scores if value == best_score)

    # Columns are collected right-to-left and reversed at the end.
    columns1 = []
    columns2 = []
    i, j = n, m
    while i > 0 or j > 0:
        # On a border only one state is reachable; forcing it here guarantees
        # that seq1[i - 1] / seq2[j - 1] are never read with a zero index.
        if i == 0:
            current_matrix = 'Iy'
        elif j == 0:
            current_matrix = 'Ix'

        if current_matrix == 'M':
            score = match_score if seq1[i - 1] == seq2[j - 1] else mismatch_cost
            columns1.append(seq1[i - 1])
            columns2.append(seq2[j - 1])
            if M[i][j] == M[i - 1][j - 1] + score:
                current_matrix = 'M'
            elif M[i][j] == Ix[i - 1][j - 1] + score:
                current_matrix = 'Ix'
            else:
                current_matrix = 'Iy'
            i -= 1
            j -= 1
        elif current_matrix == 'Ix':
            columns1.append(seq1[i - 1])
            columns2.append("-")
            # Either this gap run was opened here (predecessor in M) or it
            # extends a run that is already open.
            if Ix[i][j] == M[i - 1][j] + gap_start:
                current_matrix = 'M'
            else:
                current_matrix = 'Ix'
            i -= 1
        else:  # current_matrix == 'Iy'
            columns1.append("-")
            columns2.append(seq2[j - 1])
            if Iy[i][j] == M[i][j - 1] + gap_start:
                current_matrix = 'M'
            else:
                current_matrix = 'Iy'
            j -= 1

    align1 = "".join(reversed(columns1))
    align2 = "".join(reversed(columns2))

    return align1, align2, best_score

# Demo: align the same two DNA strings with the affine-gap aligner and print
# both gapped rows followed by the score.
seq1 = "ACGTCATCA"
seq2 = "TAGTGTCA"
alignment = needleman_wunsch_affine(seq1, seq2)
for label, part in zip(("Alignment 1:", "Alignment 2:", "Score:"), alignment):
    print(label, part)


Alignment 1: ACGTCATCA
Alignment 2: TAGT-GTCA
Score: -1
