# Needleman-Wunsch Algorithm

- dynamic programming approach used for global sequence alignment
- introduced by Saul Ben Needleman and Christian Dennis Wunsch in 1970

In [228]:
# The two sequences to align globally (seq1 spans the matrix rows,
# seq2 the matrix columns).
seq1, seq2 = 'GAATTACAA', 'GATTGGACGAA'

# Scoring scheme: reward for identical characters, penalty for differing
# characters, and penalty for a gap (insertion/deletion).
match, mismatch, indel = 1, -1, -1

## Initialization

In [229]:
# Matrix dimensions: one extra row/column represents the empty prefix.
m, n = len(seq1) + 1, len(seq2) + 1

# scoring_matrix[i][j] holds the score for aligning seq1[:i] with seq2[:j].
# pointer_matrix[i][j] records the move that produced that score:
#   1 = diagonal, 2 = top, 3 = left (0 remains only at the origin).
scoring_matrix = [[0] * n for _ in range(m)]
pointer_matrix = [[0] * n for _ in range(m)]

# First column: a prefix of seq1 against nothing consists of gaps only, so
# each cell is reached from the cell above it. Recording that pointer keeps
# the traceback from stepping left out of column 0.
for i in range(1, m):
    scoring_matrix[i][0] = i * indel
    pointer_matrix[i][0] = 2

# First row: a prefix of seq2 against nothing, reached from the left.
for j in range(1, n):
    scoring_matrix[0][j] = j * indel
    pointer_matrix[0][j] = 3

## Scoring

In [230]:
# Fill both matrices row by row; each cell takes the best of three moves.
for i in range(1, m):
    for j in range(1, n):
        # Score for pairing seq1[i-1] with seq2[j-1].
        pair_score = match if seq1[i-1] == seq2[j-1] else mismatch

        # (score, pointer code) for each predecessor, listed in tie-break
        # priority order: diagonal (1), then top (2), then left (3).
        candidates = (
            (scoring_matrix[i-1][j-1] + pair_score, 1),
            (scoring_matrix[i-1][j] + indel, 2),
            (scoring_matrix[i][j-1] + indel, 3),
        )

        # max() returns the first maximal item, so earlier candidates win ties.
        scoring_matrix[i][j], pointer_matrix[i][j] = max(
            candidates, key=lambda candidate: candidate[0]
        )

## Backtracking

In [231]:
# Work on copies so seq1/seq2 stay untouched and every cell can be re-run
# without the inserted gaps leaking back into the inputs.
aligned1, aligned2 = list(seq1), list(seq2)
i, j = m-1, n-1

# Walk from the bottom-right cell back to the origin, inserting a gap
# whenever only one of the two sequences advances.
while i > 0 or j > 0:
    # On a border only one move is legal regardless of the stored pointer:
    # row 0 can only move left, column 0 can only move up. Checking this
    # explicitly prevents j (or i) from becoming negative.
    if i > 0 and j > 0 and pointer_matrix[i][j] == 1:
        # Diagonal: seq1[i-1] is paired with seq2[j-1].
        i -= 1
        j -= 1
    elif j == 0 or (i > 0 and pointer_matrix[i][j] == 2):
        # Top: seq1[i-1] is paired with a gap in the second sequence.
        i -= 1
        aligned2.insert(j, '-')
    else:
        # Left: seq2[j-1] is paired with a gap in the first sequence.
        j -= 1
        aligned1.insert(i, '-')

# Mark columns where both aligned sequences carry the same character.
alignment = ['|' if a == b else ' ' for a, b in zip(aligned1, aligned2)]

print(''.join(aligned1))
print(''.join(alignment))
print(''.join(aligned2))

GAATT--AC-AA
| |||  || ||
G-ATTGGACGAA
