### Pairwise global sequence alignment

In [1]:
import numpy as np

In [2]:
# Sequences to align: seq1 is indexed by column, seq2 by row.
seq1 = "ACGTCCTTCATT"
seq2 = "GTCTCATG"

# One extra row and column for the empty-prefix boundary.
num_rows = 1 + len(seq2)
num_cols = 1 + len(seq1)

d = -2 # gap penalty

# F holds integer alignment scores; T holds one traceback character per cell
# (blank until a direction is written into it).
F = np.zeros((num_rows, num_cols), dtype="int")
T = np.full((num_rows, num_cols), " ", dtype="str")

# Boundary scores: position k in the first row/column costs k gap penalties.
# F[0, 0] is 0 from both assignments (0 * d).
F[:, 0] = d * np.arange(num_rows)
F[0, :] = d * np.arange(num_cols)

In [3]:
# Seed the borders of the traceback matrix:
#   first column -> 'u' (up), first row -> 'l' (left), origin -> '.'.
T[1:, 0] = 'u'
T[0, 1:] = 'l'
T[0, 0] = '.'

In [4]:
# Display the traceback matrix: '.' at the origin, 'u' down the first column,
# 'l' along the first row, and blanks in the cells not yet filled.
T

array([['.', 'l', 'l', 'l', 'l', 'l', 'l', 'l', 'l', 'l', 'l', 'l', 'l'],
       ['u', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '],
       ['u', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '],
       ['u', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '],
       ['u', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '],
       ['u', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '],
       ['u', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '],
       ['u', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '],
       ['u', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ']],
      dtype='<U1')

In [5]:
# Display the score matrix: the first row and column hold cumulative gap
# penalties (multiples of d); the remaining cells are still zero.
F

array([[  0,  -2,  -4,  -6,  -8, -10, -12, -14, -16, -18, -20, -22, -24],
       [ -2,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
       [ -4,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
       [ -6,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
       [ -8,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
       [-10,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
       [-12,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
       [-14,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
       [-16,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0]])

In [6]:
match_score = 1
mismatch_score = 0

# Traceback symbols in the same order as the candidate scores below.
# np.argmax returns the first maximum, so ties resolve diagonal > left > up.
moves = ('d', 'l', 'u')

# Fill F and T row by row; every cell depends on its diagonal, left and
# upper neighbours, which are already final when the cell is visited.
for i in range(1, num_rows):
    for j in range(1, num_cols):
        # Score for pairing seq2[i-1] with seq1[j-1].
        pair_score = match_score if seq2[i-1] == seq1[j-1] else mismatch_score

        candidates = [
            F[i-1, j-1] + pair_score,  # diagonal: align the two characters
            F[i, j-1] + d,             # left: seq1 character against a gap
            F[i-1, j] + d,             # up: seq2 character against a gap
        ]

        best = int(np.argmax(candidates))
        F[i, j] = candidates[best]
        T[i, j] = moves[best]

print(F)
print(T)

[[  0  -2  -4  -6  -8 -10 -12 -14 -16 -18 -20 -22 -24]
 [ -2   0  -2  -3  -5  -7  -9 -11 -13 -15 -17 -19 -21]
 [ -4  -2   0  -2  -2  -4  -6  -8 -10 -12 -14 -16 -18]
 [ -6  -4  -1   0  -2  -1  -3  -5  -7  -9 -11 -13 -15]
 [ -8  -6  -3  -1   1  -1  -1  -2  -4  -6  -8 -10 -12]
 [-10  -8  -5  -3  -1   2   0  -1  -2  -3  -5  -7  -9]
 [-12  -9  -7  -5  -3   0   2   0  -1  -2  -2  -4  -6]
 [-14 -11  -9  -7  -4  -2   0   3   1  -1  -2  -1  -3]
 [-16 -13 -11  -8  -6  -4  -2   1   3   1  -1  -2  -1]]
[['.' 'l' 'l' 'l' 'l' 'l' 'l' 'l' 'l' 'l' 'l' 'l' 'l']
 ['u' 'd' 'd' 'd' 'l' 'l' 'l' 'l' 'l' 'l' 'l' 'l' 'l']
 ['u' 'd' 'd' 'd' 'd' 'l' 'l' 'd' 'd' 'l' 'l' 'd' 'd']
 ['u' 'd' 'd' 'd' 'd' 'd' 'd' 'l' 'l' 'd' 'l' 'l' 'l']
 ['u' 'd' 'u' 'd' 'd' 'l' 'd' 'd' 'd' 'l' 'l' 'd' 'd']
 ['u' 'd' 'd' 'd' 'd' 'd' 'd' 'd' 'd' 'd' 'l' 'l' 'l']
 ['u' 'd' 'u' 'd' 'd' 'u' 'd' 'd' 'd' 'd' 'd' 'l' 'l']
 ['u' 'u' 'd' 'd' 'd' 'u' 'd' 'd' 'd' 'd' 'd' 'd' 'd']
 ['u' 'u' 'd' 'd' 'u' 'd' 'd' 'u' 'd' 'd' 'd' 'd' 'd']]


In [7]:
# Traceback: start in the bottom-right cell of T and follow the stored
# directions until the origin marker '.' is read.
direction = ' '

row_index = num_rows - 1 
col_index = num_cols - 1

# The aligned strings are accumulated back-to-front (last column first), so
# they are reversed for printing; the variables themselves stay reversed.
aln_seq1 = ''
aln_seq2 = ''

while direction != '.':
    direction = T[row_index,col_index]
    
    if direction == 'd':
        # Diagonal move: consume one character from each sequence.
        row_index = row_index - 1
        col_index = col_index - 1
        aln_seq1 = aln_seq1 + seq1[col_index]
        aln_seq2 = aln_seq2 + seq2[row_index]
        
    elif direction == 'l':
        # Left move: seq1 character aligned against a gap in seq2.
        col_index = col_index - 1
        aln_seq2 = aln_seq2 + '-'
        aln_seq1 = aln_seq1 + seq1[col_index]
        
    elif direction == 'u':
        # Up move: seq2 character aligned against a gap in seq1.
        row_index = row_index - 1
        aln_seq1 = aln_seq1 + '-'
        aln_seq2 = aln_seq2 + seq2[row_index]
        
    elif direction != '.':
        # Any other symbol (e.g. the blank fill value when the matrix-fill
        # cell has not been run) would leave both indices unchanged and make
        # this loop spin forever, so fail loudly instead.
        raise ValueError(
            f"unexpected traceback symbol {direction!r} at "
            f"({row_index}, {col_index}); fill F and T before tracing back"
        )
        
    # Show the partial alignment after every step (the terminating step
    # prints the finished alignment once more).
    print(aln_seq1[::-1])
    print(aln_seq2[::-1])

T
G
TT
TG
ATT
ATG
CATT
CATG
TCATT
TCATG
TTCATT
-TCATG
CTTCATT
C-TCATG
CCTTCATT
-C-TCATG
TCCTTCATT
T-C-TCATG
GTCCTTCATT
GT-C-TCATG
CGTCCTTCATT
-GT-C-TCATG
ACGTCCTTCATT
--GT-C-TCATG
ACGTCCTTCATT
--GT-C-TCATG
