### Implementing Global Alignment

### Define:
##### 1. The Alphabet 
##### 2. The 2-D array (matrix) with the penalties for the different mismatches

In [26]:
# DNA alphabet: a symbol's position in this list is its row/column in `score`.
alphabet = list('ACGT')

# PENALTY MATRIX (5x5)
#
# Rows and columns are both ordered A, C, G, T, skip. The first four
# rows/columns hold the penalty for pairing two letters (0 on the diagonal,
# where the letters are equal); the last row/column holds the penalty for
# skipping a letter.
score = [
    [0, 4, 2, 4, 8],  # A
    [4, 0, 4, 2, 8],  # C
    [2, 4, 0, 4, 8],  # G
    [4, 2, 4, 0, 8],  # T
    [8, 8, 8, 8, 8],  # skip
]

### Modify Edit Distance Function to use the penalty matrix above 
##### (Global Alignment Function)

In [27]:
def globalAlignment(x, y, symbols=None, penalties=None):
  """Return the minimum total penalty of a global alignment of x and y.

  Both sequences are aligned end to end. Each aligned pair of different
  symbols costs the corresponding entry of the penalty matrix, each skipped
  symbol costs the entry in the matrix's last row/column, and a pair of
  equal symbols costs nothing.

  Args:
    x: First sequence (e.g. a string); indexes the rows of the matrix.
    y: Second sequence; indexes the columns of the matrix.
    symbols: Optional sequence of allowed symbols. A symbol's position is
      its row/column in `penalties`. Defaults to the module-level
      `alphabet`.
    penalties: Optional square penalty matrix with one row/column per
      symbol plus a final row/column of skip penalties. Defaults to the
      module-level `score`.

  Returns:
    The smallest achievable sum of penalties.

  Raises:
    ValueError: If x or y contains a symbol that is not in `symbols`.
  """
  # Fall back to the module-level tables so two-argument calls are unchanged.
  symbols = alphabet if symbols is None else symbols
  penalties = score if penalties is None else penalties

  # Look every symbol up once instead of calling list.index() (a linear
  # search) several times per matrix cell. setdefault keeps the first
  # occurrence, which is what list.index() would return.
  position = {}
  for idx, symbol in enumerate(symbols):
    position.setdefault(symbol, idx)

  def toIndices(seq, label):
    indices = []
    for ch in seq:
      if ch not in position:
        raise ValueError("%s contains %r, which is not in the alphabet %r"
                         % (label, ch, list(symbols)))
      indices.append(position[ch])
    return indices

  xIdx = toIndices(x, 'x')
  yIdx = toIndices(y, 'y')

  skipRow = penalties[-1]  # skipRow[b]: penalty for skipping symbol b of y

  # Only the previous row is needed to fill the current one, so keep two
  # rows rather than the whole (len(x)+1) by (len(y)+1) matrix.
  # Row 0: the empty prefix of x against every prefix of y (skips only).
  prevRow = [0]
  for b in yIdx:
    prevRow.append(prevRow[-1] + skipRow[b])

  for a in xIdx:
    rowA = penalties[a]
    skipA = rowA[-1]                # penalty for skipping symbol a of x
    curRow = [prevRow[0] + skipA]   # column 0: prefix of x against empty y
    for j, b in enumerate(yIdx, 1):
      distHor = curRow[j-1] + skipRow[b]
      distVer = prevRow[j] + skipA
      if a == b:                    # equal symbols: no additional penalty
        distDiag = prevRow[j-1]
      else:
        distDiag = prevRow[j-1] + rowA[b]
      curRow.append(min(distHor, distVer, distDiag))
    prevRow = curRow

  return prevRow[-1]

### Test Function

In [28]:
# Identical sequences (x == y): every position matches, so no penalties apply.
# Expected result: 0.
x = 'TACCAGATTCGA'
y = 'TACCAGATTCGA'
globalAlignment(x,y)

0

In [29]:
# y is x with the first 'G' removed, so one letter of x has to be skipped
# (one skip penalty). Expected result: 8.
x = 'TACCAGATTCGA'
y = 'TACCAATTCGA'
globalAlignment(x,y)

8

In [31]:
# y is x with the first 'G' replaced by 'C': purine to pyrimidine
# (transversion penalty). Expected result: 4.
x = 'TACCAGATTCGA'
y = 'TACCACATTCGA'
globalAlignment(x,y)

4

In [32]:
# y is x with the first 'G' replaced by 'A': purine to purine
# (transition penalty). Expected result: 2.
x = 'TACCAGATTCGA'
y = 'TACCAAATTCGA'
globalAlignment(x,y)

2

In [33]:
# y is x with the first 'G' replaced by 'A' (purine to purine) and the last
# 'C' removed (transition penalty + skip penalty). Expected result: 2 + 8 = 10.
x = 'TACCAGATTCGA'
y = 'TACCAAATTGA'
globalAlignment(x,y)

10