In [None]:
# Problem 1 - Implement the Needleman-Wunsch algorithm 
# to compute the optimal alignment for a pair of sequences

import numpy

class NeedlemanWunsch(object):
    """ Global pairwise alignment of two strings (Needleman-Wunsch dynamic program). """

    def __init__(self, string1, string2, gapScore=-2, matchScore=3, mismatchScore=-3):
        """ Finds an optimal global alignment of two strings.

        Fills self.editMatrix so that editMatrix[i, j] holds the best score for
        aligning string1[:i] with string2[:j].

        string1, string2 -- the sequences to align (string1 indexes the rows,
                            string2 indexes the columns)
        gapScore         -- score added for every character aligned to a gap
        matchScore       -- score added for aligning two equal characters
        mismatchScore    -- score added for aligning two different characters

        The matrix is stored with an integer dtype, so the scores should be
        integers.
        """

        # Kept so the traceback can tell matches from mismatches and reuse
        # exactly the scores the matrix was built with.
        self.string1 = string1
        self.string2 = string2
        self.gapScore = gapScore
        self.matchScore = matchScore
        self.mismatchScore = mismatchScore

        rowCount = len(string1) + 1
        colCount = len(string2) + 1

        # Numpy matrix representing edit matrix, preinitialized to zero
        self.editMatrix = numpy.zeros(shape=[rowCount, colCount], dtype=int)

        # Border cells: aligning a prefix against the empty string costs one
        # gap per character of that prefix.
        self.editMatrix[:, 0] = gapScore * numpy.arange(rowCount)
        self.editMatrix[0, :] = gapScore * numpy.arange(colCount)

        # Interior cells: best of the diagonal (match/mismatch) move and the
        # two gap moves. The max is taken in every case so the recurrence
        # stays correct for any choice of scores.
        for i in range(1, rowCount):
            for j in range(1, colCount):
                diagonal = self.editMatrix[i-1, j-1] + self._pairScore(i, j)
                deletion = self.editMatrix[i-1, j] + gapScore
                insertion = self.editMatrix[i, j-1] + gapScore
                self.editMatrix[i, j] = max(diagonal, deletion, insertion)

    def _pairScore(self, i, j):
        """ Score for aligning string1[i-1] with string2[j-1], i.e. the
        diagonal move into matrix cell [i, j]. """
        if self.string1[i-1] == self.string2[j-1]:
            return self.matchScore
        return self.mismatchScore

    def getAlignmentScore(self):
        """ Return the alignment score (the bottom-right cell of the edit matrix). """

        return self.editMatrix[-1, -1]

    def getAlignment(self, string1, string2):
        """ Returns an optimal global alignment of two strings. The alignment
        is returned as an ordered list of aligned pairs
        (index in string1, index in string2).

        e.g. For the two strings GATTACA and TACA a global alignment is
           GATTACA
           ---TACA
        This alignment would be returned as:

        [(3, 0), (4, 1), (5, 2), (6, 3)]

        Every diagonal step of the traceback yields a pair, so positions
        aligned as a mismatch are reported as well as matches; positions
        aligned to a gap are not reported.

        The string1 / string2 arguments are kept for interface compatibility.
        The traceback walks self.editMatrix, so it uses the strings and scores
        given to the constructor.

        Raises ValueError if a cell has no predecessor consistent with the
        scores (e.g. editMatrix was modified after construction).
        """

        alignedPairs = []

        # Trace back from the bottom-right cell. Stop as soon as either string
        # is exhausted: the rest of the path is gaps only and adds no pairs.
        row, col = self.editMatrix.shape
        row -= 1
        col -= 1
        while row > 0 and col > 0:
            current = self.editMatrix[row, col]
            if current == self.editMatrix[row-1, col-1] + self._pairScore(row, col):
                # Diagonal move: the two characters are aligned to each other.
                row -= 1
                col -= 1
                alignedPairs.append((row, col))
            elif current == self.editMatrix[row, col-1] + self.gapScore:
                # Character of string2 aligned to a gap.
                col -= 1
            elif current == self.editMatrix[row-1, col] + self.gapScore:
                # Character of string1 aligned to a gap.
                row -= 1
            else:
                # Without this guard an inconsistent matrix would loop forever.
                raise ValueError(
                    "edit matrix cell (%d, %d) is inconsistent with the alignment scores"
                    % (row, col))
        alignedPairs.reverse()
        return alignedPairs
        
        
      
# Example inputs: TACA lines up with the last four characters of GATTACA
string1, string2 = "GATTACA", "TACA"

# Build the edit matrix for the example pair using the default scores
needlemanWunsch = NeedlemanWunsch(string1, string2)

[[  0  -2  -4  -6  -8]
 [ -2  -3  -5  -7  -9]
 [ -4  -5   0  -2  -4]
 [ -6  -1  -2  -3  -5]
 [ -8  -3  -4  -5  -6]
 [-10  -5   0  -2  -2]
 [-12  -7  -2   3   1]
 [-14  -9  -4   1   6]]


In [None]:
# Check that the edit matrix is built correctly: element-wise comparison
# against the expected matrix for GATTACA vs. TACA (every entry should be True)

needlemanWunsch.editMatrix == [
    [  0,  -2,  -4,  -6,  -8],
    [ -2,  -3,  -5,  -7,  -9],
    [ -4,  -5,   0,  -2,  -4],
    [ -6,  -1,  -2,  -3,  -5],
    [ -8,  -3,  -4,  -5,  -6],
    [-10,  -5,   0,  -2,  -2],
    [-12,  -7,  -2,   3,   1],
    [-14,  -9,  -4,   1,   6],
]

array([[ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True]])

In [None]:
# Check the alignment score for GATTACA vs. TACA with the default scores:
# four matches (4 * 3) plus three gaps (3 * -2) gives 6

needlemanWunsch.getAlignmentScore() == 6

True

In [None]:
# Check the traceback: TACA should pair with the last four characters of
# GATTACA, reported as (index in string1, index in string2) tuples

needlemanWunsch.getAlignment(string1, string2) == [(3, 0), (4, 1), (5, 2), (6, 3)]

True