## Local alignment

#### Smith-Waterman

https://en.wikipedia.org/wiki/Smith–Waterman_algorithm
https://gtuckerkellogg.github.io/pairwise/demo/



In [9]:
import pandas as pd
import numpy as np

In [164]:
def createScorMat(match = 1,mismatch = 0):
    """
    Create scoring matrix for nucleotides with a given match and mismatch values
    """
    nts = ['A','C','T','G']

    scoringMatrix = np.zeros((len(nts),len(nts)),)
    np.fill_diagonal(scoringMatrix,match)
    scoringMatrix
    scoringMatrix[scoringMatrix == 0] = mismatch
    scoringMatrix = pd.DataFrame(scoringMatrix,index=nts,columns=nts)
    return scoringMatrix

#### Dynamic programming implementation
def scoringMatrix_local(x,y,subMat,indel):
    m = len(x) + 1
    n = len(y) + 1
    globMax = (0,0)
    scoringMat=np.zeros((n,m))
    for i in range(1,m):
        #print(x[i-1])
        for j in range(1,n):
            if x[i-1] == y[j-1]:
                #scoringMat[j-1][i-1] + subMat.loc[x[i-1]][y[j-1]]
                scoringMat[j][i] = scoringMat[j-1][i-1] + subMat.loc[x[i-1]][y[j-1]]
            else:
                mismatch = scoringMat[j-1][i-1] + subMat.loc[x[i-1]][y[j-1]]
                left = scoringMat[j-1][i] + indel
                up = scoringMat[j][i-1] + indel
                scoringMat[j][i] = max(mismatch,left,up,0)
            if scoringMat[j][i] >= scoringMat[globMax[1]][globMax[0]]:
                globMax = (i,j)
    return scoringMat,globMax

def backtrack_local(x,y,sm,maxLocs):
    i = maxLocs[0]
    j = maxLocs[1]
    w=''
    z=''
    z,w
    while i*j > 0:
        if x[i-1] == y[j-1]:
            w += x[i-1]
            z += y[j-1]
            i -= 1
            j -= 1
        else:
            left = sm[j-1][i]
            up = sm[j][i-1]
            diag = sm[j-1][i-1]
            whichmax = np.argmax([left,up,diag])
            if whichmax == 0:
                w += '-'
                z += y[j-1]
                j -= 1
            elif whichmax == 1:
                z += '-'
                w += x[i-1]
                i -= 1
            else:
                w += x[i-1]
                z += y[j-1]
                i -= 1
                j -= 1
        if sm[j][i] == 0:
            break
    return w[::-1],z[::-1]


def locAl(x,y,subMat,indel):
    '''
    A wrapper for DP implementation of Smith-Waterman
    '''
    sm, maxLocs = scoringMatrix_local(x,y,subMat)
    return backtrack_local(x,y,sm,maxLocs)


def vizScoringMat(x,y,scoringMat):
    return pd.DataFrame(scoringMat,index=['']+[_ for _ in y],
             columns=['']+[_ for _ in x])

In [166]:
x='TGTTACGG'
y='GGTTGACTA'
indel = -2
subMat = createScorMat(3,mismatch = -3)
subMat

Unnamed: 0,A,C,T,G
A,3.0,-3.0,-3.0,-3.0
C,-3.0,3.0,-3.0,-3.0
T,-3.0,-3.0,3.0,-3.0
G,-3.0,-3.0,-3.0,3.0


In [165]:
locAl(x,y,subMat,indel)

TypeError: scoringMatrix_local() missing 1 required positional argument: 'indel'

('GTTGACC', 'GTT-ACC')

In [100]:
sm[j][i]

13.0