In [36]:
!pip install numpy pandas



In [2]:
import numpy as np
import pandas as pd

![SWPairHMMGlobal](figures/phmm_random.png)

In [3]:
# transition probabilities

eta = 0.1

a = pd.DataFrame(
    np.array([
        # B    X1                   a        Y1       E
        [ 0.0, 1 - eta,             eta,     0.0,     0.0 ], # B
        [ 0.0, 1 - eta,             eta,     0.0,     0.0 ], # X1
        [ 0.0, 0.0,                 0.0,     1 - eta, eta ], # a
        [ 0.0, 0.0,                 0.0,     1 - eta, eta ], # Y1
        [ 0.0, 0.0,                 0.0,     0.0,     0.0 ]  # E
    ]),
    columns = ['B', 'X1', 'a', 'Y1', 'E']
)
a.index = ['B', 'X1', 'a', 'Y1', 'E']

a

Unnamed: 0,B,X1,a,Y1,E
B,0.0,0.9,0.1,0.0,0.0
X1,0.0,0.9,0.1,0.0,0.0
a,0.0,0.0,0.0,0.9,0.1
Y1,0.0,0.0,0.0,0.9,0.1
E,0.0,0.0,0.0,0.0,0.0


In [4]:
# emissions (insert / delete states)

eXY = {
    'A': 0.25,
    'C': 0.25,
    'G': 0.25,
    'T': 0.25
}

eXY

{'A': 0.25, 'C': 0.25, 'G': 0.25, 'T': 0.25}

In [38]:
def max_vX1(S1, S2, i, j, v_X1, v_a, v_Y1, tb):
    q = eXY[S1[0]]
    
    r = np.array([
        a.loc['X1', 'X1']*v_X1[i - 1][j]
    ])
    
    mr = np.max(r)
    mi = np.argmax(r)
    
    v_X1[i][j] = q*mr
    tb[i][j] = mi

def max_va(S1, S2, i, j, v_X1, v_a, v_Y1, tb):
    q = eXY[S2[0]]
    
    r = np.array([
        a.loc['X1', 'a']*v_X1[i][j - 1]
    ])
    
    mr = np.max(r)
    mi = np.argmax(r)
    
    v_a[i][j] = q*mr
    tb[i][j] = mi


def max_vY1(S1, S2, i, j, v_X1, v_a, v_Y1, tb):
    q = eXY[S2[0]]
    
    r = np.array([
        a.loc['a', 'Y1']*v_a[i][j - 1],
        a.loc['Y1', 'Y1']*v_Y1[i][j - 1]
    ])
    
    mr = np.max(r)
    mi = np.argmax(r)
    
    v_Y1[i][j] = q*mr
    tb[i][j] = mi

def traceback(S1, S2, v_X1, v_a, v_Y1, tb_X1, tb_a, tb_Y1):
    i = len(A)
    j = len(B)
    n = np.max([len(A), len(B)]) - 1
    
    r = np.array([ v_X1[i][j], v_Y1[i][j] ])
    
    mr = np.max(r)
    mi = np.argmax(r)
    
    a1 = (n+1)*[' ']
    a2 = (n+1)*[' ']
            
    while i > 0 and j > 0:
        if mi == 0:
            a1[n] = '-'
            a2[n] = S2[j - 1]
            mi = tb_X1[i][j]
            i -= 1
            j -= 1
        elif mi == 1:
            a1[n] = S1[i - 1]
            a2[n] = '-'        
            mi = tb_Y1[i][j]
            i -= 1
            
        n -= 1
        
    return ''.join(a1), ''.join(a2)

In [39]:
# Initialization

## Specify sequences

A = "TTTCAACG"
B = "ACG"

## Viterbi matrices

vX1 = np.zeros([len(A)+1, len(B)+1])
va  = np.zeros([len(A)+1, len(B)+1])
vY1 = np.zeros([len(A)+1, len(B)+1])

vX1[0,0] = 1
va[0,0]  = 0
vY1[0,0] = 0

## Traceback matrices

tX1 = np.zeros([len(A)+1, len(B)+1])
ta  = np.zeros([len(A)+1, len(B)+1])
tY1 = np.zeros([len(A)+1, len(B)+1])

In [40]:
# Populate Viterbi matrices

for i in range(1, len(A)+1):
    for j in range(1, len(B)+1):
        max_vX1(A, B, i, j, vM, vX, vY, tX)
        max_va(A, B, i, j, vM, vX, vY, tM)
        max_vY1(A, B, i, j, vM, vX, vY, tY)

In [42]:
# Follow traceback matrices

a1, a2 = traceback(A, B, vM, vX, vY, tM, tX, tY)

In [44]:
# Print alignment

print(a1)
print(a2)

TTACG
T-A-G
