Sanity check for a Viterbi algorithm implementation, tested on the problem from HW4.

In [1]:
import pandas as pd

## Parameters

In [2]:
# State labels of the model; START and STOP are the boundary states,
# X, Y and Z are the states in between.
states = [
    "START",
    "X",
    "Y",
    "Z",
    "STOP",
]
# Symbols that can be observed.
emissions = [
    "a",
    "b",
    "c",
]

### Transitions

In [3]:
# Transition probabilities: a.loc[u, v] is the probability of moving from
# state u (row) to state v (column).
# The matrix is created as all-zero floats up front, so assigning fractional
# probabilities never relies on fillna() downcasting an object frame or on
# upcasting an integer column.
a = pd.DataFrame(0.0, index=states, columns=states)
a.loc['START', ['X', 'Z']] = [0.5, 0.5]
a.loc['X', ['Y', 'Z', 'STOP']] = [0.4, 0.4, 0.2]
a.loc['Y', ['X', 'Z', 'STOP']] = [0.2, 0.2, 0.6]
a.loc['Z', ['X', 'Y']] = [0.4, 0.6]
# Rows/columns not assigned above (e.g. everything out of STOP) stay at 0.
a

Unnamed: 0,START,X,Y,Z,STOP
START,0,0.5,0.0,0.5,0.0
X,0,0.0,0.4,0.4,0.2
Y,0,0.2,0.0,0.2,0.6
Z,0,0.4,0.6,0.0,0.0
STOP,0,0.0,0.0,0.0,0.0


### Emissions

In [4]:
# Emission probabilities: b.loc[s, o] is the probability that state s (row)
# emits symbol o (column).
# Created as all-zero floats up front so the assignments below do not depend
# on fillna() downcasting or on upcasting integer columns.
b = pd.DataFrame(0.0, index=states, columns=emissions)
b.loc['X', ['a', 'b']] = [0.4, 0.6]
b.loc['Y', ['a', 'c']] = [0.4, 0.6]
b.loc['Z', ['a', 'b', 'c']] = [0.2, 0.6, 0.2]
# START and STOP rows are left at 0: no emission is assigned to them.
b

Unnamed: 0,a,b,c
START,0.0,0.0,0.0
X,0.4,0.6,0.0
Y,0.4,0.0,0.6
Z,0.2,0.6,0.2
STOP,0.0,0.0,0.0


## Problem

In [5]:
# Observation sequence to decode.
obs_seq = ["b", "b"]
# Number of trellis columns: one per observation, plus one for START
# and one for STOP.
n = 2 + len(obs_seq)

## Step 0: creating empty matrices
`P`: probability table  
`B`: backpointer table

In [6]:
# Probability table: P.loc[s, j] will hold the best path score that ends in
# state s at trellis column j.
# It is created as all-zero floats because the later steps store fractional
# scores in it; this avoids fillna() downcasting and integer-column upcasting.
P = pd.DataFrame(0.0, index=states, columns=range(n))
P

Unnamed: 0,0,1,2,3
START,0,0,0,0
X,0,0,0,0
Y,0,0,0,0
Z,0,0,0,0
STOP,0,0,0,0


In [7]:
# Backpointer table: same layout as P (one row per state, one column per
# trellis position). It starts out empty (all missing) and is filled with
# predecessor state labels in the later steps.
B = pd.DataFrame(
    columns=range(n),
    index=states,
)
B

Unnamed: 0,0,1,2,3
START,,,,
X,,,,
Y,,,,
Z,,,,
STOP,,,,


## Step 1: initialization

In [8]:
# Every path begins in START at column 0, so that single cell gets
# probability 1; all other cells in column 0 remain 0.
P.at['START', 0] = 1
P

Unnamed: 0,0,1,2,3
START,1,0,0,0
X,0,0,0,0
Y,0,0,0,0
Z,0,0,0,0
STOP,0,0,0,0


## Step 2: recursion

In [9]:
# Fill trellis columns 1 .. n-2, one column per observed symbol.
for t in range(1, n - 1):
    symbol = obs_seq[t - 1]  # column t pairs with observation t-1 (column 0 is START)
    for curr in states:
        emit = b.loc[curr, symbol]  # same for every predecessor, so look it up once
        for prev in states:
            score = P.loc[prev, t - 1] * a.loc[prev, curr] * emit
            if not score > P.loc[curr, t]:
                # Not strictly better than what is stored: keep the current
                # entry (on ties the earlier predecessor in `states` wins).
                continue
            P.loc[curr, t] = score  # best score so far for (curr, t)
            B.loc[curr, t] = prev   # predecessor that achieved it

In [10]:
P

Unnamed: 0,0,1,2,3
START,1,0.0,0.0,0
X,0,0.3,0.072,0
Y,0,0.0,0.0,0
Z,0,0.3,0.072,0
STOP,0,0.0,0.0,0


In [11]:
B

Unnamed: 0,0,1,2,3
START,,,,
X,,START,Z,
Y,,,,
Z,,START,X,
STOP,,,,


## Step 3: termination

In [12]:
# Termination: in the last trellis column only the STOP row is filled.
# No emission term is used here; the score is the predecessor's score in the
# previous column times its transition probability into STOP.
j = n - 1  # index of the last column, derived from n instead of hard-coding 3
v = 'STOP'
for u in states:
    p = P.loc[u, j-1] * a.loc[u, v]
    if p > P.loc[v, j]:
        P.loc[v, j] = p  # probability
        B.loc[v, j] = u  # backpointer

In [13]:
P

Unnamed: 0,0,1,2,3
START,1,0.0,0.0,0.0
X,0,0.3,0.072,0.0
Y,0,0.0,0.0,0.0
Z,0,0.3,0.072,0.0
STOP,0,0.0,0.0,0.0144


In [14]:
B

Unnamed: 0,0,1,2,3
START,,,,
X,,START,Z,
Y,,,,
Z,,START,X,
STOP,,,,X


## Step 4: backtracing

In [15]:
# Follow the backpointers from STOP in the last column back to column 1.
# state_seq is built in reverse order (STOP first).
state_seq = ['STOP']
for i in reversed(range(1, n)):
    state_seq.append(B.loc[state_seq[-1], i])
# Put the path in forward order and drop its first and last entries
# (the START and STOP ends of the path).
list(reversed(state_seq))[1:-1]

['Z', 'X']