# Hidden Markov Model (HMM)

Sources:
* [Hidden Markov Models - Speech and Language Processing. Daniel Jurafsky & James H. Martin](https://web.stanford.edu/~jurafsky/slp3/A.pdf)
* [A tutorial on hidden Markov models and selected applications in speech recognition - Rabiner](https://www.ece.ucsb.edu/Faculty/Rabiner/ece259/Reprints/tutorial%20on%20hmm%20and%20applications.pdf)
* [The Application of Hidden Markov Models in Speech Recognition](https://mi.eng.cam.ac.uk/~mjfg/mjfg_NOW.pdf)


In [1]:
%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

from IPython.display import display

In [2]:
n_hidden_states = 2
n_observation_states = 3

p_init = np.array([0.8, 0.2])

p_transition = np.array([
    [0.6, 0.4],
    [0.5, 0.5]])

p_emission = np.array([
    [0.2, 0.5],
    [0.4, 0.4],
    [0.4, 0.1]])



hidden_state_names = ['Hot', 'Cold']

observation_names = ['1', '2', '3']

hidden_state_enc = np.eye(n_hidden_states)  # One-hot encoding of hidden states
observation_state_enc = np.eye(n_observation_states)  # One-hot encoding of observed states

In [3]:
print('Inital state probability:')
display(pd.DataFrame(p_init, index=[f'P({s})' for s in hidden_state_names], columns=["Prob"]))

print('\nTransition Probability Matrix:')
display(pd.DataFrame(p_transition, index=[f'P(*|{s})' for s in hidden_state_names], columns=[f'P({s}|*)' for s in hidden_state_names]))


print('\nEmission Probability Matrix:')
display(pd.DataFrame(p_emission, index=[f'P({o}|*)' for o in observation_names], columns=[f'P(*|{s})' for s in hidden_state_names]))


Inital state probability:


Unnamed: 0,Prob
P(Hot),0.8
P(Cold),0.2



Transition Probability Matrix:


Unnamed: 0,P(Hot|*),P(Cold|*)
P(*|Hot),0.6,0.4
P(*|Cold),0.5,0.5



Emission Probability Matrix:


Unnamed: 0,P(*|Hot),P(*|Cold)
P(1|*),0.2,0.5
P(2|*),0.4,0.4
P(3|*),0.4,0.1


## Sampling from HMM

In [4]:

sample_sequence = []
hidden_state_prob = p_init

for i in range(10):
    #   print('i: ', i)
    #   print('hidden_state_prob: ', hidden_state_prob.shape, hidden_state_prob)
    state_idx = np.random.choice(n_hidden_states, p=hidden_state_prob, size=(1,))
    sample_sequence.append(int(state_idx))
    #   print('state_idx: ', state_idx.shape, state_idx)
    state_enc = hidden_state_enc[state_idx]
    #   print('state_enc: ', state_enc.shape, state_enc)
    observation_probs = p_emission @ state_enc.T
    #   print('observation_probs: ', observation_probs.shape, observation_probs)
    observation_idx = np.random.choice(n_observation_states, p=observation_probs[:,0], size=(1,))
    #   print('observation_idx: ', observation_idx.shape, observation_idx)
    hidden_state_prob = (state_enc @ p_transition)[0,:]
  
print(sample_sequence)

[1, 0, 0, 0, 0, 0, 0, 0, 0, 0]


## Likelihood Computation: The Forward Algorithm

$$
\begin{align}
\alpha_{t+1} & =\alpha_{t} \cdot T \cdot Q_{o} \\
\alpha_{t+1} &= 
\begin{bmatrix}
\alpha_{t}(H) & \alpha_{t}(C)
\end{bmatrix}
\cdot
\begin{bmatrix}
P(H|H) & P(C|H) \\
P(H|C) & P(C|C)
\end{bmatrix}
\cdot
\begin{bmatrix}
P(O|H) & 0 \\
0 & P(O|C)
\end{bmatrix}
\end{align}
$$

In [5]:
observed_seq = [2, 0 , 2]
# observed_seq = [1, 1, 1, 1, 1]


hidden_state_prob = np.diag(p_init)
alpha = np.array([1, 1])


for idx, observation_idx in enumerate(observed_seq):
    print('\nidx: ', idx, 'observation_idx: ', observation_idx)
    print('hidden_state_prob: ', hidden_state_prob.shape, hidden_state_prob)
    p_obs_given_hidden = p_emission[observation_idx, :]
    print('p_obs_given_hidden: ', p_obs_given_hidden.shape, p_obs_given_hidden)
    alpha = alpha @ hidden_state_prob @ np.diag(p_obs_given_hidden)
    print('alpha: ', alpha.shape, alpha)
    hidden_state_prob = p_transition

final_prob = alpha.sum()
print('final_prob: ', final_prob)


idx:  0 observation_idx:  2
hidden_state_prob:  (2, 2) [[0.8 0. ]
 [0.  0.2]]
p_obs_given_hidden:  (2,) [0.4 0.1]
alpha:  (2,) [0.32 0.02]

idx:  1 observation_idx:  0
hidden_state_prob:  (2, 2) [[0.6 0.4]
 [0.5 0.5]]
p_obs_given_hidden:  (2,) [0.2 0.5]
alpha:  (2,) [0.0404 0.069 ]

idx:  2 observation_idx:  2
hidden_state_prob:  (2, 2) [[0.6 0.4]
 [0.5 0.5]]
p_obs_given_hidden:  (2,) [0.4 0.1]
alpha:  (2,) [0.023496 0.005066]
final_prob:  0.028562000000000008


## Decoding: The Viterbi Algorithm

$$
\begin{align}
v_{t+1} & =\text{max(col)}( v_{t} \cdot T \cdot Q_{o} )\\
v_{t+1} &= \text{max(col)}(
\begin{bmatrix}
v_{t}(H) & 0 \\
0 & v_{t}(C)
\end{bmatrix}
\cdot
\begin{bmatrix}
P(H|H) & P(C|H) \\
P(H|C) & P(C|C)
\end{bmatrix}
\cdot
\begin{bmatrix}
P(O|H) & 0 \\
0 & P(O|C)
\end{bmatrix}
)
\end{align}
$$

In [6]:
observed_seq = [2, 0 , 2]
# observed_seq = [1, 1, 1, 1, 1]


hidden_state_prob = np.diag(p_init)
viterbi = np.array([1, 1])
decoded_seq = []

for idx, observation_idx in enumerate(observed_seq):
    print('\nidx: ', idx, 'observation_idx: ', observation_idx)
    print('hidden_state_prob: ', hidden_state_prob.shape, hidden_state_prob)
    p_obs_given_hidden = p_emission[observation_idx, :]
    print('p_obs_given_hidden: ', p_obs_given_hidden.shape, p_obs_given_hidden)
    viterbi = np.diag(viterbi) @ hidden_state_prob @ np.diag(p_obs_given_hidden)
    print('viterbi: ', viterbi.shape, viterbi)
    backpointer = np.argmax(viterbi, axis=0)
    print('backpointer: ', backpointer.shape, backpointer)
    viterbi = np.diag(viterbi[backpointer])
    print('viterbi: ', viterbi.shape, viterbi)
    decoded_seq.append(np.argmax(viterbi))
    hidden_state_prob = p_transition

print('decoded_seq: ', decoded_seq)


idx:  0 observation_idx:  2
hidden_state_prob:  (2, 2) [[0.8 0. ]
 [0.  0.2]]
p_obs_given_hidden:  (2,) [0.4 0.1]
viterbi:  (2, 2) [[0.32 0.  ]
 [0.   0.02]]
backpointer:  (2,) [0 1]
viterbi:  (2,) [0.32 0.02]

idx:  1 observation_idx:  0
hidden_state_prob:  (2, 2) [[0.6 0.4]
 [0.5 0.5]]
p_obs_given_hidden:  (2,) [0.2 0.5]
viterbi:  (2, 2) [[0.0384 0.064 ]
 [0.002  0.005 ]]
backpointer:  (2,) [0 0]
viterbi:  (2,) [0.0384 0.064 ]

idx:  2 observation_idx:  2
hidden_state_prob:  (2, 2) [[0.6 0.4]
 [0.5 0.5]]
p_obs_given_hidden:  (2,) [0.4 0.1]
viterbi:  (2, 2) [[0.009216 0.001536]
 [0.0128   0.0032  ]]
backpointer:  (2,) [1 1]
viterbi:  (2,) [0.0128 0.0032]
decoded_seq:  [0, 1, 0]
