# Implement the Viterbi Algorithm

[ba10c](https://rosalind.info/problems/ba10c/)

## Decoding Problem

    Given:

A string x, followed by the alphabet Σ from which x was constructed, followed by the states States, transition matrix Transition, and emission matrix Emission of an HMM (Σ, States, Transition, Emission).

    Return:

A path that maximizes the (unconditional) probability Pr(x, π) over all possible paths π.

In [97]:
import numpy as np

In [98]:
def viterbi(x, T, E, initial_distribution, states_cnt, states=None):
    """Return the most probable hidden-state path for an observation sequence.

    The computation is done in log space so long sequences do not underflow.

    Args:
        x: Sequence of observation indices (integers indexing columns of E).
        T: Transition matrix as a NumPy array; T[i, j] is the probability of
            moving from state i to state j.
        E: Emission matrix as a NumPy array; E[j, k] is the probability of
            state j emitting symbol k.
        initial_distribution: NumPy array of starting probabilities, one per
            state.
        states_cnt: Number of hidden states.
        states: Optional sequence of state labels, indexed by state number.
            Defaults to consecutive characters starting at 'A'
            ('A', 'B', 'C', ...).

    Returns:
        The decoded path as a string with one label per observation, or an
        empty string when x is empty.

    Raises:
        ValueError: If fewer than states_cnt labels are supplied.
    """
    if states is None:
        states = [chr(ord('A') + k) for k in range(states_cnt)]
    elif len(states) < states_cnt:
        raise ValueError(
            f"expected at least {states_cnt} state labels, got {len(states)}"
        )

    seq_len = len(x)
    if seq_len == 0:
        return ''

    # Zero probabilities legitimately become -inf in log space; silence the
    # divide-by-zero warning NumPy would otherwise emit for log(0).
    with np.errstate(divide='ignore'):
        log_T = np.log(T)
        log_E = np.log(E)
        log_start = np.log(initial_distribution * E[:, x[0]])

    # omega[t, j]: best log-probability of any path ending in state j at time t.
    omega = np.zeros((seq_len, states_cnt))
    omega[0, :] = log_start

    # prev[t - 1, j]: predecessor of state j on the best path reaching it at t.
    prev = np.zeros((seq_len - 1, states_cnt), dtype=int)
    for t in range(1, seq_len):
        # scores[i, j] = omega[t - 1, i] + log T[i, j] + log E[j, x[t]]
        scores = omega[t - 1][:, None] + log_T + log_E[:, x[t]][None, :]
        prev[t - 1] = np.argmax(scores, axis=0)
        omega[t] = np.max(scores, axis=0)

    # Backtrack from the best final state to recover the full path.
    path = np.empty(seq_len, dtype=int)
    path[-1] = np.argmax(omega[-1])
    for t in range(seq_len - 2, -1, -1):
        path[t] = prev[t, path[t + 1]]

    return ''.join(states[s] for s in path)

In [99]:
def parse_input(lines):
    """Parse the lines of a Rosalind BA10C input file.

    Expected layout (sections separated by single delimiter lines):
    line 0 is the emitted string, line 2 the alphabet, line 4 the state
    names, then the transition matrix (header row plus one row per state)
    and the emission matrix (header row plus one row per state).

    Args:
        lines: List of text lines, e.g. from ``file.readlines()``.

    Returns:
        A tuple ``(observations, T, E, states_cnt)`` where observations is an
        integer array of alphabet indices for the emitted string, T and E are
        float arrays holding the transition and emission matrices, and
        states_cnt is the number of states.
    """
    x = lines[0].strip()
    alphabet = lines[2].strip().split()
    observations = np.array([alphabet.index(symbol) for symbol in x])
    states = lines[4].strip().split()
    states_cnt = len(states)

    # Each matrix is preceded by a delimiter line and a header row; the first
    # column of every data row is the state label, so it is dropped.
    transition_start = 7
    emission_start = 9 + states_cnt
    T = np.array(
        [line.split()[1:]
         for line in lines[transition_start:transition_start + states_cnt]],
        float,
    )
    # Read exactly states_cnt rows so trailing blank lines at the end of the
    # file do not produce a ragged (and unparsable) matrix.
    E = np.array(
        [line.split()[1:]
         for line in lines[emission_start:emission_start + states_cnt]],
        float,
    )
    return observations, T, E, states_cnt

In [100]:
# Read the problem input, decode the most probable state path and print it.
with open("rosalind_ba10c.txt") as f:
    lines = f.readlines()
observations, T, E, states_cnt = parse_input(lines)
# Uniform prior over the starting state; 1 / states_cnt keeps it a valid
# probability distribution for any number of states.
initial_distribution = np.full(states_cnt, 1.0 / states_cnt)
print(viterbi(observations, T, E, initial_distribution, states_cnt))


ABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABAB
