In [1]:
import numpy as np

In [2]:
def viterbi(words, states, start_prob, trans_prob, emit_prob, verbose=True):
    """Return the most probable state sequence for an observation sequence.

    Parameters
    ----------
    words : sequence of int
        Observation indices; each one selects a column of ``emit_prob``.
    states : sequence
        State labels. ``states[i]`` corresponds to row ``i`` of every
        probability array.
    start_prob : array-like, shape (n_states,)
        Probability of starting in each state.
    trans_prob : array-like, shape (n_states, n_states)
        ``trans_prob[i, j]`` is the probability of moving from state ``i``
        to state ``j``.
    emit_prob : array-like, shape (n_states, n_symbols)
        ``emit_prob[i, k]`` is the probability that state ``i`` emits
        observation ``k``.
    verbose : bool, default True
        When true, print the problem size and the filled Viterbi and
        backpointer matrices.

    Returns
    -------
    best_seq : list
        Labels from ``states`` along the highest-scoring path, one per word.
    best_prob : float
        Score of that path. An empty ``words`` yields ``([], 1.0)``, the
        value of an empty product.

    Notes
    -----
    Scores are plain probability products, so very long sequences can
    underflow to zero.
    """
    start_prob = np.asarray(start_prob, dtype=float)
    trans_prob = np.asarray(trans_prob, dtype=float)
    emit_prob = np.asarray(emit_prob, dtype=float)

    n_states = len(states)
    n_steps = len(words)

    if verbose:
        print("No. of Words: ", n_steps)
        print("No. of States: ", n_states)
        print(emit_prob.shape)

    # Nothing to decode: avoid indexing words[0] on an empty sequence.
    if n_steps == 0:
        return [], 1.0

    V = np.zeros((n_states, n_steps))
    backpointer = np.zeros((n_states, n_steps), dtype=int)

    # Initialisation: start in each state and emit the first word.
    V[:, 0] = start_prob * emit_prob[:, words[0]]

    # Recursion: scores[prev, cur] is the score of reaching `cur` at step t
    # through `prev`. The multiplication order (V * transition * emission)
    # matches the scalar formulation, so results are bit-identical to it.
    for t in range(1, n_steps):
        scores = V[:, t - 1][:, None] * trans_prob * emit_prob[:, words[t]][None, :]
        V[:, t] = scores.max(axis=0)
        # argmax returns the first maximum, so ties resolve to the lowest
        # previous-state index.
        backpointer[:, t] = scores.argmax(axis=0)

    # Termination: pick the best final state, then follow backpointers.
    best_prob = V[:, -1].max()
    path = [int(np.argmax(V[:, -1]))]
    for t in range(n_steps - 1, 0, -1):
        path.append(int(backpointer[path[-1], t]))
    # The path was collected last-to-first; appending and reversing once
    # avoids repeated front insertion.
    path.reverse()
    best_seq = [states[idx] for idx in path]

    if verbose:
        print("Viterbi matrix:")
        print(V)
        print("Backpointer matrix:")
        print(backpointer)

    return best_seq, best_prob

In [7]:
# Toy hidden Markov model: a three-word sentence tagged with two states.
sentence = ["the", "dog", "barks"]
states = ["NOUN", "VERB"]

# Starting probability of each state, in the same order as `states`.
start_prob = np.array([0.5, 0.5])

# trans_prob[i, j]: probability of moving from states[i] to states[j].
trans_prob = np.array([
    [0.4, 0.6],
    [0.3, 0.7],
])

# emit_prob[i, k]: probability that states[i] emits the word with index k.
emit_prob = np.array([
    [0.2, 0.1, 0.0],
    [0.0, 0.0, 0.5],
])

In [8]:
# Map each distinct word to the emit_prob column index used for it.
# dict.fromkeys drops repeats while keeping first-occurrence order, so a
# sentence with a repeated word still gets contiguous ids 0..n_unique-1
# instead of the position of each word's last occurrence.
word2idx = {word: idx for idx, word in enumerate(dict.fromkeys(sentence))}

In [9]:
# Encode the sentence as integer word ids and decode its best tag sequence.
tags, probability = viterbi(
    words=[word2idx[token] for token in sentence],
    states=states,
    start_prob=start_prob,
    trans_prob=trans_prob,
    emit_prob=emit_prob,
)

No. of Words:  3
No. of States:  2
(2, 3)
0
0
Viterbi matrix:
[[0.1    0.004  0.    ]
 [0.     0.     0.0012]]
Backpointer matrix:
[[0 0 0]
 [0 0 0]]


In [10]:
# Report the decoded tag for each word, then the score of the whole path.
print("POS tags for the sentence:")
for token, label in zip(sentence, tags):
    print(token, "->", label)

print("Probability:", probability)

POS tags for the sentence:
the -> NOUN
dog -> NOUN
barks -> VERB
Probability: 0.0012000000000000003


In [2]:
# word2idx

In [3]:
# transition_matrix = np.array([
#     [0.4, 0.4, 0.2],
#     [0.3, 0.3, 0.4], 
#     [0.2, 0.6, 0.2] 
# ])

# emission_matrix = np.array([
#     [0.2, 0.3, 0.1, 0.2, 0.2], 
#     [0.1, 0.1, 0.3, 0.4, 0.1], 
#     [0.3, 0.2, 0.1, 0.1, 0.3] 
# ])

# start_matrix = np.array([0.4, 0.3, 0.3])

In [4]:
# pos_tags

In [5]:
# sentence = ['cat', 'is', 'on', 'the', 'mat']

# pos_tags = ['NOUN', 'VERB', 'PREP']