In [18]:
import numpy as np
import pandas as pd

In [19]:
# Part-of-speech tags used in this notebook, described in the original note
# as: adverb, noun, preposition.
# NOTE(review): if these are Penn Treebank tags, TO marks the word "to"
# rather than prepositions in general -- confirm the intended tagset.
tags = [
    "RB",  # adverb
    "NN",  # noun
    "TO",  # preposition (see the review note above)
]

In [20]:
# Counts keyed by a (first_tag, second_tag) pair of tag names.
# When the matrix is filled, the first tag selects the row and the second
# tag selects the column.
transition_counts = {
    ("NN", "NN"): 16241,
    ("RB", "RB"): 2263,
    ("TO", "TO"): 2,
    ("NN", "TO"): 5256,
    ("RB", "TO"): 855,
    ("TO", "NN"): 734,
    ("NN", "RB"): 2431,
    ("RB", "NN"): 358,
    ("TO", "RB"): 200,
}

In [21]:
# Number of tags; the matrix gets one row and one column per tag.
n_tags = len(tags)
# Start from an all-zero square array. Its shape is (n_tags, n_tags), so no
# additional row is set aside for an initial (pi) state.
transition_matrix = np.zeros(shape=(n_tags, n_tags))

In [22]:
# Alphabetical tag order fixes which row/column belongs to which tag.
sorted_tags = sorted(tags)

# Copy every count into its cell: the first tag of the pair picks the row,
# the second tag picks the column. Pairs missing from the dict count as 0.
for row, first_tag in enumerate(sorted_tags):
    for col, second_tag in enumerate(sorted_tags):
        transition_matrix[row, col] = transition_counts.get((first_tag, second_tag), 0)

# Last expression of the cell: display the filled array.
transition_matrix

array([[1.6241e+04, 2.4310e+03, 5.2560e+03],
       [3.5800e+02, 2.2630e+03, 8.5500e+02],
       [7.3400e+02, 2.0000e+02, 2.0000e+00]])

In [23]:
def print_matrix(matrix, labels=None):
    """Print ``matrix`` as a table with the same labels on rows and columns.

    Parameters
    ----------
    matrix : array-like
        Two-dimensional data with one row and one column per label.
    labels : sequence, optional
        Labels used for both the index and the columns. When omitted, the
        notebook-level ``sorted_tags`` list is used, which is what the
        original one-argument call did.

    Returns
    -------
    None
        The table is written to standard output.
    """
    if labels is None:
        # Fall back to the notebook-wide tag order (looked up at call time).
        labels = sorted_tags
    print(pd.DataFrame(matrix, index=labels, columns=labels))

In [24]:
# Show the filled matrix (still raw counts at this point) with tag labels.
print_matrix(transition_matrix)

         NN      RB      TO
NN  16241.0  2431.0  5256.0
RB    358.0  2263.0   855.0
TO    734.0   200.0     2.0


In [27]:
# Dividing an array by a scalar divides every element by that scalar.
m1 = np.divide(transition_matrix, 10)
print_matrix(m1)

        NN     RB     TO
NN  1624.1  243.1  525.6
RB    35.8  226.3   85.5
TO    73.4   20.0    0.2


In [29]:
# Sum along each row. keepdims=True keeps the result two-dimensional (one
# total per row, stacked as a column) so it can be broadcast against the
# matrix row by row.
m2 = np.sum(transition_matrix, axis=1, keepdims=True)
m2

array([[23928.],
       [ 3476.],
       [  936.]])

In [30]:
# Final transition matrix (without pi state) of probabilities: every row is
# divided by its own total.
# The totals are recomputed from the current matrix instead of reusing the
# earlier `m2`. On the first run they are the same values as `m2`; if the
# cell is executed again the rows already sum to ~1, so the probabilities
# stay (numerically) unchanged instead of being divided by the raw counts a
# second time.
row_totals = transition_matrix.sum(axis=1, keepdims=True)
transition_matrix = transition_matrix / row_totals
print_matrix(transition_matrix)

          NN        RB        TO
NN  0.678745  0.101596  0.219659
RB  0.102992  0.651036  0.245972
TO  0.784188  0.213675  0.002137


In [32]:
import math
# Two independent copies of the probability matrix, so each of the two
# diagonal-update approaches below modifies its own array.
t_matrix1 = np.array(transition_matrix, copy=True)
t_matrix2 = np.array(transition_matrix, copy=True)

In [33]:
# Loop version: add log(row total) to each diagonal entry, one row at a time.
for i in range(n_tags):
    # m2 is a column of row totals (built with keepdims=True), so m2[i] is a
    # 1-element array. Index the element itself so math.log receives a scalar;
    # implicit conversion of a 1-element array to a scalar is deprecated
    # since NumPy 1.25.
    t_matrix1[i,i] = math.log(m2[i, 0])+t_matrix1[i,i]
print_matrix(t_matrix1)

           NN        RB        TO
NN  10.761549  0.101596  0.219659
RB   0.102992  8.804673  0.245972
TO   0.784188  0.213675  6.843752


In [36]:
# Extract the main diagonal of t_matrix2 and give it the same shape as m2 so
# the two can be added element by element.
diag = np.reshape(np.diag(t_matrix2), m2.shape)

In [37]:
# Vectorized version of the loop above: wrap math.log so it is applied to
# every element of m2, then add the result to the diagonal values.
# Note: `diag` is rebound to the sum, so running this cell twice adds the
# logs twice.
elementwise_log = np.vectorize(math.log)
diag = diag + elementwise_log(m2)
# Write the updated values back onto the main diagonal of t_matrix2.
np.fill_diagonal(t_matrix2, diag)
print_matrix(t_matrix2)

           NN        RB        TO
NN  10.761549  0.101596  0.219659
RB   0.102992  8.804673  0.245972
TO   0.784188  0.213675  6.843752


In [55]:
# Element-by-element equality check between the loop result and the
# vectorized result; displays a boolean array of the same shape.
np.equal(t_matrix1, t_matrix2)

array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])