In [1]:
#Import the libraries
import numpy as np
import pandas as pd

In [2]:
#Part-of-speech tags tracked by this notebook
#NOTE(review): these look like Penn Treebank-style tags (adverb, noun, "to") - confirm
tags = [
    'RB',
    'NN',
    'TO',
]

In [3]:
#Raw counts for every ordered pair of tags, keyed by a 2-tuple of tag names.
#The first element of the key is used as the matrix row and the second as the
#matrix column when the counts are copied into the transition matrix.
transition_counts = {
    #Same-tag pairs
    ('NN', 'NN'): 16241, ('RB', 'RB'): 2263, ('TO', 'TO'): 2,
    #Mixed-tag pairs
    ('NN', 'TO'): 5256, ('RB', 'TO'): 855, ('TO', 'NN'): 734,
    ('NN', 'RB'): 2431, ('RB', 'NN'): 358, ('TO', 'RB'): 200,
}

In [4]:
#Allocate the (still empty) transition matrix with numpy

#One row and one column per tag
n_tags = len(tags)

#Zero-filled square array of shape (n_tags, n_tags)
transition_matrix = np.zeros(shape = (n_tags, n_tags))

#Show the shape, then let the notebook display the matrix itself
print("Shape: ", transition_matrix.shape)
transition_matrix

Shape:  (3, 3)


array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [5]:
#Work on a sorted copy of the tags so rows and columns always follow one fixed
#order; the original tags list is left untouched
sorted_tags = list(tags)
sorted_tags.sort()
sorted_tags

['NN', 'RB', 'TO']

In [6]:
#Populate the transition matrix from the counts dictionary.
#Row i corresponds to the first tag of a pair and column j to the second tag,
#both taken in sorted_tags order.
for i, row_tag in enumerate(sorted_tags):
    for j, col_tag in enumerate(sorted_tags):

        #Get the pair
        tag_pair = (row_tag, col_tag)
        #A pair missing from the dictionary counts as 0. Without the default,
        #.get() returns None, which cannot be stored in a float array (TypeError).
        transition_matrix[i, j] = transition_counts.get(tag_pair, 0)
transition_matrix

array([[1.6241e+04, 2.4310e+03, 5.2560e+03],
       [3.5800e+02, 2.2630e+03, 8.5500e+02],
       [7.3400e+02, 2.0000e+02, 2.0000e+00]])

In [7]:
#Show a matrix as a labelled table for better readability
def printdf(transition_matrix):
    """Print the given matrix as a pandas DataFrame labelled with the tags.

    The rows and the columns are both labelled with the notebook-level
    ``sorted_tags`` list, so the matrix is expected to have one row and one
    column per entry of that list.

    Parameters
    ----------
    transition_matrix : array-like
        Two-dimensional data accepted by ``pd.DataFrame``.

    Returns
    -------
    None
        The table is written to standard output.
    """
    labelled = pd.DataFrame(transition_matrix, index = sorted_tags, columns = sorted_tags)
    print(labelled)
printdf(transition_matrix)

         NN      RB      TO
NN  16241.0  2431.0  5256.0
RB    358.0  2263.0   855.0
TO    734.0   200.0     2.0


In [8]:
#Total of each row, kept as an (n_tags, 1) column so it broadcasts against
#the rows of the matrix
row_sum = np.sum(transition_matrix, axis = 1, keepdims = True)
row_sum

array([[23928.],
       [ 3476.],
       [  936.]])

In [9]:
#Normalize the transition matrix so that every row sums to 1.
#The divisor is recomputed from the current matrix instead of reusing row_sum:
#this cell overwrites transition_matrix, so re-running it with the stale
#raw-count totals would divide already-normalized values a second time.
#Computed this way a re-run divides each row by ~1 and leaves it unchanged.
transition_matrix = transition_matrix / transition_matrix.sum(axis = 1, keepdims = True)
printdf(transition_matrix)

          NN        RB        TO
NN  0.678745  0.101596  0.219659
RB  0.102992  0.651036  0.245972
TO  0.784188  0.213675  0.002137


In [11]:
#Check that every row of the normalized matrix sums to 1
row_sum_ = transition_matrix.sum(axis = 1, keepdims = True)
#The displayed values are rounded, so a small deviation would still print as
#1. - assert the invariant numerically (raises AssertionError if it is violated)
np.testing.assert_allclose(row_sum_, 1.0)
row_sum_

array([[1.],
       [1.],
       [1.]])