In [68]:
import numpy as np
import model
import torch
from torch import nn, optim
from torch.autograd import Variable

import timeit
import pandas as pd
from sklearn import metrics 


# Sizes handed to model.SimpleLSTM: width of each word embedding, hidden size,
# and the number of entity classes the network chooses between.
inp_dim, hidden_dim, n_classes = 25, 64, 8

# Directories (relative to the notebook) for saved checkpoints and .npy datasets.
save_path, data_path = 'models/', 'data/'


# Class index -> entity name. Built with enumerate so the index always equals the
# position in the list; insertion order is reused later for table row/column labels.
entities_dict = dict(enumerate([
    "Rachel Green",
    "Ross Geller",
    "Chandler Bing",
    "Monica Geller",
    "Joey Tribbiani",
    "Phoebe Buffay",
    "Others",
    "None",
]))


# Load the four dataset arrays from data_path in one pass:
#   train_input / test_input             -> input embeddings
#   train_label_index / test_label_index -> labels as indexes into the entity map
train_input, train_label_index, test_input, test_label_index = [
    np.load(data_path + file_name)
    for file_name in (
        'train_input.npy',
        'train_label_index.npy',
        'test_input.npy',
        'test_label_index.npy',
    )
]


# Using gpu if available else cpu
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Rebuild the network and restore the trained weights.
# map_location remaps the stored tensors onto `device`, so a checkpoint written on a
# GPU machine still loads on a CPU-only host.
mo = model.SimpleLSTM(inp_dim, hidden_dim, n_classes)
mo.load_state_dict(torch.load(save_path + 'SimpleLSTM_FinalLoss_0.21003304342390275.pt',
                              map_location=device))
# The inputs below are moved to `device`, so the model has to live there as well;
# otherwise the forward pass fails with a device mismatch whenever CUDA is available.
mo.to(device)
# Inference mode: switches off train-only layers (dropout etc.) if the model has any.
mo.eval()


# Running counters for token-level and sequence-level accuracy
total_tokens = 0
total_seq = 0

correct_tokens = 0
correct_seq = 0

# Per-sequence label / prediction tensors; concatenated once after the loop instead of
# growing a tensor with torch.cat on every iteration (which re-copies everything so far).
truth_chunks = []
predicted_chunks = []

start = timeit.default_timer()
# No gradients are needed for evaluation; no_grad avoids building the autograd graph.
with torch.no_grad():
    for i in range(test_input.shape[0]):

        # input sample shape: (seq_len, inp_dim) -> (seq_len, 1, inp_dim)
        # i.e. one sequence of word embeddings with a singleton middle dimension
        inp = torch.from_numpy(test_input[i].reshape((-1, 1, inp_dim))).to(device)

        # truth value for input sample, e.g. tensor([7, 4, 7])
        # each value is the class index of the corresponding word (kept on CPU)
        truth = torch.from_numpy(test_label_index[i])
        truth_chunks.append(truth)

        # predicted output shape: (seq_len, n_classes)
        out = mo(inp)

        # class index with the highest score per word; moved back to CPU so it can be
        # compared and concatenated with the CPU-resident labels
        out = torch.max(out, 1)[1].cpu()
        predicted_chunks.append(out)

        # Following code calculates accuracy separately for whole sequences and single tokens

        # element-wise match -> tensor of 0/1 flags
        check = torch.eq(truth, out)
        seq_len = check.size()[0]

        # number of correctly labelled tokens in this sequence
        correct_tokens_temp = torch.sum(check).item()
        correct_tokens += correct_tokens_temp

        # a sequence counts as correct only when every one of its tokens is correct
        if seq_len == correct_tokens_temp:
            correct_seq += 1

        total_tokens += seq_len
        total_seq += 1

# Flat 1-D LongTensors of all true / predicted class indexes (empty if there was no data)
y_correct = (torch.cat(truth_chunks).long() if truth_chunks
             else torch.empty(0, dtype=torch.long))
y_predicted = (torch.cat(predicted_chunks).long() if predicted_chunks
               else torch.empty(0, dtype=torch.long))


print("\nTotal time taken: %.4f seconds." % (timeit.default_timer() - start))

# Share of individual tokens labelled correctly (nan when nothing was evaluated)
token_accuracy = correct_tokens / total_tokens if total_tokens else float('nan')
print('\nAccuracy considering one entity at a time: ' + str(round(token_accuracy,4)))

# Share of sequences in which every token was labelled correctly
seq_accuracy = correct_seq / total_seq if total_seq else float('nan')
print('Accuracy considering one whole sequence at a time: ' + str(round(seq_accuracy,4)) + "\n")

assert y_correct.shape == y_predicted.shape, "**Shape Mismatch**"

# Fix the label set explicitly: without `labels`, sklearn only includes classes that
# actually occur in the data, so a class missing from the test set would shrink the
# matrix and break the per-class indexing and the table labels below.
class_ids = list(range(n_classes))
class_names = [entities_dict[c] for c in class_ids]

# Rows are true classes, columns are predicted classes
confusion_mat = metrics.confusion_matrix(y_correct.numpy(), y_predicted.numpy(), labels=class_ids)

# calculating accuracy for each class: correctly predicted samples of the class divided
# by all samples whose true label is that class (row sum), i.e. per-class recall
accuracy = {}
for i in class_ids:
    n_true = confusion_mat[i].sum()
    correct_pred = confusion_mat[i][i]
    # nan marks a class with no true samples instead of dividing by zero
    accuracy[entities_dict[i]] = round(correct_pred / n_true, 4) if n_true else float('nan')

print("\nAccuracy for each class:-")
for k,v in accuracy.items():
    print("{0}: {1}".format(k,v))

# Last expression of the cell -> rendered as a labelled confusion-matrix table
pd.DataFrame(confusion_mat,
            columns = class_names,
            index = class_names)




Total time taken: 15.9502 seconds.

Accuracy considering one entity at a time: 0.9392
Accuracy considering one whole sequence at a time: 0.5037


Accuracy for each class:-
Rachel Green: 0.5699
Ross Geller: 0.7196
Chandler Bing: 0.7162
Monica Geller: 0.615
Joey Tribbiani: 0.5993
Phoebe Buffay: 0.6498
Others: 0.7086
None: 0.9937


Unnamed: 0,Rachel Green,Ross Geller,Chandler Bing,Monica Geller,Joey Tribbiani,Phoebe Buffay,Others,None
Rachel Green,599,108,126,1,26,4,155,32
Ross Geller,3,1088,70,2,84,3,234,28
Chandler Bing,4,89,684,1,8,0,154,15
Monica Geller,3,99,70,540,30,3,121,12
Joey Tribbiani,1,165,47,0,552,4,135,17
Phoebe Buffay,6,104,50,0,14,525,99,10
Others,9,393,178,7,95,10,3042,559
,9,21,16,6,10,17,242,50298


In [69]:

# Append the per-class accuracies as one extra row under the confusion matrix.
# The results go into NEW names instead of overwriting `accuracy` / `confusion_mat`,
# so this cell can be re-run safely: previously a second run failed because `accuracy`
# had become an ndarray (no .values()) and `confusion_mat` kept growing by a row.
accuracy_row = np.array(list(accuracy.values())).reshape(1, -1)

# vstack promotes the integer counts to float so they can share an array with the ratios
confusion_with_accuracy = np.vstack((confusion_mat, accuracy_row))

# NOTE: the "**Accuary**" row label is kept exactly as before so it matches saved output
pd.DataFrame(confusion_with_accuracy,
            columns = list(entities_dict.values()),
            index = list(entities_dict.values()) + ["**Accuary**"])


Unnamed: 0,Rachel Green,Ross Geller,Chandler Bing,Monica Geller,Joey Tribbiani,Phoebe Buffay,Others,None
Rachel Green,599.0,108.0,126.0,1.0,26.0,4.0,155.0,32.0
Ross Geller,3.0,1088.0,70.0,2.0,84.0,3.0,234.0,28.0
Chandler Bing,4.0,89.0,684.0,1.0,8.0,0.0,154.0,15.0
Monica Geller,3.0,99.0,70.0,540.0,30.0,3.0,121.0,12.0
Joey Tribbiani,1.0,165.0,47.0,0.0,552.0,4.0,135.0,17.0
Phoebe Buffay,6.0,104.0,50.0,0.0,14.0,525.0,99.0,10.0
Others,9.0,393.0,178.0,7.0,95.0,10.0,3042.0,559.0
,9.0,21.0,16.0,6.0,10.0,17.0,242.0,50298.0
**Accuary**,0.5699,0.7196,0.7162,0.615,0.5993,0.6498,0.7086,0.9937
