In [39]:
import numpy as np
import model
import torch
from torch import nn, optim
from torch.autograd import Variable

import timeit
import pandas as pd
from sklearn import metrics 


# Model hyper-parameters passed to the model constructors below:
# per-token input size, LSTM hidden size, and number of output classes.
inp_dim, hidden_dim, n_classes = 25, 64, 8

# Directory holding the saved checkpoints (*.pt) and directory holding the *.npy data.
save_path = 'models/'
data_path = 'data/'


# Class index -> entity name; the position in the list is the class index.
entities_dict = dict(enumerate([
    "Rachel Green",
    "Ross Geller",
    "Chandler Bing",
    "Monica Geller",
    "Joey Tribbiani",
    "Phoebe Buffay",
    "Others",
    "None",
]))


def evaluate(mo, model_type = ""):
    """Run a trained tagger over the test split and report per-entity results.

    Loads ``test_input.npy`` and ``test_label_index.npy`` from ``data_path``,
    feeds the test sequences to ``mo`` one at a time and compares the arg-max
    class of every token with its ground-truth label.

    Args:
        mo: The model. It is called with a tensor of shape
            ``(seq_len, 1, inp_dim)`` and must return one row of class scores
            per token. When it is an ``nn.Module`` it is moved (in place) to
            the evaluation device and switched to eval mode for the duration
            of the call; its previous train/eval mode is restored afterwards.
        model_type: Text appended to the "Evaluating" banner.

    Returns:
        tuple: ``(accuracy, confusion_mat)``.
            ``accuracy`` maps each entity name in ``entities_dict`` to the
            fraction of tokens whose true label is that entity and that were
            predicted correctly (per-class recall), rounded to 4 decimals;
            it is ``nan`` for an entity with no test tokens.
            ``confusion_mat`` is the square confusion matrix ordered by the
            keys of ``entities_dict``: rows are true labels, columns are
            predicted labels.

    Raises:
        ValueError: If the test set contains no sequences.
        AssertionError: If predictions and labels differ in shape.
    """

    print("Evaluating" + model_type + "...")

    # Test input embeddings
    test_input = np.load(data_path + 'test_input.npy')

    # Test labels as indexes into the entity map
    test_label_index = np.load(data_path + 'test_label_index.npy')

    if test_input.shape[0] == 0:
        raise ValueError("Test set at '" + data_path + "' contains no sequences.")

    # Using gpu if available else cpu
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # The inputs are sent to `device`, so the model has to live there too.
    # Eval mode disables dropout / batch-norm updates during scoring.
    is_module = isinstance(mo, nn.Module)
    was_training = False
    if is_module:
        was_training = mo.training
        mo.to(device)
        mo.eval()

    # Collected per sequence and concatenated once after the loop.
    truth_chunks = []
    predicted_chunks = []

    start = timeit.default_timer()
    try:
        # No gradients are needed for evaluation.
        with torch.no_grad():
            for i in range(test_input.shape[0]):

                #input sample shape:  (3,25) -> (3,1,25)
                #3 words each of dim inp_dim
                inp = torch.from_numpy(test_input[i].reshape((-1, 1, inp_dim))).to(device)

                #truth value for input sample: tensor([7, 4, 7])
                #each value is the class index of the word
                truth = torch.from_numpy(test_label_index[i])

                #predicted output shape: torch.Size([3, 8])
                out = mo(inp)

                #class with max score per token, moved back to the CPU so it
                #can be compared with the (CPU) labels
                out = torch.max(out, 1)[1].cpu()

                assert truth.shape == out.shape, "**Shape Mismatch**"

                truth_chunks.append(truth)
                predicted_chunks.append(out)
    finally:
        # Leave the caller's model in the mode it arrived in.
        if is_module and was_training:
            mo.train()

    print("\nTotal time taken: %.4f seconds." % (timeit.default_timer() - start))

    y_correct = torch.cat(truth_chunks)
    y_predicted = torch.cat(predicted_chunks)

    assert y_correct.shape == y_predicted.shape, "**Shape Mismatch**"

    # Token-level accuracy over the whole test set.
    total_tokens = y_correct.numel()
    correct_tokens = torch.sum(torch.eq(y_correct, y_predicted)).item()

    # Passing `labels` pins the matrix to one row/column per known entity even
    # when some class never occurs in the labels or the predictions.
    class_ids = sorted(entities_dict)
    confusion_mat = metrics.confusion_matrix(y_correct.numpy(), y_predicted.numpy(), labels=class_ids)

    #calculating accuracy for each class
    accuracy = {}
    for row, class_id in enumerate(class_ids):
        #row `row` holds every token whose TRUE label is this entity;
        #the diagonal cell is how many of them were predicted correctly
        support = int(confusion_mat[row].sum())
        hits = int(confusion_mat[row][row])
        accuracy[entities_dict[class_id]] = round(hits / support, 4) if support else float('nan')

    print("\n*****Accuracy for each entity:*****")
    for k,v in accuracy.items():
        print("{0:<20} {1}".format(k,v))

    token_accuracy = correct_tokens / total_tokens if total_tokens else float('nan')

    # Average only over entities that actually occur in the test labels.
    defined = [v for v in accuracy.values() if v == v]
    mean_accuracy = sum(defined) / len(defined) if defined else float('nan')

    print("\n{0:<40} {1:.4f}".format("Average accuracy per entity: ", mean_accuracy))
    print("{0:<40} {1:.4f}".format("Overall accuracy considering tokens: ", token_accuracy))

    return accuracy, confusion_mat


In [42]:
#Evaluate Bidirectional LSTM -- checkpoint '21-BiLSTM_Loss_0.15430419193649092.pt'
#(the numeric prefix is presumably the training epoch -- confirm against the training notebook)

mo = model.BiLSTM(inp_dim, hidden_dim, n_classes)
# map_location='cpu' lets a checkpoint written from a GPU run be loaded on a
# CPU-only machine; load_state_dict then copies the weights into the new model.
mo.load_state_dict(torch.load(save_path + '21-BiLSTM_Loss_0.15430419193649092.pt', map_location='cpu'))
accuracy, confusion_mat = evaluate(mo, " Bidirectional LSTM")

print("\n***Confusion Matrix***\n")
# Rows are the true entities, columns the predicted entities.
pd.DataFrame(confusion_mat,
                columns = list(entities_dict.values()),
                index = list(entities_dict.values()))


Evaluating Bidirectional LSTM...

Total time taken: 36.3615 seconds.

*****Accuracy for each entity:*****
Rachel Green         0.7041
Ross Geller          0.5952
Chandler Bing        0.6387
Monica Geller        0.6538
Joey Tribbiani       0.6384
Phoebe Buffay        0.6584
Others               0.8025
None                 0.9909

Average accuracy per entity:             0.7102
Overall accuracy considering tokens:     0.9428

***Confusion Matrix***



Unnamed: 0,Rachel Green,Ross Geller,Chandler Bing,Monica Geller,Joey Tribbiani,Phoebe Buffay,Others,None
Rachel Green,740,32,13,8,20,11,209,18
Ross Geller,55,900,35,16,96,17,352,41
Chandler Bing,49,25,610,12,16,13,207,23
Monica Geller,50,34,27,574,35,7,139,12
Joey Tribbiani,53,59,16,3,588,13,168,21
Phoebe Buffay,46,37,24,7,20,532,128,14
Others,144,97,59,14,101,43,3445,390
,18,18,3,11,9,10,393,50157


In [46]:
#Evaluate Bidirectional LSTM -- checkpoint '31-BiLSTM_Loss_0.1387677234002888.pt'
#(the numeric prefix is presumably the training epoch -- confirm against the training notebook)

mo = model.BiLSTM(inp_dim, hidden_dim, n_classes)
# map_location='cpu' lets a checkpoint written from a GPU run be loaded on a
# CPU-only machine; load_state_dict then copies the weights into the new model.
mo.load_state_dict(torch.load(save_path + '31-BiLSTM_Loss_0.1387677234002888.pt', map_location='cpu'))
accuracy, confusion_mat = evaluate(mo, " Bidirectional LSTM")

print("\n***Confusion Matrix***\n")
# Rows are the true entities, columns the predicted entities.
pd.DataFrame(confusion_mat,
                columns = list(entities_dict.values()),
                index = list(entities_dict.values()))

Evaluating Bidirectional LSTM...

Total time taken: 39.8779 seconds.

*****Accuracy for each entity:*****
Rachel Green         0.7041
Ross Geller          0.5833
Chandler Bing        0.645
Monica Geller        0.6538
Joey Tribbiani       0.6374
Phoebe Buffay        0.6658
Others               0.8053
None                 0.9906

Average accuracy per entity:             0.7107
Overall accuracy considering tokens:     0.9427

***Confusion Matrix***



Unnamed: 0,Rachel Green,Ross Geller,Chandler Bing,Monica Geller,Joey Tribbiani,Phoebe Buffay,Others,None
Rachel Green,740,32,13,10,19,17,201,19
Ross Geller,51,882,36,21,94,22,369,37
Chandler Bing,46,29,616,16,17,13,198,20
Monica Geller,52,21,29,574,32,11,145,14
Joey Tribbiani,51,55,18,6,587,17,168,19
Phoebe Buffay,42,34,20,10,16,538,133,15
Others,147,87,60,21,90,52,3457,379
,15,17,4,13,10,12,404,50144


In [43]:
#Evaluate Bidirectional LSTM -- checkpoint '71-BiLSTM_Loss_0.09155423128380817.pt'
#(the numeric prefix is presumably the training epoch -- confirm against the training notebook)

mo = model.BiLSTM(inp_dim, hidden_dim, n_classes)
# map_location='cpu' lets a checkpoint written from a GPU run be loaded on a
# CPU-only machine; load_state_dict then copies the weights into the new model.
mo.load_state_dict(torch.load(save_path + '71-BiLSTM_Loss_0.09155423128380817.pt', map_location='cpu'))
accuracy, confusion_mat = evaluate(mo, " Bidirectional LSTM")

print("\n***Confusion Matrix***\n")
# Rows are the true entities, columns the predicted entities.
pd.DataFrame(confusion_mat,
                columns = list(entities_dict.values()),
                index = list(entities_dict.values()))

Evaluating Bidirectional LSTM...

Total time taken: 37.5700 seconds.

*****Accuracy for each entity:*****
Rachel Green         0.6984
Ross Geller          0.5767
Chandler Bing        0.6545
Monica Geller        0.6686
Joey Tribbiani       0.6363
Phoebe Buffay        0.6547
Others               0.7694
None                 0.9884

Average accuracy per entity:             0.7059
Overall accuracy considering tokens:     0.9382

***Confusion Matrix***



Unnamed: 0,Rachel Green,Ross Geller,Chandler Bing,Monica Geller,Joey Tribbiani,Phoebe Buffay,Others,None
Rachel Green,734,27,18,16,21,14,196,25
Ross Geller,64,872,50,29,83,26,346,42
Chandler Bing,60,30,625,27,28,11,150,24
Monica Geller,48,24,27,587,25,6,149,12
Joey Tribbiani,56,57,22,11,586,15,153,21
Phoebe Buffay,42,35,22,15,19,529,132,14
Others,189,115,90,39,114,53,3303,390
,21,28,14,18,12,14,482,50030


In [45]:
#Evaluate Bidirectional LSTM -- checkpoint '1-BiLSTM_Loss_0.5018732744513754.pt'
#(the numeric prefix is presumably the training epoch -- confirm against the training notebook)

mo = model.BiLSTM(inp_dim, hidden_dim, n_classes)
# map_location='cpu' lets a checkpoint written from a GPU run be loaded on a
# CPU-only machine; load_state_dict then copies the weights into the new model.
mo.load_state_dict(torch.load(save_path + '1-BiLSTM_Loss_0.5018732744513754.pt', map_location='cpu'))
accuracy, confusion_mat = evaluate(mo, " Bidirectional LSTM")

print("\n***Confusion Matrix***\n")
# Rows are the true entities, columns the predicted entities.
pd.DataFrame(confusion_mat,
                columns = list(entities_dict.values()),
                index = list(entities_dict.values()))

Evaluating Bidirectional LSTM...

Total time taken: 39.7234 seconds.

*****Accuracy for each entity:*****
Rachel Green         0.2607
Ross Geller          0.2599
Chandler Bing        0.0
Monica Geller        0.0
Joey Tribbiani       0.0065
Phoebe Buffay        0.1262
Others               0.5672
None                 0.9931

Average accuracy per entity:             0.2767
Overall accuracy considering tokens:     0.8762

***Confusion Matrix***



Unnamed: 0,Rachel Green,Ross Geller,Chandler Bing,Monica Geller,Joey Tribbiani,Phoebe Buffay,Others,None
Rachel Green,274,122,0,0,0,0,522,133
Ross Geller,87,393,0,0,0,0,719,313
Chandler Bing,79,260,0,0,0,0,493,123
Monica Geller,95,237,0,0,0,0,425,121
Joey Tribbiani,101,217,0,0,6,0,457,140
Phoebe Buffay,125,92,0,0,1,102,376,112
Others,68,110,0,2,0,0,2435,1678
,12,9,0,0,0,0,329,50269


In [52]:
#Evaluate Normal LSTM -- checkpoint 'SimpleLSTM_FinalLoss_0.21003304342390275.pt'

mo = model.SimpleLSTM(inp_dim, hidden_dim, n_classes)
# map_location='cpu' lets a checkpoint written from a GPU run be loaded on a
# CPU-only machine; load_state_dict then copies the weights into the new model.
mo.load_state_dict(torch.load(save_path + 'SimpleLSTM_FinalLoss_0.21003304342390275.pt', map_location='cpu'))
accuracy, confusion_mat = evaluate(mo, " Normal LSTM")

print("\n***Confusion Matrix***\n")
# Rows are the true entities, columns the predicted entities.
pd.DataFrame(confusion_mat,
                columns = list(entities_dict.values()),
                index = list(entities_dict.values()))


Evaluating Normal LSTM...

Total time taken: 21.0234 seconds.

*****Accuracy for each entity:*****
Rachel Green         0.5699
Ross Geller          0.7196
Chandler Bing        0.7162
Monica Geller        0.615
Joey Tribbiani       0.5993
Phoebe Buffay        0.6498
Others               0.7086
None                 0.9937

Average accuracy per entity:             0.6965
Overall accuracy considering tokens:     0.9392

***Confusion Matrix***



Unnamed: 0,Rachel Green,Ross Geller,Chandler Bing,Monica Geller,Joey Tribbiani,Phoebe Buffay,Others,None
Rachel Green,599,108,126,1,26,4,155,32
Ross Geller,3,1088,70,2,84,3,234,28
Chandler Bing,4,89,684,1,8,0,154,15
Monica Geller,3,99,70,540,30,3,121,12
Joey Tribbiani,1,165,47,0,552,4,135,17
Phoebe Buffay,6,104,50,0,14,525,99,10
Others,9,393,178,7,95,10,3042,559
,9,21,16,6,10,17,242,50298


In [49]:
#Evaluate Normal LSTM -- checkpoint '1000-SimpleLSTM_FinalLoss_1.018389134275678e-05.pt'
#(the numeric prefix is presumably the training epoch -- confirm against the training notebook)

mo = model.SimpleLSTM(inp_dim, hidden_dim, n_classes)
# map_location='cpu' lets a checkpoint written from a GPU run be loaded on a
# CPU-only machine; load_state_dict then copies the weights into the new model.
mo.load_state_dict(torch.load(save_path + '1000-SimpleLSTM_FinalLoss_1.018389134275678e-05.pt', map_location='cpu'))
accuracy, confusion_mat = evaluate(mo, " Normal LSTM")

print("\n***Confusion Matrix***\n")
# Rows are the true entities, columns the predicted entities.
pd.DataFrame(confusion_mat,
                columns = list(entities_dict.values()),
                index = list(entities_dict.values()))


Evaluating Normal LSTM...

Total time taken: 19.6064 seconds.

*****Accuracy for each entity:*****
Rachel Green         0.4215
Ross Geller          0.2937
Chandler Bing        0.4335
Monica Geller        0.0911
Joey Tribbiani       0.0456
Phoebe Buffay        0.0347
Others               0.181
None                 0.9786

Average accuracy per entity:             0.3100
Overall accuracy considering tokens:     0.8481

***Confusion Matrix***



Unnamed: 0,Rachel Green,Ross Geller,Chandler Bing,Monica Geller,Joey Tribbiani,Phoebe Buffay,Others,None
Rachel Green,443,108,18,98,2,7,67,308
Ross Geller,131,444,40,75,15,20,77,710
Chandler Bing,32,102,414,41,13,2,38,313
Monica Geller,98,294,29,80,14,9,55,299
Joey Tribbiani,53,297,44,69,42,17,34,365
Phoebe Buffay,146,247,9,57,4,28,37,280
Others,433,325,77,125,10,14,777,2532
,191,297,118,82,51,6,336,49538


In [None]:
#Normal LSTM

SimpleLSTM_FinalLoss_0.21003304342390275

*****Accuracy for each entity:*****
Rachel Green         0.5699
Ross Geller          0.7196
Chandler Bing        0.7162
Monica Geller        0.615
Joey Tribbiani       0.5993
Phoebe Buffay        0.6498
Others               0.7086
None                 0.9937

Average accuracy per entity:             0.6965
Overall accuracy considering tokens:     0.9392
---------------------------------------        

1000-SimpleLSTM_FinalLoss_1.018389134275678e-05

*****Accuracy for each entity:*****
Rachel Green         0.4215
Ross Geller          0.2937
Chandler Bing        0.4335
Monica Geller        0.0911
Joey Tribbiani       0.0456
Phoebe Buffay        0.0347
Others               0.181
None                 0.9786

Average accuracy per entity:             0.3100
Overall accuracy considering tokens:     0.8481
---------------------------------------    
    

In [None]:
#Bidirectional LSTM

1-BiLSTM_Loss_0.5018732744513754

*****Accuracy for each entity:*****
Rachel Green         0.2607
Ross Geller          0.2599
Chandler Bing        0.0
Monica Geller        0.0
Joey Tribbiani       0.0065
Phoebe Buffay        0.1262
Others               0.5672
None                 0.9931

Average accuracy per entity:             0.2767
Overall accuracy considering tokens:     0.8762
---------------------------------------    

11-BiLSTM_Loss_0.1801687417329021

*****Accuracy for each entity:*****
Rachel Green         0.687
Ross Geller          0.5873
Chandler Bing        0.5958
Monica Geller        0.6367
Joey Tribbiani       0.633
Phoebe Buffay        0.6485
Others               0.7955
None                 0.9902

Average accuracy per entity:             0.6967
Overall accuracy considering tokens:     0.9402
---------------------------------------    

21-BiLSTM_Loss_0.15430419193649092

*****Accuracy for each entity:*****
Rachel Green         0.7041
Ross Geller          0.5952
Chandler Bing        0.6387
Monica Geller        0.6538
Joey Tribbiani       0.6384
Phoebe Buffay        0.6584
Others               0.8025
None                 0.9909

Average accuracy per entity:             0.7102
Overall accuracy considering tokens:     0.9428
---------------------------------------    

31-BiLSTM_Loss_0.1387677234002888

*****Accuracy for each entity:*****
Rachel Green         0.7041
Ross Geller          0.5833
Chandler Bing        0.645
Monica Geller        0.6538
Joey Tribbiani       0.6374
Phoebe Buffay        0.6658
Others               0.8053
None                 0.9906

Average accuracy per entity:             0.7107
Overall accuracy considering tokens:     0.9427
---------------------------------------    


71-BiLSTM_Loss_0.09155423128380817

*****Accuracy for each entity:*****
Rachel Green         0.6984
Ross Geller          0.5767
Chandler Bing        0.6545
Monica Geller        0.6686
Joey Tribbiani       0.6363
Phoebe Buffay        0.6547
Others               0.7694
None                 0.9884

Average accuracy per entity:             0.7059
Overall accuracy considering tokens:     0.9382

---------------------------------------


