In [22]:
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn.functional as F
import torch.distributions
import torch.optim as optim
from torch import nn 
from torch.autograd import Variable

import torchtext
from torchtext import data
from torchtext import datasets
from torchtext.vocab import Vectors, GloVe

In [23]:
class Model(torch.nn.Module):
    """Sentence classifier: embedding look-up -> stacked LSTM -> two-layer MLP -> softmax.

    Arguments
    ---------
    batch_size : Stored on the instance and passed to the LSTM wrapper; ``forward`` reads the
        batch dimension from its input rather than from this value.
    num_classes : Number of output classes (width of the last MLP layer).
    mlp_out_size : Width of the hidden layer inside the MLP head.
    vocab_size : Number of rows of the embedding look-up table.
    embedding_length : Dimension of each word embedding.
    weights : Pre-trained (GloVe, per the original notes) word-embedding tensor;
        see the NOTE in ``__init__`` for how it is actually stored.
    num_layers : Number of stacked LSTM layers.
    hidden_size : Size of the hidden state of the LSTM.
    biDirectional : When truthy the LSTM is bidirectional and the MLP input is ``2 * hidden_size`` wide.
    --------
    """

    def __init__(self, batch_size, num_classes, mlp_out_size, vocab_size, embedding_length, weights, num_layers, hidden_size = 100, biDirectional = False):
        super().__init__()

        # Hyper-parameter bookkeeping.
        self.batch_size = batch_size
        self.num_classes = num_classes
        self.vocab_size = vocab_size
        self.embedding_length = embedding_length
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.mlp_out_size = mlp_out_size
        self.biDirectional = biDirectional

        embedding = nn.Embedding(vocab_size, embedding_length)
        # NOTE(review): nn.Embedding keeps its look-up table in `.weight`. Assigning to
        # `.weights` registers an additional parameter and leaves the real table at its
        # default initialisation, so the pre-trained vectors are not used for look-ups.
        # Left untouched on purpose: the attribute name is part of the state_dict keys
        # ("word_embeddings.weights"), so renaming it would change what checkpoints saved
        # from this class are matched against.
        embedding.weights = nn.Parameter(weights, requires_grad=False)
        self.word_embeddings = embedding

        self.lstm_layer = LSTM(batch_size, hidden_size, embedding_length, biDirectional, num_layers)

        # Both directions are concatenated in forward(), doubling the MLP input width.
        mlp_in_size = hidden_size * 2 if biDirectional else hidden_size
        self.mlp = MLP(mlp_in_size, mlp_out_size, num_classes)

    def forward(self, input_sequence):
        """Return per-class probabilities (softmax over the last dimension) for a batch of token indices."""
        embedded = self.word_embeddings(input_sequence)
        _, hidden = self.lstm_layer(embedded)

        if not self.biDirectional:
            # Final hidden state of the top LSTM layer.
            sentence_repr = hidden[-1]
        else:
            n_batch = embedded.shape[0]
            # num_layer x num_dir x batch x hidden, then keep the top layer only.
            top_layer = hidden.view(self.num_layers, 2, n_batch, self.hidden_size)[-1]
            # batch x num_dir x hidden -> batch x (2 * hidden)
            sentence_repr = top_layer.transpose(0, 1).reshape(n_batch, self.hidden_size * 2)

        return torch.softmax(self.mlp(sentence_repr), dim=-1)

In [24]:
class LSTM(torch.nn.Module):
    """
        Thin wrapper around a batch-first ``nn.LSTM`` that exposes the per-step outputs
        together with the final hidden state (the final cell state is discarded).

        Arguments
        ---------
        batch_size : Stored on the instance only; not used when building or running the LSTM
        hidden_size : Size of the hidden_state of the LSTM
        embedding_length : Dimension of the input embeddings fed to the LSTM
        biDirectional : Whether the underlying LSTM is bidirectional
        num_layers : Number of stacked LSTM layers
        --------
    """
    def __init__(self, batch_size, hidden_size, embedding_length, biDirectional = False, num_layers = 2):
        super().__init__()

        self.batch_size = batch_size
        self.hidden_size = hidden_size
        self.embedding_length = embedding_length
        self.biDirectional = biDirectional
        self.num_layers = num_layers

        # No dropout is configured between layers.
        self.lstm = nn.LSTM(
            input_size=embedding_length,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=biDirectional,
        )

    def forward(self, input_sequence, batch_size=None):
        """Run the LSTM; ``batch_size`` is accepted for compatibility but ignored."""
        step_outputs, state = self.lstm(input_sequence)
        last_hidden, _last_cell = state
        return step_outputs, last_hidden

In [25]:
class MLP(torch.nn.Module):
    """Two-layer feed-forward head: Linear -> ReLU -> Linear.

    Arguments
    ---------
    input_dim : Width of the incoming feature vector
    output_dim : Width of the hidden layer
    num_classes : Width of the final layer (one raw score per class)
    --------
    """
    def __init__(self, input_dim, output_dim, num_classes):
        super().__init__()

        self.input_dim, self.output_dim, self.num_classes = input_dim, output_dim, num_classes

        self.ff_1 = nn.Linear(input_dim, output_dim)
        self.relu = nn.ReLU()
        self.ff_2 = nn.Linear(output_dim, num_classes)

    def forward(self,x):
        """Return the raw scores of the last linear layer (no activation is applied to them here)."""
        hidden = self.relu(self.ff_1(x))
        return self.ff_2(hidden)

In [26]:
# Artefacts saved alongside the trained model.
# NOTE: torch.load unpickles its input -- only point it at files you trust.
# map_location='cpu' lets artefacts that were saved from a GPU session load on a CPU-only machine.
vocab_size = torch.load('models/class_15/vocab', map_location='cpu')
word_embeddings = torch.load('models/class_15/word_embeddings', map_location='cpu')

In [27]:
# Change accordingly: these values are handed to Model(...) when the network is rebuilt.
num_classes = 15
batch_size = 32    # stored on the model; forward() takes the batch dimension from its input

# Layer dimensions
embedding_length, hidden_size = 100, 100
num_layers = 3
mlp_out_size = 64

# Embedding tensor loaded from disk in the previous cell
weights = word_embeddings

In [28]:
# Change model/ path
# Rebuild the architecture with the settings above, then restore the trained parameters.
loaded_model = Model(batch_size, num_classes, mlp_out_size, vocab_size, embedding_length, weights, num_layers, hidden_size, biDirectional=False)
# map_location='cpu' keeps a checkpoint that was saved from a GPU session loadable on a CPU-only machine.
# NOTE: torch.load unpickles its input -- only load checkpoints you trust.
loaded_model.load_state_dict(torch.load('models/class_15/LSTM_BS_32', map_location='cpu'))
# Switch to inference mode; as the last expression this also displays the module summary.
loaded_model.eval()

Model(
  (word_embeddings): Embedding(38110, 100)
  (lstm_layer): LSTM(
    (lstm): LSTM(100, 100, num_layers=3, batch_first=True)
  )
  (mlp): MLP(
    (ff_1): Linear(in_features=100, out_features=64, bias=True)
    (relu): ReLU()
    (ff_2): Linear(in_features=64, out_features=15, bias=True)
  )
)

In [29]:
def test_sentence(test_sen):
    test_sen_list = TEXT.preprocess(test_sen)
    test_sen = [[TEXT.vocab.stoi[x] for x in test_sen_list]]
    # print(test_sen)

    test_sen = np.asarray(test_sen)
    test_sen = torch.LongTensor(test_sen)
    test_tensor = Variable(test_sen)

    loaded_model.eval()
    prediction = loaded_model(test_tensor)

    out_class = torch.argmax(prediction)
    
    # Can hardcode class label text instead of index
    return out_class

In [30]:
# Sample sentences; the trailing comments are the author's label notes (presumably the expected emotion).
test_sen1 = "We need more boards and to create a bit more space for [NAME]. Then we’ll be good." # Neutral
test_sen2 = "Not surprised, damn that sucks. Concussions are awful." # Anger
# Alternative sample for test_sen3:
# test_sen3 = "seriously wtf. I want to see how the whole hand went in detail. that was the sickest soulread ever" # Anger
test_sen3 = "Thank you SO much! This is so genuine and so helpful. Thank you so much for your time and your thoughts."
test_sen4 = "I am so happy for you" # Joy
test_sen5 = "I do not know what that is." # Neutral
test_sen6 = "Are you kidding me!! Really??" # Joy

test_sen = [test_sen1, test_sen2, test_sen3, test_sen4, test_sen5, test_sen6]

# Iterate over the list itself instead of a hard-coded range(6), so adding or
# removing a sample cannot fall out of sync with the loop bound.
for sentence in test_sen:
    print('------------')
    predicted_class = test_sentence(sentence)
    print(predicted_class.item())

------------
0
------------
1
------------
6
------------
0
------------
2
------------
7
