In [1]:
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn.functional as F
import torch.distributions
import torch.optim as optim
from torch import nn 
from torch.autograd import Variable

import torchtext
from torchtext import data
from torchtext import datasets
from torchtext.vocab import Vectors, GloVe

In [2]:
class Model(torch.nn.Module):
    def __init__(self, batch_size, num_classes, mlp_out_size, vocab_size, embedding_length, weights, num_layers, hidden_size = 100, biDirectional = False):
        super(Model, self).__init__() 
        """
        Arguments
        ---------
        batch_size : Size of the batch which is same as the batch_size of the data returned by the TorchText BucketIterator
        num_classes : 28 = (For full classification)
        hidden_sie : Size of the hidden_state of the LSTM   (// Later BiLSTM)
        vocab_size : Size of the vocabulary containing unique words
        embedding_length : Embeddding dimension of GloVe word embeddings
        weights : Pre-trained GloVe word_embeddings which we will use to create our word_embedding look-up table 
        --------

        """

        self.batch_size = batch_size
        self.num_classes = num_classes
        self.vocab_size = vocab_size
        self.embedding_length = embedding_length
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.mlp_out_size = mlp_out_size
        self.biDirectional = biDirectional

        self.word_embeddings = nn.Embedding(vocab_size, embedding_length)
        self.word_embeddings.weights = nn.Parameter(weights, requires_grad=False)
        
        self.lstm_layer = LSTM(self.batch_size, self.hidden_size, self.embedding_length, self.biDirectional, self.num_layers)

        if(self.biDirectional):
            self.mlp = MLP(self.hidden_size*2, self.mlp_out_size, self.num_classes)
#             self.FF = nn.Linear(self.hidden_size*2, num_classes)
        else:
            self.mlp = MLP(self.hidden_size, self.mlp_out_size, self.num_classes)
#             self.FF = nn.Linear(self.hidden_size, self.num_classes)

    def forward(self, input_sequence):
        input_ = self.word_embeddings(input_sequence)
        out_lstm, final_hidden_state = self.lstm_layer(input_)
        if self.biDirectional:
            final_hidden_state = final_hidden_state.view(self.num_layers, 2, input_.shape[0], self.hidden_size) # num_layer x num_dir x batch x hidden
            final_hidden_state = final_hidden_state[-1]
            final_hidden_state = final_hidden_state.transpose(0,1).reshape(input_.shape[0], self.hidden_size*2)
        else:
            final_hidden_state = final_hidden_state[-1]
        
        mlp_output = self.mlp(final_hidden_state)
#         ff_output = self.FF(mlp_output)
#         print("FF out size: ", ff_output.shape)
        predictions = torch.softmax(mlp_output, dim = -1)
        return predictions

In [3]:
class LSTM(torch.nn.Module):
    """
        Arguments
        ---------
        batch_size : Size of the batch which is same as the batch_size of the data returned by the TorchText BucketIterator
        hidden_size : Size of the hidden_state of the LSTM   (* Later BiLSTM, check dims for BiLSTM *)
        embedding_length : Embeddding dimension of GloVe word embeddings
        --------
    """
    def __init__(self, batch_size, hidden_size, embedding_length, biDirectional = False, num_layers = 2):

        super(LSTM, self).__init__()
        self.batch_size = batch_size
        self.hidden_size = hidden_size
        self.embedding_length = embedding_length
        self.biDirectional= biDirectional
        self.num_layers = num_layers

        self.lstm = nn.LSTM(self.embedding_length, self.hidden_size, bidirectional = self.biDirectional, batch_first = True, num_layers = self.num_layers)   # Dropout  

    def forward(self, input_sequence, batch_size=None):
        out_lstm, (final_hidden_state, final_cell_state) = self.lstm(input_sequence)   # ouput dim: ( batch_size x seq_len x hidden_size )
        return out_lstm, final_hidden_state

In [4]:
# If want to add extra MLP Layer
class MLP(torch.nn.Module):
    def __init__(self, input_dim, output_dim, num_classes):
        super(MLP, self).__init__()

        self.input_dim = input_dim
        self.output_dim = output_dim
        self.num_classes = num_classes

        self.ff_1 = nn.Linear(self.input_dim, self.output_dim)
        self.relu = nn.ReLU()
        self.ff_2 = nn.Linear(self.output_dim, self.num_classes)
#         self.sigmoid = nn.Sigmoid()

    def forward(self,x):
        out_1 = self.ff_1(x)
        out_relu = self.relu(out_1)
        out_2 = self.ff_2(out_relu)
#         out_sigmoid = self.sigmoid(out_2)

        return out_2

In [5]:
def clip_gradient(model, clip_value):
    params = list(filter(lambda p: p.grad is not None, model.parameters()))
    for p in params:
        p.grad.data.clamp_(-clip_value, clip_value)

In [6]:
def train_model(model, optim, train_iter, epoch, batch_size, num_classes):
    total_epoch_loss = 0
    total_epoch_acc = 0
    
    steps = 0
    model.train()
    for idx, batch in enumerate(train_iter):
        text = batch.text[0]
        target = batch.labels
        target = torch.autograd.Variable(target).long()
        if torch.cuda.is_available():
            text = text.cuda()
            target = target.cuda()
        if (text.size()[0] is not batch_size): # One of the batch returned by BucketIterator has length different than 32.
            continue
        optim.zero_grad()
        prediction = model(text)
#         print(prediction.shape)
#         print(prediction)
#         print(target.shape)
#         print(target)
        loss =  loss_fn(prediction, target)
#         if math.isnan(loss.item()):
#             print(prediction, target)

        num_corrects = (torch.max(prediction, 1)[1].view(target.size()).data == target.data).sum()
        acc = 100.0 * num_corrects/len(batch)
        loss.backward()
        clip_gradient(model, 1e-1)
        optim.step()
        steps += 1
        
        total_epoch_loss += loss.item()
        total_epoch_acc += acc.item()
        
    return total_epoch_loss/len(train_iter), total_epoch_acc/len(train_iter)

In [7]:
def eval_model(model, val_iter, batch_size, num_classes):
    total_epoch_loss = 0
    total_epoch_acc = 0
    model.eval()
    with torch.no_grad():
        for idx, batch in enumerate(val_iter):
            text = batch.text[0]
            if (text.size()[0] is not batch_size):
                continue
            target = batch.labels
            target = torch.autograd.Variable(target).long()
            if torch.cuda.is_available():
                text = text.cuda()
                target = target.cuda()
            prediction = model(text)
            # Sanity check
            # print("Test Prediction: ", prediction)
            # Defualt - Cross entropy loss funtion
            loss =  loss_fn(prediction, target)
            
            if math.isnan(loss.item()):
                print(prediction, target)
            
            num_corrects = (torch.max(prediction, 1)[1].view(target.size()).data == target.data).sum()
            acc = 100.0 * num_corrects/len(batch)
            total_epoch_loss += loss.item()
            total_epoch_acc += acc.item()
            
    return total_epoch_loss/len(val_iter), total_epoch_acc/len(val_iter)

In [8]:
def load_data(batch_size= 16, embedding_length = 100):
    tokenize = lambda x: x.split()
    TEXT = data.Field(sequential=True, tokenize=tokenize, lower=True, include_lengths=True, batch_first=True, fix_length=30)
    LABELS = data.LabelField(batch_first=True, dtype=torch.float)

    train, val, test = data.TabularDataset.splits(
      path='/Users/prakruti/Documents/GoEmotions-classification/data/multi_class_15/', train='train.tsv',
      validation='dev.tsv', test='test.tsv', format='tsv',
      fields=[('text', TEXT), ('labels', LABELS)])
    
    train_iter, val_iter, test_iter = data.BucketIterator.splits(
      (train, val, test), batch_sizes=(batch_size, batch_size, batch_size), sort_key=lambda x: len(x.text), device=0)

    # build the vocabulary
    TEXT.build_vocab(train, vectors=GloVe(name='6B', dim=embedding_length))
    LABELS.build_vocab(train)
    print(LABELS.vocab.__dict__)

    word_embeddings = TEXT.vocab.vectors
    vocab_size = len(TEXT.vocab)

    return TEXT, vocab_size, word_embeddings, train_iter, val_iter, test_iter

In [11]:
# def vectorize(x):
#     if x == '':
#         return 5
#     else:
#         x = float(x)
#         return int(x)

In [9]:
batch_size = 32
TEXT, vocab_size, word_embeddings, train_iter, valid_iter, test_iter = load_data(batch_size)

The `device` argument should be set by using `torch.device` or passing a string as an argument. This behavior will be deprecated soon and currently defaults to cpu.
The `device` argument should be set by using `torch.device` or passing a string as an argument. This behavior will be deprecated soon and currently defaults to cpu.
The `device` argument should be set by using `torch.device` or passing a string as an argument. This behavior will be deprecated soon and currently defaults to cpu.


{'freqs': Counter({'0': 4771, '1': 4387, '2': 4037, '3': 3181, '4': 3173, '5': 2939, '6': 2662, '7': 2191, '8': 1948, '9': 1716, '10': 1581, '11': 1368, '12': 1110, '13': 1060, '14': 760}), 'itos': ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14'], 'unk_index': None, 'stoi': defaultdict(None, {'0': 0, '1': 1, '2': 2, '3': 3, '4': 4, '5': 5, '6': 6, '7': 7, '8': 8, '9': 9, '10': 10, '11': 11, '12': 12, '13': 13, '14': 14}), 'vectors': None}


In [None]:
torch.save(vocab_size, 'models/class_15/BiLSTM_vocab')
torch.save(word_embeddings, 'models/class_15/BiLSTM_word_embeddings')

In [10]:
# Cross entropy loss
loss_fn = F.cross_entropy

learning_rate = 2e-4
embedding_length = 100
num_classes = 15
mlp_out_size = 64
weights = word_embeddings
hidden_size = 100
num_layers = 3

model = Model(batch_size, num_classes, mlp_out_size, vocab_size, embedding_length, weights, num_layers, hidden_size, biDirectional=True)
optim = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()))

In [None]:
num_bad_epochs = 0
epoch = 0
least_loss = float('inf')
training_stats = pd.DataFrame(columns=['Epoch', 'Train_Loss', 'Train_Acc', 'Val_Loss', 'Val_Acc'])

while(True):
    print("Epoch", epoch)
    train_loss, train_acc = train_model(model, optim, train_iter, epoch, batch_size, num_classes)
    val_loss, val_acc = eval_model(model, valid_iter, batch_size, num_classes) 
    print(f'Epoch: {epoch+1:02}')
    if val_loss < least_loss:
        least_loss = val_loss
        num_bad_epochs = 0
        print("*** Least validation loss")
        torch.save(model.state_dict(), "models/class_15/BiLSTM_BS_32")
    else:
        num_bad_epochs += 1
#     print(f'Epoch: {epoch+1:2}, Train Loss: {train_loss:.3f}, Train Acc: {train_acc:.2f}%')
    print(f'Train Loss: {train_loss:.3f}, Train Acc: {train_acc:.2f}%') 
    print(f'Val Loss: {val_loss:3f}, Val Acc: {val_acc:.2f}%')
    print("-------------")
    
    training_stats = training_stats.append(
        pd.Series([epoch+1, train_loss, train_acc, val_loss, val_acc], index=training_stats.columns), 
        ignore_index=True)
    if num_bad_epochs >= 8:
        break
        
    epoch += 1
    if epoch == 100:
        break

Epoch 0
Epoch: 01
*** Least validation loss
Train Loss: 2.642, Train Acc: 15.93%
Val Loss: 2.616884, Val Acc: 17.35%
-------------
Epoch 1
Epoch: 02
*** Least validation loss
Train Loss: 2.610, Train Acc: 19.67%
Val Loss: 2.562409, Val Acc: 23.17%
-------------
Epoch 2
Epoch: 03
*** Least validation loss
Train Loss: 2.546, Train Acc: 26.36%
Val Loss: 2.520485, Val Acc: 27.33%
-------------
Epoch 3
Epoch: 04
*** Least validation loss
Train Loss: 2.513, Train Acc: 29.81%
Val Loss: 2.503529, Val Acc: 29.12%
-------------
Epoch 4
Epoch: 05
Train Loss: 2.479, Train Acc: 33.36%
Val Loss: 2.524757, Val Acc: 27.00%
-------------
Epoch 5
Epoch: 06
*** Least validation loss
Train Loss: 2.453, Train Acc: 35.90%
Val Loss: 2.475300, Val Acc: 31.68%
-------------
Epoch 6
Epoch: 07
*** Least validation loss
Train Loss: 2.431, Train Acc: 38.06%
Val Loss: 2.449828, Val Acc: 34.53%
-------------
Epoch 7
Epoch: 08
*** Least validation loss
Train Loss: 2.413, Train Acc: 39.89%
Val Loss: 2.448612, Val Acc:

In [1]:
training_stats.to_csv('training_stats/class_15/BiLSTM_BS_32.csv')

NameError: name 'training_stats' is not defined

In [17]:
loaded_model = Model(batch_size, num_classes, mlp_out_size, vocab_size, embedding_length, weights,num_layers, hidden_size, biDirectional=False)
loaded_model.load_state_dict(torch.load('models/class_15/BiLSTM_BS_32'))
loaded_model.eval()

Model(
  (word_embeddings): Embedding(38110, 100)
  (lstm_layer): LSTM(
    (lstm): LSTM(100, 100, num_layers=3, batch_first=True)
  )
  (mlp): MLP(
    (ff_1): Linear(in_features=100, out_features=64, bias=True)
    (relu): ReLU()
    (ff_2): Linear(in_features=64, out_features=15, bias=True)
  )
)

In [20]:
test_loss, test_acc = eval_model(loaded_model, test_iter, batch_size, num_classes)
print(f'Test Loss: {test_loss:.3f}, Test Acc: {test_acc:.2f}')

Test Loss: 2.384, Test Acc: 41.07


In [21]:
def test_sentence(test_sen):
    test_sen_list = TEXT.preprocess(test_sen)
    print(test_sen_list)
    test_sen = [[TEXT.vocab.stoi[x] for x in test_sen_list]]

    test_sen = np.asarray(test_sen)
    test_sen = torch.LongTensor(test_sen)
    test_tensor = Variable(test_sen)

    loaded_model.eval()
    prediction = loaded_model(test_tensor)
#     print("prediction =", prediction)

    out_class = torch.argmax(prediction)
    return out_class

In [None]:
# label_idx = {
#     '0' :  '0', # admiration, desire
#     '10' :  '1', # disapproval, disgust, disappointment, embarrassment
#     '2' : '2',  # anger, annoyance
#     '13' : '3', # excitement, amusement
#     '18' : '4', # love, caring
#     '4' : '5',  # approval
#     '15' : '6', # gratitude
#     '7' : '7',  # curiosity
#     '25' : '8', # sadness , grief, remorse
#     '17' : '9', # joy , pride, relief
#     '20' : '10', # optimism
#     '6' : '11', # confusion
#     '22' : '12', # realization
#     '26' : '13', # surprise
#     '14' :  '14' # fear, nervousness
# }

In [22]:
# test_sen1 = "I enjoyed it."
test_sen1 = "We need more boards and to create a bit more space for [NAME]. Then we’ll be good." # Neutral
test_sen2 = "Not surprised, damn that sucks. Concussions are awful." # Anger
# test_sen3 = "Are you kidding me!! Really??"
# test_sen3 = "seriously wtf. I want to see how the whole hand went in detail. that was the sickest soulread ever" # Anger
test_sen3 = "Thank you SO much! This is so genuine and so helpful. Thank you so much for your time and your thoughts."
test_sen4 = "I am so happy for you" # Joy
test_sen5 = "I do not know what that is." # Nuetral
test_sen6 = "Are you kidding me!! Really??" # Joy

test_sen = [test_sen1, test_sen2, test_sen3, test_sen4, test_sen5, test_sen6]

for i in range(6):
    print('------------')
    x = test_sentence(test_sen[i])
    print(x)

------------
['we', 'need', 'more', 'boards', 'and', 'to', 'create', 'a', 'bit', 'more', 'space', 'for', '[name].', 'then', 'we’ll', 'be', 'good.']
prediction = tensor([[9.9853e-01, 3.2671e-10, 5.1728e-11, 7.2792e-10, 1.3660e-03, 9.4108e-16,
         1.1968e-07, 6.2128e-13, 1.4500e-19, 1.0028e-18, 1.0177e-04, 6.5502e-21,
         5.3880e-22, 2.4030e-21, 1.1633e-22]], grad_fn=<SoftmaxBackward>)
tensor(0)
------------
['not', 'surprised,', 'damn', 'that', 'sucks.', 'concussions', 'are', 'awful.']
prediction = tensor([[3.2741e-15, 1.0000e+00, 6.7946e-14, 7.6872e-15, 1.7044e-22, 1.6645e-24,
         2.4135e-20, 9.7690e-20, 4.1407e-17, 1.1525e-24, 2.9055e-24, 6.3256e-27,
         4.9868e-28, 2.8914e-28, 1.8674e-28]], grad_fn=<SoftmaxBackward>)
tensor(1)
------------
['thank', 'you', 'so', 'much!', 'this', 'is', 'so', 'genuine', 'and', 'so', 'helpful.', 'thank', 'you', 'so', 'much', 'for', 'your', 'time', 'and', 'your', 'thoughts.']
prediction = tensor([[6.6895e-24, 7.1446e-33, 4.7108e-27, 1