In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import random
from sklearn.model_selection import train_test_split 
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
random.seed(0)
seed = 0

In [2]:
filename = 'data/train_conll_hinglish.csv'
import torchtext

def label2int(label):
    if label=='positive':
        return 1
    elif label=='negative':
        return 0
    else:
        return 2

text_field = torchtext.data.Field(sequential=True,      # text sequence
                                  tokenize=lambda x: x.lower(), # because are building a character-RNN
                                  include_lengths=False, # to track the length of sequences, for batching
                                  batch_first=True,
                                  use_vocab=True)       # to turn each character into an integer index
label_field = torchtext.data.Field(sequential=False,    # not a sequence
                                   use_vocab=False,     # don't need to track vocabulary
                                   is_target=True,      
                                   batch_first=True,
                                   preprocessing=lambda x: label2int(x)) # convert text to 0 and 1

fields = [('id', None),('text', text_field), ('label', label_field)]
dataset = torchtext.data.TabularDataset(filename, # name of the file
                                        "tsv",               # fields are separated by a tab
                                        fields)

In [3]:
for i in range(0,10):
    print(dataset[i].text, "---", dataset[i].label)

@ adilnisarbutt pakistan ka ghra tauq he pakistan israel ko tasleem nahein kerta isko palestine kehta he- occupied palestine --- 0
madarchod mulle ye mathura me nahi dikha tha jab mullo ne hindu ko iss liye mara ki vo lasse ki paise mag liye the… https// t. co/ oxf8tr3bly --- 0
@ narendramodi manya pradhan mantri mahoday shriman narendra modi ji pradhanmantri banne par hardik badhai tahe dil… https// t. co/ prnomskkn1 --- 1
@ atheist_ krishna jcb full trend me chal rahi aa --- 1
@ abhisharsharma_@ ravishkumarblog loksabha me janta sirf modi ko vote de rahi thi na ki kisi mp or bjp ko without m… https// t. co/ shtbwcb7fm --- 1
@ noirnaveed@ angelahana6@ cricketworldcup bhosdike tum pechvade ki tatti hi rahoge bc --- 0
love u bhaijan...♥♥ father+ son..# bharat# iambharat# bharatthiseid best pic from entire# promotions... mashallah… https// t. co/ s2xhwu6lud --- 1
@ manojgajjar111 tumhara pass abh deemagh hai nahi islea google ko apna deemagh banaya hua hai. har koi tumhari tarh… https// 

In [4]:
train, val, test = dataset.split(split_ratio=[0.8,0.1,0.1])

In [5]:
print(len(train), len(val), len(test), len(dataset))

12105 1513 1513 15131


In [6]:
text_field.build_vocab(dataset)
text_field.vocab.stoi

47,
             "'": 48,
             '-': 49,
             '’': 50,
             '️': 51,
             '🙏': 52,
             '❤': 53,
             '🤣': 54,
             '&': 55,
             '😍': 56,
             ')': 57,
             '(': 58,
             '😭': 59,
             '*': 60,
             '😘': 61,
             '🇮': 62,
             '🇳': 63,
             '😊': 64,
             '🌹': 65,
             '💜': 66,
             '🎂': 67,
             '“': 68,
             '💕': 69,
             'ा': 70,
             '😁': 71,
             '👏': 72,
             '🎉': 73,
             '💖': 74,
             '|': 75,
             '👍': 76,
             '”': 77,
             '👌': 78,
             '✌': 79,
             '%': 80,
             'र': 81,
             '😜': 82,
             '💐': 83,
             '😆': 84,
             '🏻': 85,
             '+': 86,
             '♥': 87,
             '्': 88,
             '🤗': 89,
             '😎': 90,
             '😡': 91,
             '🙄': 92,
      

In [7]:
text_field.vocab.itos

['<unk>',
 '<pad>',
 ' ',
 'a',
 'i',
 'e',
 'h',
 't',
 'o',
 'r',
 's',
 'n',
 'k',
 'm',
 'l',
 'd',
 'u',
 'p',
 'b',
 'y',
 'c',
 'g',
 '/',
 '@',
 '.',
 'j',
 'w',
 'f',
 'v',
 '…',
 'z',
 '_',
 '1',
 '0',
 'q',
 '2',
 'x',
 '3',
 '9',
 '5',
 '4',
 '7',
 '8',
 '6',
 '#',
 '!',
 '😂',
 '?',
 "'",
 '-',
 '’',
 '️',
 '🙏',
 '❤',
 '🤣',
 '&',
 '😍',
 ')',
 '(',
 '😭',
 '*',
 '😘',
 '🇮',
 '🇳',
 '😊',
 '🌹',
 '💜',
 '🎂',
 '“',
 '💕',
 'ा',
 '😁',
 '👏',
 '🎉',
 '💖',
 '|',
 '👍',
 '”',
 '👌',
 '✌',
 '%',
 'र',
 '😜',
 '💐',
 '😆',
 '🏻',
 '+',
 '♥',
 '्',
 '🤗',
 '😎',
 '😡',
 '🙄',
 '🔥',
 '~',
 '🤔',
 '\u200d',
 '💙',
 '😉',
 '😠',
 '😅',
 '🙌',
 '🤪',
 'क',
 'न',
 'म',
 '😀',
 ';',
 '🌸',
 'ी',
 '💓',
 'स',
 '💔',
 '😋',
 '‘',
 '☺',
 '😌',
 '😢',
 'े',
 '—',
 '💞',
 '🥰',
 '💪',
 'त',
 'ह',
 '😔',
 '😝',
 '💗',
 '💯',
 '😄',
 '😑',
 '😒',
 '[',
 '🙈',
 '🌷',
 '🎊',
 '🚩',
 'ं',
 '😇',
 '=',
 '👇',
 '😪',
 ']',
 'é',
 'य',
 '🌺',
 '😹',
 'ि',
 '🏼',
 '😐',
 '😛',
 '>',
 '🙂',
 '😬',
 'ग',
 'प',
 'ब',
 '🎁',
 '😃',
 '💚',
 '😤',
 '🤩',
 '$',
 'á',
 '🎈

In [8]:
small_batch = 128
train_iter = torchtext.data.BucketIterator(train,
                                           batch_size=small_batch,
                                           sort_key=lambda x: len(x.text), # to minimize padding
                                           sort_within_batch=True,        # sort within each batch
                                          )
#                                            repeat=True)                   # repeat the iterator for multiple epochs
val_iter = torchtext.data.BucketIterator(val,
                                           batch_size=small_batch,
                                           sort_key=lambda x: len(x.text), # to minimize padding
                                           sort_within_batch=True,        # sort within each batch
                                        )
#                                            repeat=True)                   # repeat the iterator for multiple epochs
test_iter = torchtext.data.BucketIterator(test,
                                           batch_size=small_batch,
                                           sort_key=lambda x: len(x.text), # to minimize padding
                                           sort_within_batch=True,        # sort within each batch
                                         )
#                                            repeat=True)                   # repeat the iterator for multiple epochs



In [9]:
print(train_iter)
for i, batch in enumerate(train_iter):
    if i >= 10:
        break
    print(batch.text)
    print(batch.label)

<torchtext.data.iterator.BucketIterator object at 0x7f8ee5833f10>
tensor([[23,  2, 17,  ..., 27,  4,  7],
        [15,  3, 10,  ...,  8, 16, 24],
        [ 9,  7, 23,  ...,  4,  5, 10],
        ...,
        [ 9,  7, 23,  ..., 77,  1,  1],
        [23,  2, 10,  ..., 21,  1,  1],
        [ 9,  7, 23,  ..., 46,  1,  1]])
tensor([1, 1, 1, 2, 1, 1, 2, 0, 1, 2, 2, 2, 0, 1, 2, 1, 1, 2, 2, 2, 2, 2, 0, 1,
        1, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 2, 0, 1, 1, 1, 0, 2, 2, 0, 0, 2, 2, 1,
        1, 0, 1, 2, 1, 1, 2, 2, 2, 0, 0, 0, 2, 2, 1, 2, 2, 0, 1, 2, 2, 1, 2, 0,
        2, 0, 1, 2, 2, 1, 2, 1, 0, 1, 2, 1, 1, 1, 2, 0, 0, 2, 1, 1, 2, 1, 1, 2,
        0, 2, 2, 0, 1, 1, 2, 2, 1, 0, 1, 2, 2, 2, 1, 1, 0, 2, 2, 0, 2, 1, 1, 1,
        2, 2, 1, 1, 0, 2, 1, 0])
tensor([[23,  2, 28,  ..., 20, 14,  5],
        [23,  2, 18,  ...,  2, 17,  5],
        [ 9,  7, 23,  ...,  3, 14,  4],
        ...,
        [23,  2, 10,  ...,  1,  1,  1],
        [23,  2, 10,  ...,  1,  1,  1],
        [23,  2, 12,  ...,  1,  1,

In [10]:
from torch import nn

class SentimentLSTM(nn.Module):
    
    def __init__(self, n_vocab, n_embed, n_hidden, n_output, n_layers, bidir=False, drop_p = 0.5):
        super().__init__()
        # params: "n_" means dimension
        self.n_vocab = n_vocab     # number of unique words in vocabulary
        self.n_layers = n_layers   # number of LSTM layers 
        self.n_hidden = n_hidden   # number of hidden nodes in LSTM
        
        self.embedding = nn.Embedding(n_vocab, n_embed)
        # self.lstm = nn.LSTM(n_embed, n_hidden, n_layers, batch_first = True, dropout = drop_p)
        self.lstm = nn.LSTM(n_embed, n_hidden, n_layers, batch_first = True, bidirectional=True, dropout = drop_p)
        self.dropout = nn.Dropout(drop_p)
        self.fc = nn.Linear(2*n_hidden, n_output)
        # self.sigmoid = nn.Sigmoid()
        
    def forward (self, input_words):
                                             # INPUT   :  (batch_size, seq_length)
        batch_size = len(input_words)
        embedded_words = self.embedding(input_words)    # (batch_size, seq_length, n_embed)
        lstm_out, h = self.lstm(embedded_words)         # (batch_size, seq_length, n_hidden)
        lstm_out = self.dropout(lstm_out)
        out = self.fc(lstm_out[:,-1])
        log_probs = F.log_softmax(out)

        # sigmoid_out = self.sigmoid(fc_out)              # (batch_size*seq_length, n_output)
        # sigmoid_out = fc_out.view(batch_size, -1)  # (batch_size, seq_length*n_output)
        
        # extract the output of ONLY the LAST output of the LAST element of the sequence
        # sigmoid_last = sigmoid_out[:, -1].view([batch_size,1])               # (batch_size, 1)
        # print("this",sigmoid_last.shape, sigmoid_out.shape, input_words.shape)
        return log_probs
        # return sigmoid_last, h
    
    
    def init_hidden (self, batch_size):  # initialize hidden weights (h,c) to 0
        
        device = "cuda" if torch.cuda.is_available() else "cpu"
        weights = next(self.parameters()).data
        h = (weights.new(self.n_layers, batch_size, self.n_hidden).zero_().to(device),
             weights.new(self.n_layers, batch_size, self.n_hidden).zero_().to(device))
        
        return h

In [11]:
from torch import optim
device = 'cuda' if torch.cuda.is_available else 'cpu'

In [12]:
def binary_accuracy(logits, labels):
    correct, total = 0, 0
    _, predicted = torch.max(logits, 1)
#     print(predicted, labels)
#     print(predicted.shape, labels.shape)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()
    return correct / total

In [13]:
def train(model, iterator, optimizer, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.train()

    for batch in iterator:
        optimizer.zero_grad()
        h = model.init_hidden(len(batch))
        batch.text = batch.text.to(device)
        # predictions, _ = model(batch.text)
        predictions = model(batch.text)
        # predictions = predictions.squeeze(1)
        # print(batch.text.shape, predictions.shape, batch.label.shape)

        # target = torch.tensor(batch.label, dtype=torch.float, device=device)
        loss = criterion(predictions, batch.label.to(device))

        acc = binary_accuracy(predictions, batch.label.to(device))
        
        loss.backward()
        
        optimizer.step()
        
        epoch_loss += loss.item()
        epoch_acc += acc

        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [14]:
def evaluate(model, iterator, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.eval()
    
    with torch.no_grad():
    
        for batch in iterator:
            h = model.init_hidden(len(batch))
            batch.text = batch.text.to(device)
            predictions = model(batch.text)

            loss = criterion(predictions, batch.label.to(device))
            
            acc = binary_accuracy(predictions, batch.label.to(device))

            epoch_loss += loss.item()
            epoch_acc += acc
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [15]:
from sklearn.metrics import confusion_matrix

def get_metrics(model, config):
    train, val, test = dataset.split(split_ratio=[0.8,0.1,0.1])
    exp_name = config["NAME"]
    test_iter = torchtext.data.BucketIterator(test,
                                           batch_size=len(test),
                                           sort_key=lambda x: len(x.text), # to minimize padding
                                           sort_within_batch=True)        # sort within each batch
    print("BEST METRICS VALID")
    model.load_state_dict(torch.load('{}-valid.pt'.format(exp_name)))
    model.eval()
    with torch.no_grad():
        for batch in test_iter:
            h = model.init_hidden(len(batch))
            batch.text = batch.text.to(device)
            predictions = model(batch.text)
            _, preds = torch.max(predictions, 1)
            precision, recall, f_1, _ = precision_recall_fscore_support(batch.label.cpu(),preds.cpu(),average='macro')
            print(f'Test f1: {f_1:.3f} | Test Prec: {recall*100:.2f}% | Test Recall: {recall*100:.2f}% ')
            conf = confusion_matrix(batch.label.cpu(), preds.cpu())
            pp.pprint(conf)
            

    print("BEST METRICS TRAIN")
    model.load_state_dict(torch.load('{}-train.pt'.format(exp_name)))
    model.eval()
    with torch.no_grad():
        for batch in test_iter:
            h = model.init_hidden(len(batch))
            batch.text = batch.text.to(device)
            predictions = model(batch.text)
            _, preds = torch.max(predictions, 1)
            precision, recall, f_1, _ = precision_recall_fscore_support(batch.label.cpu(),preds.cpu(),average='macro')
            print(f'Test f1: {f_1:.3f} | Test Prec: {recall*100:.2f}% | Test Recall: {recall*100:.2f}% ')
            conf = confusion_matrix(batch.label.cpu(), preds.cpu())
            pp.pprint(conf)

    

In [16]:
def test(model, config):
    criterion = nn.CrossEntropyLoss()
    model = model.to(device)
    criterion = criterion.to(device)
    exp_name = config["NAME"]
    print("TEST VALID")
    model.load_state_dict(torch.load('{}-valid.pt'.format(exp_name)))
    model.eval()
    valid_loss, valid_acc = evaluate(model, test_iter, criterion)
    print(f'\t Test. Loss: {valid_loss:.3f} |  Test. Acc: {valid_acc*100:.2f}%')

    print("TEST TRAIN")
    model.load_state_dict(torch.load('{}-train.pt'.format(exp_name)))
    model.eval()
    valid_loss, valid_acc = evaluate(model, test_iter, criterion)
    print(f'\t Test. Loss: {valid_loss:.3f} |  Test. Acc: {valid_acc*100:.2f}%')


In [17]:
import time

def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs


In [18]:
def run_loop(model, config):
    optimizer = optim.RMSprop(model.parameters(), lr = config["LR"])
    criterion = nn.CrossEntropyLoss()
    model = model.to(device)
    criterion = criterion.to(device)
    N_EPOCHS = config["N_EPOCHS"]
    exp_name = config["NAME"]

    best_valid_loss = float('inf')
    best_train_loss = float('inf')

    print("BEGIN TRAINING")
    print("-"*50)
    for epoch in range(N_EPOCHS):

        start_time = time.time()
        
        train_loss, train_acc = train(model, train_iter, optimizer, criterion)
        valid_loss, valid_acc = evaluate(model, val_iter, criterion)
        
        end_time = time.time()

        epoch_mins, epoch_secs = epoch_time(start_time, end_time)
        
        if valid_loss < best_valid_loss:
            print("SAVED VALID")
            best_valid_loss = valid_loss
            torch.save(model.state_dict(), '{}-valid.pt'.format(exp_name))

        if train_loss < best_train_loss:
            print("SAVED TRAIN")
            best_train_loss = valid_loss
            torch.save(model.state_dict(), '{}-train.pt'.format(exp_name))
        
        print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
        print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
        print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')


In [19]:
import pprint
pp = pprint.PrettyPrinter(indent=4)
print("-"*50)
print("Running Sentiment Analysis, Random Embeddings, hinglish")

config = {
    "NAME": "LSTM_RandomEmbeddings_hinglish",
    "N_EPOCHS": 100,
    "N_VOCAB": len(text_field.vocab.itos),
    "N_EMBED" : 400,
    "EMB_TRAIN": True,
    "N_HIDDEN" : 130,
    "N_OUTPUT" : 3,
    "N_LAYERS" : 2,
    "BIDIR": True,
    "DRPOUT": 0.5,
    "LR": 0.01
}
model = SentimentLSTM(config["N_VOCAB"],config["N_EMBED"], config["N_HIDDEN"], config["N_OUTPUT"], config["N_LAYERS"], config["BIDIR"], config["DRPOUT"])
pp.pprint(config)
print("-"*50)
run_loop(model, config)
print("-"*50)
print("END TRAINING")
print("-"*50)
print("START TESTING")
print("-"*50)
test(model, config)
print("-"*50)
print("END TESTING")
print("-"*50)
print("GET METRICS")
print("-"*50)
get_metrics(model, config)

--------------------------------------------------
Running Sentiment Analysis, Random Embeddings, hinglish
{   'BIDIR': True,
    'DRPOUT': 0.5,
    'EMB_TRAIN': True,
    'LR': 0.01,
    'NAME': 'LSTM_RandomEmbeddings_hinglish',
    'N_EMBED': 400,
    'N_EPOCHS': 100,
    'N_HIDDEN': 130,
    'N_LAYERS': 2,
    'N_OUTPUT': 3,
    'N_VOCAB': 899}
--------------------------------------------------
BEGIN TRAINING
--------------------------------------------------
SAVED VALID
SAVED TRAIN
Epoch: 01 | Epoch Time: 0m 4s
	Train Loss: 1.331 | Train Acc: 35.82%
	 Val. Loss: 1.129 |  Val. Acc: 36.18%
SAVED VALID
SAVED TRAIN
Epoch: 02 | Epoch Time: 0m 4s
	Train Loss: 1.099 | Train Acc: 37.69%
	 Val. Loss: 1.101 |  Val. Acc: 36.63%
SAVED VALID
SAVED TRAIN
Epoch: 03 | Epoch Time: 0m 4s
	Train Loss: 1.093 | Train Acc: 38.13%
	 Val. Loss: 1.084 |  Val. Acc: 38.56%
Epoch: 04 | Epoch Time: 0m 4s
	Train Loss: 1.090 | Train Acc: 38.06%
	 Val. Loss: 1.104 |  Val. Acc: 36.46%
SAVED VALID
Epoch: 05 | Epoch

In [54]:
class OneHotLSTM(nn.Module):
    
    def __init__(self, config, drop_p = 0.5):
        super().__init__()
        # params: "n_" means dimension
        self.n_vocab = config["N_VOCAB"]     # number of unique words in vocabulary
        self.n_layers = config["N_LAYERS"]   # number of LSTM layers 
        self.n_hidden = config["N_HIDDEN"]   # number of hidden nodes in LSTM
        
        self.embedding = nn.Embedding(self.n_vocab, config["N_EMBED"])
        self.embedding.weight.data = torch.eye(self.n_vocab)
        # make embedding untrainable
        if not config["EMB_TRAIN"]:
            self.embedding.weight.requires_grad=False
        self.lstm = nn.LSTM(config["N_EMBED"], self.n_hidden, self.n_layers, batch_first = True, dropout = drop_p)
        self.dropout = nn.Dropout(drop_p)
        self.fc = nn.Linear(self.n_hidden, config["N_OUTPUT"])
        self.sigmoid = nn.Sigmoid()
        
    def forward (self, input_words):
                                             # INPUT   :  (batch_size, seq_length)
        batch_size = len(input_words)
        embedded_words = self.embedding(input_words)    # (batch_size, seq_length, n_embed)
        lstm_out, h = self.lstm(embedded_words)         # (batch_size, seq_length, n_hidden)
        lstm_out = self.dropout(lstm_out)
        lstm_out = lstm_out.contiguous().view(-1, self.n_hidden) # (batch_size*seq_length, n_hidden)
        fc_out = self.fc(lstm_out)                      # (batch_size*seq_length, n_output)
        sigmoid_out = self.sigmoid(fc_out)              # (batch_size*seq_length, n_output)
        sigmoid_out = sigmoid_out.view(batch_size, -1)  # (batch_size, seq_length*n_output)
        
        # extract the output of ONLY the LAST output of the LAST element of the sequence
        sigmoid_last = sigmoid_out[:, -1].view([batch_size,1])               # (batch_size, 1)
        # print("this",sigmoid_last.shape, sigmoid_out.shape, input_words.shape)
        
        return sigmoid_last, h
    
    
    def init_hidden (self, batch_size):  # initialize hidden weights (h,c) to 0
        
        device = "cuda" if torch.cuda.is_available() else "cpu"
        weights = next(self.parameters()).data
        h = (weights.new(self.n_layers, batch_size, self.n_hidden).zero_().to(device),
             weights.new(self.n_layers, batch_size, self.n_hidden).zero_().to(device))
        
        return h

In [94]:
print("-"*50)
print("Running LSTM, One Hot Embeddings, Untrainable")

config = {
    "NAME": "LSTM_OneHot_Untrainable",
    "N_EPOCHS": 1,
    "EMB_TRAIN": False,
    "N_VOCAB": len(text_field.vocab.itos),
    "N_EMBED" : len(text_field.vocab.itos),
    "N_HIDDEN" : 512,
    "N_OUTPUT" : 3,
    "N_LAYERS" : 2,
    "LR": 0.001
}
model = OneHotLSTM(config)
pp.pprint(config)
print("-"*50)
run_loop(model, config)
print("-"*50)
print("END TRAINING")
print("-"*50)
print("START TESTING")
print("-"*50)
test(model, config)
print("-"*50)
print("END TESTING")
print("-"*50)

--------------------------------------------------
Running LSTM, One Hot Embeddings, Untrainable
{   'EMB_TRAIN': False,
    'LR': 0.001,
    'NAME': 'LSTM_OneHot_Untrainable',
    'N_EMBED': 555,
    'N_EPOCHS': 1,
    'N_HIDDEN': 512,
    'N_LAYERS': 2,
    'N_OUTPUT': 3,
    'N_VOCAB': 555}
--------------------------------------------------
BEGIN TRAINING
--------------------------------------------------
Epoch: 01 | Epoch Time: 0m 8s
	Train Loss: 0.694 | Train Acc: 50.07%
	 Val. Loss: 0.693 |  Val. Acc: 49.80%
--------------------------------------------------
END TRAINING
--------------------------------------------------
START TESTING
--------------------------------------------------
Test Loss: 0.693 | Test Acc: 49.86%
--------------------------------------------------
END TESTING
--------------------------------------------------


In [95]:
print("-"*50)
print("Running LSTM, One Hot Embeddings, Trainable")

config = {
    "NAME": "LSTM_OneHot_Trainable",
    "N_EPOCHS": 100,
    "EMB_TRAIN": True,
    "N_VOCAB": len(text_field.vocab.itos),
    "N_EMBED" : len(text_field.vocab.itos),
    "N_HIDDEN" : 512,
    "N_OUTPUT" : 3,
    "N_LAYERS" : 2,
    "LR": 0.001
}
model = OneHotLSTM(config)
pp.pprint(config)
print("-"*50)
run_loop(model, config)
print("-"*50)
print("END TRAINING")
print("-"*50)
print("START TESTING")
print("-"*50)
test(model, config)
print("-"*50)
print("END TESTING")
print("-"*50)

--------------------------------------------------
Running LSTM, One Hot Embeddings, Trainable
{   'EMB_TRAIN': True,
    'LR': 0.001,
    'NAME': 'LSTM_OneHot_Trainable',
    'N_EMBED': 555,
    'N_EPOCHS': 100,
    'N_HIDDEN': 512,
    'N_LAYERS': 2,
    'N_OUTPUT': 3,
    'N_VOCAB': 555}
--------------------------------------------------
BEGIN TRAINING
--------------------------------------------------
Epoch: 01 | Epoch Time: 0m 9s
	Train Loss: 0.694 | Train Acc: 50.07%
	 Val. Loss: 0.693 |  Val. Acc: 49.82%
Epoch: 02 | Epoch Time: 0m 9s
	Train Loss: 0.693 | Train Acc: 50.07%
	 Val. Loss: 0.693 |  Val. Acc: 49.79%
Epoch: 03 | Epoch Time: 0m 8s
	Train Loss: 0.693 | Train Acc: 50.08%
	 Val. Loss: 0.693 |  Val. Acc: 49.79%
Epoch: 04 | Epoch Time: 0m 9s
	Train Loss: 0.693 | Train Acc: 50.02%
	 Val. Loss: 0.693 |  Val. Acc: 49.81%
Epoch: 05 | Epoch Time: 0m 8s
	Train Loss: 0.693 | Train Acc: 49.78%
	 Val. Loss: 0.693 |  Val. Acc: 49.80%
Epoch: 06 | Epoch Time: 0m 8s
	Train Loss: 0.693 | 

In [None]:
class SentimentGRU(nn.Module):
    
    def __init__(self, n_vocab, n_embed, n_hidden, n_output, n_layers, drop_p = 0):
        super().__init__()
        # params: "n_" means dimension
        self.n_vocab = n_vocab     # number of unique words in vocabulary
        self.n_layers = n_layers   # number of LSTM layers 
        self.n_hidden = n_hidden   # number of hidden nodes in LSTM
        
        self.embedding = nn.Embedding(n_vocab, n_embed)
        # self.lstm = nn.GRU(n_embed, n_hidden, n_layers, batch_first = True, dropout = drop_p)
        self.lstm = nn.GRU(n_embed, n_hidden, n_layers, batch_first = True, bidirectional=True, dropout = drop_p)
        self.dropout = nn.Dropout(drop_p)
        self.fc = nn.Linear(n_hidden, n_output)
        self.sigmoid = nn.Sigmoid()
        
    def forward (self, input_words):
                                             # INPUT   :  (batch_size, seq_length)
        batch_size = len(input_words)
        embedded_words = self.embedding(input_words)    # (batch_size, seq_length, n_embed)
        lstm_out, h = self.lstm(embedded_words)         # (batch_size, seq_length, n_hidden)
        lstm_out = self.dropout(lstm_out)
        lstm_out = lstm_out.contiguous().view(-1, self.n_hidden) # (batch_size*seq_length, n_hidden)
        fc_out = self.fc(lstm_out)                      # (batch_size*seq_length, n_output)
        sigmoid_out = self.sigmoid(fc_out)              # (batch_size*seq_length, n_output)
        sigmoid_out = sigmoid_out.view(batch_size, -1)  # (batch_size, seq_length*n_output)
        
        # extract the output of ONLY the LAST output of the LAST element of the sequence
        sigmoid_last = sigmoid_out[:, -1].view([batch_size,1])               # (batch_size, 1)
        # print("this",sigmoid_last.shape, sigmoid_out.shape, input_words.shape)
        
        return sigmoid_last, h
    
    
    def init_hidden (self, batch_size):  # initialize hidden weights (h,c) to 0
        
        device = "cuda" if torch.cuda.is_available() else "cpu"
        weights = next(self.parameters()).data
        h = (weights.new(self.n_layers, batch_size, self.n_hidden).zero_().to(device),
             weights.new(self.n_layers, batch_size, self.n_hidden).zero_().to(device))
        
        return h