# Assignment 2 - Recurrent Neural Networks



* try to train a model with better accuracy in the cell below. For example, you can use different optimizers such as SGD and Adam. You can also compare different hyperparameters and model size.
### (15 points), to obtain FULL point in this problem, the accuracy needs to be higher than 70%

This is Part 4 of the orginal assignment where we are making below changes in the code inorder to improve the accuracy:
1. Using a bidirectional LSTM model instead of RNN.
2. We will be using tokenization for better storage of vocabulary.
3. Hyper parameters - hidden_dim and embedded_dim are changed.

In [118]:
import copy
import torch
from torch import nn
from torch import optim
import torchtext
import spacy
from torchtext import data
from torchtext import datasets

TEXT = data.Field(sequential=True, tokenize='spacy',tokenizer_language='en_core_web_sm',batch_first=True, lower=True) #Used tokenize 
LABEL = data.LabelField(sequential=False, use_vocab=True, is_target=True)

# load data splits
train_data, val_data, test_data = datasets.SST.splits(TEXT, LABEL)

# build dictionary
TEXT.build_vocab(train_data)
LABEL.build_vocab(train_data)

#print(train_data)

# hyperparameters
vocab_size = len(TEXT.vocab)
label_size = len(LABEL.vocab)
padding_idx = TEXT.vocab.stoi['<pad>']
embedding_dim = 100 #changed from 128 to 100
#hidden_dim = 128
hidden_dim = 256

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# build iterators
train_iter, val_iter, test_iter = data.BucketIterator.splits(
    (train_data, val_data, test_data), 
    batch_size=32)


In [16]:
#Printing the sample values in training data
print(train_data[0].__dict__.keys())
print(train_data[0].__dict__.values())

dict_keys(['text', 'label'])
dict_values([['the', 'rock', 'is', 'destined', 'to', 'be', 'the', '21st', 'century', "'s", 'new', '`', '`', 'conan', "''", 'and', 'that', 'he', "'s", 'going', 'to', 'make', 'a', 'splash', 'even', 'greater', 'than', 'arnold', 'schwarzenegger', ',', 'jean', '-', 'claud', 'van', 'damme', 'or', 'steven', 'segal', '.'], 'positive'])


In [17]:
#Printing number of records in training/Validation/testing 
print(f"No of records in training data: {len(train_data)}")
print(f"No of records in test data: {len(test_data)}")
print(f"No of records in validation data: {len(val_data)}")

print(f"No of records in training iteration: {len(train_iter)}")
print(f"No of records in test iteration: {len(test_iter)}")
print(f"No of records in validation iteration: {len(val_iter)}")

print(f"Vocabulary size : {vocab_size}")
print(f"Number of classes : {label_size}")



No of records in training data: 8544
No of records in test data: 2210
No of records in validation data: 1101
No of records in training iteration: 267
No of records in test iteration: 70
No of records in validation iteration: 35
Vocabulary size : 15459
Number of classes : 3


In [18]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [120]:
# your code here
#Function for model training
def train_model(model, iterator):
     model.train()
     total_loss=[]
     total_accuracy=[]
     for i in range(epoch):
        epoch_loss = 0
        epoch_accuracy=0  
        loss_int=0
        accuracy_int=0
         
        for batch in iterator:            
            text = batch.text
            label=batch.label
            optimizer.zero_grad()  
            #Forward feed
            out = model(text)   
            #Loss Calculation
            loss = criterion(out, label)            
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()

             # Calculate accuracy
            predictions = out.argmax(1)
            correct = (predictions == label).sum().item()
            accuracy = correct / len(label)
            epoch_accuracy+=accuracy

        loss_int="{:.4f}".format(epoch_loss / len(iterator))
        accuracy_int="{:.4f}".format(epoch_accuracy / len(iterator))
        total_loss.append(loss_int)
        total_accuracy.append(accuracy_int)
        print(f'Epoch {i+1}/{epoch}, Loss: {loss_int}, Accuracy:{accuracy_int}')

     return total_loss,total_accuracy 

#Function for model evaluation
def evaluate_model(model, iterator):
    model.eval()
    epoch_loss = 0
    epoch_accuracy = 0
    
    with torch.no_grad():
        for batch in iterator:
            text = batch.text
            label = batch.label
            
            # Forward pass
            out = model(text)
            
            # Calculate loss 
            loss = criterion(out, label)
            
            # Calculate accuracy
            predictions = out.argmax(1)
            correct = (predictions == label).sum().item()
            accuracy = correct / len(label)
            
            epoch_loss += loss.item()
            epoch_accuracy += accuracy
            
    final_loss="{:.4f}".format(epoch_loss / len(iterator))
    final_accuracy="{:.4f}".format(epoch_accuracy / len(iterator))
    return final_loss, final_accuracy

In [122]:
class RNNClassifier(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, label_size, padding_idx):
        super(RNNClassifier, self).__init__()
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.label_size = label_size
        self.num_layers = 2   #changed from 1 to 2
        
        # add the layers required for sentiment analysis.
        self.embedding = nn.Embedding(self.vocab_size, self.embedding_dim, padding_idx=padding_idx) 
        self.rnn=nn.LSTM(embedding_dim,hidden_dim, num_layers=self.num_layers,batch_first=True, bidirectional=True) #Bidirectional LSTM model
        self.output = nn.Linear(hidden_dim*2, label_size)
        self.dropout = nn.Dropout(dropout)
        # Activation function for classification
        self.activation = nn.Sigmoid() if label_size == 1 else nn.LogSoftmax(dim=1)

    def zero_state(self, batch_size): 
        hidden = torch.zeros(self.num_layers*2, batch_size, self.hidden_dim).to(device)
        cell = torch.zeros(self.num_layers*2, batch_size, self.hidden_dim).to(device)  # cell state
        return hidden,cell

    def forward(self, text):
        #implement the forward function of the model.
        embedding = self.embedding(text)

        #Initializing hidden state 
        batch_size=text.size(0)
        hidden,cell=self.zero_state(batch_size)

        #LSTM
        out,(hidden,cell)=self.rnn(embedding,(hidden,cell))

        #output layer
        #hidden=self.dropout(hidden[-1])
        hidden = torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim=1) 
        out=self.activation(self.output(hidden))
        
        return out

In [127]:
#Initializing paramters
learning_rate=0.0001
epoch=5
batch_size=32
output_dim=3

#Model Defination
model=RNNClassifier(vocab_size, embedding_dim, hidden_dim, output_dim, padding_idx).to(device)
model=model.to(device)

#Optimizer and Loss function
criterion=nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
     
#Training Model with Training Iteration
print("Training the model with training Iteration")
training_loss, training_accuracy=train_model(model,train_iter)

#Training Model with Validation Iteration
print("Training the model with Validation Iteration")
valid_loss,valid_accuracy=train_model(model,val_iter)

#Evaluating the model with testing Iteration
print("Evaluating the model with testing Iteration")
test_loss, test_accuracy = evaluate_model(model, test_iter)
print(f"Test Loss: {test_loss}, Test Accuracy: {float(test_accuracy)*100}%")

Training the model with training Iteration
Epoch 1/5, Loss: 1.0431, Accuracy:0.4412
Epoch 2/5, Loss: 1.0041, Accuracy:0.5018
Epoch 3/5, Loss: 0.9685, Accuracy:0.5335
Epoch 4/5, Loss: 0.9314, Accuracy:0.5662
Epoch 5/5, Loss: 0.8859, Accuracy:0.6014
Training the model with Validation Iteration
Epoch 1/5, Loss: 0.9637, Accuracy:0.5560
Epoch 2/5, Loss: 0.8961, Accuracy:0.6086
Epoch 3/5, Loss: 0.8430, Accuracy:0.6300
Epoch 4/5, Loss: 0.7840, Accuracy:0.6622
Epoch 5/5, Loss: 0.7171, Accuracy:0.6854
Evaluating the model with testing Iteration
Test Loss: 0.9547, Test Accuracy: 59.730000000000004%
