# LSTM Model

#### Imports

In [None]:
import pickle
import gensim
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from collections import defaultdict
from tqdm import tqdm

### Load data

In [None]:
# Load the pickled dataset. Later cells index it as
# data['en'][<split>]['padded'] and data['en'][<split>]['y'] with splits
# 'train' and 'test'.
# NOTE: pickle.load can execute arbitrary code while deserializing, so only
# open pickle files that come from a trusted source.
with open('../data../all_data.pickle','rb') as f:
    data = pickle.load(f)
# Load the pickled embeddings. Later cells read embedding_dict['en']['matrix']
# (passed to nn.Embedding.from_pretrained) and embedding_dict['en']['lookup']
# (its length is used to derive the vocabulary size).
with open('../data../mono_lang_embeddings.pickle','rb') as f:
    embedding_dict = pickle.load(f)

In [None]:
from sklearn.utils import shuffle

# Build a random 10,000-example training subset. Inputs and labels are
# shuffled together so every row keeps its own label.
X, y = shuffle(torch.from_numpy(data['en']['train']['padded']),
               torch.from_numpy(data['en']['train']['y']))
X = X[:10000]
y = y[:10000]
train_data = TensorDataset(X, y)

# Build a random 100-example test subset from the *test* split.
# The slices must be taken from Xt / yt: slicing X / y here would evaluate the
# model on examples it was trained on and report inflated test metrics.
Xt, yt = shuffle(torch.from_numpy(data['en']['test']['padded']),
                 torch.from_numpy(data['en']['test']['y']))
Xt = Xt[:100]
yt = yt[:100]
test_data = TensorDataset(Xt, yt)

# dataloaders
batch_size = 50

# make sure to SHUFFLE your data
train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_data, shuffle=True, batch_size=batch_size)

## LSTM model

In [None]:
class LSTM(nn.Module):
    """Bidirectional LSTM classifier on top of pretrained word embeddings.

    The sequence is embedded, run through a bidirectional LSTM, and the final
    hidden states of the forward and backward directions (last layer) are
    concatenated, passed through dropout and a linear layer, and squashed with
    a sigmoid.

    Args:
        no_layers: number of stacked LSTM layers.
        vocab_size: stored on the instance; the embedding table size is taken
            from the pretrained matrix, not from this value.
        hidden_dim: hidden size of each LSTM direction.
        embedding_dim: width of the embedding vectors (LSTM input size).
        output_dim: number of output units of the linear layer.
        drop_prob: accepted for interface compatibility. NOTE: it is not
            applied -- the dropout probability is fixed at 0.3, as before.
        pretrained_embeddings: optional 2-D matrix (tensor or array) of
            embedding weights. When omitted, the notebook-level
            ``embedding_dict['en']['matrix']`` is used.

    Raises:
        ValueError: if the embedding matrix is not 2-D or its width differs
            from ``embedding_dim``.
    """

    def __init__(self,no_layers,vocab_size,hidden_dim,embedding_dim,output_dim,drop_prob=0.5,
                 pretrained_embeddings=None):
        super().__init__()

        self.output_dim = output_dim
        self.hidden_dim = hidden_dim

        self.no_layers = no_layers
        self.vocab_size = vocab_size

        # embedding: fall back to the notebook-level embeddings when the
        # caller does not supply a matrix explicitly
        if pretrained_embeddings is None:
            pretrained_embeddings = embedding_dict['en']['matrix']
        # float32 weights match the LSTM parameters, so no cast is needed in forward()
        weights = torch.as_tensor(pretrained_embeddings, dtype=torch.float)
        if weights.dim() != 2 or weights.size(1) != embedding_dim:
            raise ValueError(
                f'expected a 2-D embedding matrix with {embedding_dim} columns, '
                f'got shape {tuple(weights.shape)}'
            )
        # from_pretrained keeps the weights frozen by default
        self.embedding = nn.Embedding.from_pretrained(weights)

        # lstm
        self.lstm = nn.LSTM(input_size=embedding_dim,
                            hidden_size=self.hidden_dim,
                            num_layers=no_layers,
                            bidirectional=True,
                            batch_first=True)

        # dropout layer
        self.dropout = nn.Dropout(0.3)

        # linear and sigmoid layer; the input is 2 * hidden_dim wide because
        # the forward and backward summaries are concatenated
        self.fc = nn.Linear(2 * self.hidden_dim, output_dim)
        self.sig = nn.Sigmoid()

    def forward(self, x, hidden=None):
        """Score a batch of index sequences.

        Args:
            x: integer token indices of shape (batch, seq_len).
            hidden: optional initial ``(h0, c0)`` pair, each of shape
                (2 * no_layers, batch, hidden_dim). ``None`` lets the LSTM
                start from zeros.

        Returns:
            ``(p, hidden)`` where ``p`` has shape (batch,) and holds the
            sigmoid output of the last output unit, and ``hidden`` is the
            final ``(h_n, c_n)`` pair returned by the LSTM.
        """
        # (batch, seq_len) -> (batch, seq_len, embedding_dim)
        embedded = self.embedding(x)

        _, hidden = self.lstm(embedded, hidden)

        # h_n is (2 * no_layers, batch, hidden_dim); the last two entries are
        # the final states of the top layer's forward and backward passes.
        # Each of them has read the whole sequence, unlike the backward output
        # at the last time step, which has only seen a single token.
        h_n = hidden[0]
        features = torch.cat((h_n[-2], h_n[-1]), dim=1)  # (batch, 2 * hidden_dim)

        probs = self.sig(self.fc(self.dropout(features)))  # (batch, output_dim)

        # keep the (batch,) return shape regardless of output_dim; the batch
        # size comes from the input itself, not from a notebook global
        p = probs[:, -1]

        return p, hidden

### Instantiate Model

In [None]:
# Hyperparameters of the classifier.
no_layers = 1
emb_dim = 200
hidden_dim = 250
output_dim = 1  # one probability per example (binary classification)
# one extra index on top of the lookup table is reserved for padding
vocab_size = 1 + len(embedding_dict['en']['lookup'])

model = LSTM(
    no_layers=no_layers,
    vocab_size=vocab_size,
    hidden_dim=hidden_dim,
    embedding_dim=emb_dim,
    output_dim=output_dim,
)
model

### Train Model

In [None]:
epochs = 10
lr = 0.0001
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(),lr=lr)
losses = []  # mean training loss per batch, one float per epoch

# Make sure dropout is active: a previous evaluate() call leaves the model in
# eval mode, which would silently disable it when this cell is re-run.
model.train()

for epoch in range(epochs):
    running_loss = 0.0

    for batch, y in train_loader:
        # Start every batch from zero states h = (h0, c0). The examples are
        # independent and reshuffled each epoch, so carrying the state over
        # from the previous batch would leak unrelated context (evaluate()
        # also starts from zeros). Sizing by the actual batch keeps a smaller
        # final batch working. First dimension: 2 directions per layer.
        h = (torch.zeros(2 * no_layers, batch.size(0), hidden_dim),
             torch.zeros(2 * no_layers, batch.size(0), hidden_dim))

        optimizer.zero_grad() # do not want gradients to accumulate

        # forward propagation (call the module, not .forward, so hooks run)
        p, h = model(batch, h)

        # compute loss; p is already (batch,), same shape as y
        loss = criterion(p, y.float())
        # .item() stores a plain float; accumulating the tensor itself would
        # keep every batch's autograd graph alive
        running_loss += loss.item()

        # calculate gradients & update weights
        loss.backward()
        optimizer.step()
    losses.append(running_loss / len(train_loader))
    print(f'Loss for epoch {epoch} = {running_loss}')


In [None]:
from matplotlib import pyplot as plt

# float() turns each entry into a plain number; entries that are 0-dim tensors
# still attached to the autograd graph cannot be converted to numpy directly.
loss_values = [float(loss_value) for loss_value in losses]
plt.xlabel('epoch')
plt.ylabel('mean training loss per batch')
# use the number of recorded losses so an interrupted run still plots
plt.plot(range(len(loss_values)), loss_values)

In [None]:
# Display the labels (y) and model outputs (p) left over from the last batch
# of the training loop, assuming that cell was the last one to assign them.
y,p

### Evaluate model

In [None]:
def predict(n):
    """Turn a probability into a hard label: 1 when n is strictly above 0.5, otherwise 0."""
    return 1 if n > 0.5 else 0

In [None]:
def n_correct(probs, target):
    """Count how many thresholded probabilities match the target labels.

    Args:
        probs: probabilities, any shape (e.g. (batch,) or (batch, 1)).
        target: labels with the same number of elements as ``probs``.

    Returns:
        A 0-dim tensor holding the number of positions where
        ``probs > 0.5`` agrees with ``target``.
    """
    probs = torch.as_tensor(probs)
    target = torch.as_tensor(target, device=probs.device)
    # Flatten both sides before comparing: comparing (batch,) against
    # (batch, 1) would broadcast to a batch x batch matrix and over-count.
    predictions = (probs.reshape(-1) > 0.5).long()
    corrects = predictions == target.reshape(-1)
    return corrects.sum()

In [None]:
def evaluate(model, iterator, criterion):
    """Compute the mean batch loss and the accuracy of ``model`` over ``iterator``.

    Args:
        model: module called as ``model(batch, hidden)`` and returning
            ``(probabilities, hidden)``; it must expose ``no_layers`` and
            ``hidden_dim`` so the initial hidden state can be sized.
        iterator: iterable of ``(batch, y)`` pairs.
        criterion: loss called as ``criterion(probabilities, y.float())``.

    Returns:
        ``(loss, accuracy)`` as plain floats: the loss averaged over batches
        and the fraction of correctly classified examples.

    Raises:
        ValueError: if ``iterator`` yields no batches.
    """
    epoch_loss = 0.0
    n_batches = 0
    n_right = 0
    n_seen = 0
    model.eval()  # NOTE: the model is left in eval mode afterwards
    with torch.no_grad():
        for batch, y in iterator:
            size = batch.size(0)
            # zero (h0, c0) sized from this batch, so a smaller final batch
            # works; first dimension is 2 directions per layer
            h = (torch.zeros(2 * model.no_layers, size, model.hidden_dim),
                 torch.zeros(2 * model.no_layers, size, model.hidden_dim))
            predictions, _ = model(batch, h)
            loss = criterion(predictions, y.float())
            epoch_loss += loss.item()
            n_batches += 1
            n_right += int(n_correct(predictions.reshape(-1,1), y))
            # count real examples instead of assuming every batch is full
            n_seen += size
    if n_seen == 0:
        raise ValueError('evaluate() needs at least one non-empty batch')
    return epoch_loss / n_batches, n_right / n_seen

In [None]:
# Evaluate the model on the test loader with the same loss used for training,
# then display the resulting (loss, accuracy) pair.
test_loss, test_acc = evaluate(model, test_loader, criterion)
test_loss, test_acc