# RNNs with custom embeddings

## Importing data

In [1]:
import copy
from itertools import product
from numpy import isnan
import pandas as pd
import pickle
import torch
import torch.nn as nn
import torch.optim as optim
import torchtext.data as data
import sys
import warnings
warnings.filterwarnings('ignore')

sys.path.append('../data_pipeline/')
import preprocessing as pre
from training import TrainingModule

SEED = 1312
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

In [2]:
train_data, test_data, val_data, TEXT, LABEL = pre.get_data(
    'train_small.csv', 'val_small.csv', 'test_small.csv')

Connected!


In [3]:
USE_CUDA = torch.cuda.is_available()

TEXT.build_vocab(train_data)
LABEL.build_vocab(train_data)

BATCH_SIZE = 5

train_it, test_it, val_it = data.BucketIterator.splits(
    (train_data, test_data, val_data), 
    batch_size = BATCH_SIZE,
    sort_key=lambda x: len(x.alj_text),
    sort_within_batch=True,
    device = torch.device('cuda' if USE_CUDA else 'cpu'))

## RNN Model

In [4]:
class RNN(nn.Module):
    
    def __init__(self, rnn_type, input_size, embedding_size,
                 hidden_size, output_size, num_layers, dropout,
                 bidirectional, padding_idx):
    
        super().__init__()
        self.embedding = nn.Embedding(input_size, embedding_size,
                                      padding_idx=padding_idx)
        
        self.rnn = getattr(nn, rnn_type.upper())\
                          (embedding_size, hidden_size, num_layers,
                           dropout=(dropout if num_layers > 1 else 0),
                           bidirectional=bidirectional)
        
        self.leakyrelu = nn.LeakyReLU()
        self.dropout = nn.Dropout(dropout)
        
        linear_inp = (hidden_size * 2 if bidirectional else hidden_size)
        self.linear = nn.Linear(linear_inp, output_size)
             
    def forward(self, input):
        embed = self.embedding(input)
        rnn_out, hidden = self.rnn(embed)
        rnn_out = rnn_out[-1]
        rnn_out = self.leakyrelu(rnn_out)
        rnn_out = self.dropout(rnn_out)
        linear_out = self.linear(rnn_out)
        return linear_out

## Training models

In [None]:
# Store training results
df = pd.DataFrame(columns=['architecture', 'model_type', 'embeddings',
                           'hidden', 'num_layers', 'dropouts',
                           'bidirectional', 'learning_rate', 'epochs',
                           'dev_acc', 'dev_prec', 'dev_recall',
                           'metric'])

# Model architecture parameters
RNN_TYPES = ['RNN', 'LSTM']
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_SIZES = [32, 64, 128, 256]
HIDDEN_SIZES = [1/3, 2/3]
OUTPUT_SIZE = 1
NUM_LAYERS = [1, 2]
DROPOUTS = [0.5, 0.75]
BIDIRECTIONALS = [False, True]
PADDING_IDX = TEXT.vocab.stoi[TEXT.pad_token]

# Model training hyperparameters
LEARNING_RATE = [0.01, 0.0001]
train_len = 0
train_pos = 0
for batch in train_it:
    train_len += len(batch.decision_binary)
    train_pos += batch.decision_binary.sum().item()
POS_WEIGHT = torch.tensor([(train_len - train_pos) / train_pos])
if USE_CUDA:
    POS_WEIGHT = POS_WEIGHT.cuda()
EPOCHS = 10

# Iterator over various model parameters
param_iter = product (RNN_TYPES, EMBEDDING_SIZES, HIDDEN_SIZES,
                      NUM_LAYERS, DROPOUTS, BIDIRECTIONALS,
                      LEARNING_RATE)

# Magic loop
best_acc = (None, None)
best_rec = (None, None)
best_prec = (None, None)
for i, (rnn_type, embed_size, hidden_size, num_layers, dropout,\
    bidirectional, lr) in enumerate(param_iter):
    print(f'Architecture #{i}\n' + '-' * 20)
    hidden_dim = int(hidden_size * embed_size)
    model = RNN(rnn_type, INPUT_DIM, embed_size, hidden_dim,
                OUTPUT_SIZE, num_layers, dropout, bidirectional,
                PADDING_IDX)
    
    tm = TrainingModule(model, lr, POS_WEIGHT, USE_CUDA, EPOCHS)
    
    best_models = tm.train_model(train_it, val_it)
    
    for metric, best_model in best_models.items():
        row = [i, embed_size, hidden_size, dropout,
               lr, EPOCHS, best_model.accuracy,
               best_model.precision, best_model.recall, metric]
        df.loc[len(df)] = row
        if best_acc[0] is None or isnan(best_acc[1]) or\
           best_model.accuracy > best_acc[1]:
            best_acc = (copy.deepcopy(best_model.model), best_model.accuracy)
        if best_rec[0] is None or isnan(best_rec[1]) or\
           best_model.recall > best_rec[1]:
            best_rec = (copy.deepcopy(best_model.model), best_model.recall)
        if best_prec[0] is None or isnan(best_prec[1]) or\
           best_model.precision > best_prec[1]:
            best_prec = (copy.deepcopy(best_model.model), best_model.precision)
    
    print('-' * 20 + '\n')


Architecture #0
--------------------
Epoch 0: Dev Accuracy: 0.1708; Dev Precision: 0.1375; Dev Recall: nan; Dev Loss:0.8276
Epoch 1: Dev Accuracy: 0.8292; Dev Precision: nan; Dev Recall: nan; Dev Loss:0.7702
Epoch 2: Dev Accuracy: 0.2042; Dev Precision: 0.1375; Dev Recall: nan; Dev Loss:0.8024
Epoch 3: Dev Accuracy: 0.2042; Dev Precision: 0.1417; Dev Recall: nan; Dev Loss:0.8160
Epoch 4: Dev Accuracy: 0.6750; Dev Precision: nan; Dev Recall: nan; Dev Loss:0.7548
Epoch 5: Dev Accuracy: 0.1708; Dev Precision: 0.1375; Dev Recall: nan; Dev Loss:0.8606
Epoch 6: Dev Accuracy: 0.6292; Dev Precision: nan; Dev Recall: nan; Dev Loss:0.7831
Epoch 7: Dev Accuracy: 0.6583; Dev Precision: nan; Dev Recall: nan; Dev Loss:0.7891
Epoch 8: Dev Accuracy: 0.6417; Dev Precision: nan; Dev Recall: nan; Dev Loss:0.7904
Epoch 9: Dev Accuracy: 0.4208; Dev Precision: 0.1139; Dev Recall: nan; Dev Loss:0.8118
--------------------

Architecture #1
--------------------
Epoch 0: Dev Accuracy: 0.8458; Dev Precision: nan

Epoch 0: Dev Accuracy: 0.1375; Dev Precision: 0.1375; Dev Recall: nan; Dev Loss:0.8894
Epoch 1: Dev Accuracy: 0.1375; Dev Precision: 0.1375; Dev Recall: nan; Dev Loss:0.8818
Epoch 2: Dev Accuracy: 0.1375; Dev Precision: 0.1375; Dev Recall: nan; Dev Loss:0.8744
Epoch 3: Dev Accuracy: 0.1375; Dev Precision: 0.1375; Dev Recall: nan; Dev Loss:0.8651
Epoch 4: Dev Accuracy: 0.1375; Dev Precision: 0.1375; Dev Recall: nan; Dev Loss:0.8575
Epoch 5: Dev Accuracy: 0.1375; Dev Precision: 0.1375; Dev Recall: nan; Dev Loss:0.8470
Epoch 6: Dev Accuracy: 0.1375; Dev Precision: 0.1375; Dev Recall: nan; Dev Loss:0.8420
Epoch 7: Dev Accuracy: 0.1375; Dev Precision: 0.1375; Dev Recall: nan; Dev Loss:0.8366
Epoch 8: Dev Accuracy: 0.1375; Dev Precision: 0.1375; Dev Recall: nan; Dev Loss:0.8318
Epoch 9: Dev Accuracy: 0.1375; Dev Precision: 0.1375; Dev Recall: nan; Dev Loss:0.8290
--------------------

Architecture #10
--------------------
Epoch 0: Dev Accuracy: 0.4625; Dev Precision: nan; Dev Recall: nan; De

## Save results of model training

In [None]:
SAVE_PREFIX = '../results/RNNCustom_'
df.to_csv(f'{SAVE_PREFIX}models.csv')
torch.save(best_acc[0], f'{SAVE_PREFIX}best_acc.pt')
torch.save(best_rec[0], f'{SAVE_PREFIX}best_rec.pt')
torch.save(best_prec[0], f'{SAVE_PREFIX}best_prec.pt')