## Importing data, preparing batch processing

In [1]:
import copy
from itertools import product
import torch
import torch.nn as nn
import torch.functional as F
import torch.optim as optim
import torchtext.data as data

import preprocessing as pre

In [6]:
train_data, test_data, val_data, TEXT, LABEL = pre.get_data('train_small.csv', 'val_small.csv', 'test_small.csv', None)

Connected!


In [7]:
TEXT.build_vocab(train_data)
LABEL.build_vocab(train_data)

In [8]:
USE_CUDA = torch.cuda.is_available()

In [9]:
BATCH_SIZE = 5

device = torch.device('cuda' if USE_CUDA else 'cpu')

TEXT.build_vocab(train_data)
LABEL.build_vocab(train_data)

train_it, test_it, val_it = data.BucketIterator.splits(
    (train_data, test_data, val_data), 
    batch_size = BATCH_SIZE,
    sort_key=lambda x: len(x.alj_text),
    sort_within_batch=True,
    device = device)

## Setting up model

In [22]:
class RNN(nn.Module):
    def __init__(self, rnn_type, input_size, embedding_size, hidden_size, output_size,
                 num_layers, dropout, bidirectional, padding_idx):
        super().__init__()
        self.embedding = nn.Embedding(input_size, embedding_size, padding_idx=padding_idx)
        self.rnn = getattr(nn, rnn_type.upper())(embedding_size, hidden_size, num_layers,
                                                 dropout=(dropout if num_layers > 1 else 0),
                                                 bidirectional=bidirectional)
        self.dropout = nn.Dropout(dropout)
        self.leakyrelu = nn.LeakyReLU()
        linear_inp = (hidden_size * 2 if bidirectional else hidden_size)
        self.linear = nn.Linear(linear_inp, output_size)
             
    def forward(self, input):
        embed = self.embedding(input)
        rnn_out, hidden = self.rnn(embed)
        rnn_out = rnn_out[-1]
        rnn_out = self.leakyrelu(rnn_out)
        dropped_rnn_out = self.dropout(rnn_out)
        linear_out = self.linear(rnn_out)
        return linear_out
    
    def evaluate(self, preds, labels):
        return self.loss_fn(pred, label)


In [23]:
def binary_accuracy(preds, y):
    """
    Return accuracy per batch
    """

    #round predictions to the closest integer
    rounded_preds = torch.round(torch.sigmoid(preds))
    correct = (rounded_preds == y) # convert into float for division 
    acc = correct.sum().item() / len(correct)
    return acc

class Training_module():

    def __init__(self, model, lr, pos_weight, use_cuda, epochs):
        self.model = model
        self.use_cuda = use_cuda
        if self.use_cuda:
            model = model.cuda()
            
        self.epochs = epochs
       
        ##YOUR CODE HERE##
        # Choose an optimizer. optim.Adam is a popular choice
        self.optimizer = optim.Adam(self.model.parameters(), lr=lr)
        self.loss_fn = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
        

    
    def train_epoch(self, iterator):
        '''
        Train the model for one epoch. For this repeat the following, 
        going through all training examples.
        1. Get the next batch of inputs from the iterator.
        2. Determine the predictions using a forward pass.
        3. Compute the loss.
        4. Compute gradients using a backward pass.
        5. Execute one step of the optimizer to update the model paramters.
        '''
        epoch_loss = 0
        epoch_acc = 0
        self.model.train()
        
        for batch in iterator:
          # batch.text has the texts and batch.label has the labels.

            self.optimizer.zero_grad()
                
            ##YOUR CODE HERE##
            text = batch.alj_text
            target = batch.decision_binary
            if self.use_cuda:
                text = text.cuda()
                target = target.cuda()
            predictions = self.model.forward(text).squeeze()
            loss = self.loss_fn(predictions, target)
            accuracy = binary_accuracy(predictions, target)
        
            loss.backward()
            self.optimizer.step()
            epoch_loss += loss.item()
            epoch_acc += accuracy
        
        return epoch_loss / len(iterator), epoch_acc / len(iterator)
    
    def train_model(self, train_iterator, dev_iterator):
        """
        Train the model for multiple epochs, and after each evaluate on the
        development set.  Return the best performing model.
        """  
        dev_accs = [0.]
        best_model = None
        for epoch in range(self.epochs):
            self.train_epoch(train_iterator)
            dev_acc = self.evaluate(dev_iterator)
            print(f"Epoch {epoch}: Dev Accuracy: {dev_acc[1]} Dev Loss:{dev_acc[0]}")
            if dev_acc[1] > max(dev_accs) or best_model is None:
                best_model = copy.deepcopy(self.model)
                #best_model.flatten_parameters() # would be good for RNNs
            dev_accs.append(dev_acc[1])

        return best_model
                
    def evaluate(self, iterator):
        '''
        Evaluate the performance of the model on the given examples.
        '''
        epoch_loss = 0
        epoch_acc = 0
        self.model.eval()
    
        with torch.no_grad():
    
            for batch in iterator:

                ##YOUR CODE HERE##
                text = batch.alj_text
                target = batch.decision_binary
                if self.use_cuda:
                    text = text.cuda()
                    target = target.cuda()
                predictions = self.model.forward(text).squeeze()
                loss = self.loss_fn(predictions, target)
                acc = binary_accuracy(predictions, target)
                epoch_loss += loss.item()
                epoch_acc += acc
        
        return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [24]:
# Model architecture parameters
RNN_TYPES = ['RNN', 'LSTM']
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_SIZES = [32, 64, 128, 256]
HIDDEN_SIZES = [1/3, 2/3]
OUTPUT_SIZE = 1
NUM_LAYERS = [1, 2]
DROPOUTS = [0.5, 0.75]
BIDIRECTIONALS = [False, True]
PADDING_IDX = TEXT.vocab.stoi[TEXT.pad_token]

# Model training hyperparameters
LEARNING_RATE = [0.01, 0.0001]
train_len = 0
train_pos = 0
for batch in train_it:
    train_len += len(batch.decision_binary)
    train_pos += batch.decision_binary.sum().item()
POS_WEIGHT = torch.tensor([(train_len - train_pos) / train_pos])
if USE_CUDA:
    POS_WEIGHT = POS_WEIGHT.cuda()
EPOCHS = 10

# Iterator over various model parameters
param_iter = product (RNN_TYPES, EMBEDDING_SIZES, HIDDEN_SIZES, NUM_LAYERS, DROPOUTS,
                      BIDIRECTIONALS, LEARNING_RATE)

# Magic loop
best_models = []
for rnn_type, embed_size, hidden_size, num_layers, dropout, bidirectional, lr in param_iter:
    # Print out model type here
    model = RNN(rnn_type, INPUT_DIM, embed_size, int(hidden_size * embed_size),
                OUTPUT_SIZE, num_layers, dropout, bidirectional, PADDING_IDX)
    tm = Training_module(model, lr, POS_WEIGHT, USE_CUDA, EPOCHS)
    best_model = tm.train_model(train_it, val_it)
    best_models.append(best_model) # might be nice to save accuracy and recall numbers here as well

Epoch 0: Dev Accuracy: 0.8291666666666667 Dev Loss:0.7839235415061315
Epoch 1: Dev Accuracy: 0.6916666666666668 Dev Loss:0.7952810873587927
Epoch 2: Dev Accuracy: 0.27499999999999997 Dev Loss:0.8076499203840891
Epoch 3: Dev Accuracy: 0.13749999999999998 Dev Loss:0.8182866225639979
Epoch 4: Dev Accuracy: 0.8458333333333333 Dev Loss:0.7249854902426401
Epoch 5: Dev Accuracy: 0.7958333333333334 Dev Loss:0.7891344974438349
Epoch 6: Dev Accuracy: 0.5791666666666667 Dev Loss:0.8267748008171717
Epoch 7: Dev Accuracy: 0.5708333333333333 Dev Loss:0.7856484552224478
Epoch 8: Dev Accuracy: 0.25416666666666665 Dev Loss:0.842288464307785
Epoch 9: Dev Accuracy: 0.6041666666666666 Dev Loss:0.7632258186737696
Epoch 0: Dev Accuracy: 0.8624999999999999 Dev Loss:0.7463264813025793
Epoch 1: Dev Accuracy: 0.8624999999999999 Dev Loss:0.7497777938842773
Epoch 2: Dev Accuracy: 0.8624999999999999 Dev Loss:0.7538933058579763
Epoch 3: Dev Accuracy: 0.8624999999999999 Dev Loss:0.756428470214208
Epoch 4: Dev Accura

KeyboardInterrupt: 