## Importing data, preparing batch processing

In [1]:
import preprocessing as pre
import torch
import torch.nn as nn
import torch.functional as F
import torch.optim as optim
import torchtext.data as data
import copy

In [2]:
train_data, test_data, val_data, TEXT, LABEL = pre.get_data('train_small.csv', 'val_small.csv', 'test_small.csv', None)

Connected!


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  apply(lambda x: x[0: 1000000])


In [3]:
TEXT.build_vocab(train_data)
LABEL.build_vocab(train_data)

In [4]:
USE_CUDA = torch.cuda.is_available()

In [5]:
BATCH_SIZE = 5

device = torch.device('cuda' if USE_CUDA else 'cpu')

TEXT.build_vocab(train_data)
LABEL.build_vocab(train_data)

train_it, test_it, val_it = data.BucketIterator.splits(
    (train_data, test_data, val_data), 
    batch_size = BATCH_SIZE,
    sort_key=lambda x: len(x.alj_text),
    sort_within_batch=True,
    device = device)

## Setting up model

In [6]:
class RNN(nn.Module):
    def __init__(self, rnn_type, input_size, embedding_size, hidden_size, output_size,
                 num_layers, dropout, bidirectional, padding_idx):
        super().__init__()
        self.embedding = nn.Embedding(input_size, embedding_size, padding_idx=padding_idx)
        self.rnn = getattr(nn, rnn_type.upper())(embedding_size, hidden_size, num_layers, dropout=dropout,
                                         bidirectional=bidirectional)

        self.dropout = nn.Dropout(dropout)
        linear_inp = (hidden_size * 2 if bidirectional else hidden_size)
        self.linear = nn.Linear(hidden_size, output_size)
             
    def forward(self, input):
        embed = self.embedding(input)
        rnn_out, hidden = self.rnn(embed)
        rnn_out = rnn_out[-1]
        dropped_rnn_out = self.dropout(rnn_out)
        linear_out = self.linear(rnn_out)
        return linear_out
    
    def evaluate(self, preds, labels):
        return self.loss_fn(pred, label)


In [14]:
def binary_accuracy(preds, y):
    """
    Return accuracy per batch
    """

    #round predictions to the closest integer
    rounded_preds = torch.round(torch.sigmoid(preds))
    correct = (rounded_preds == y) # convert into float for division 
    acc = correct.sum().item() / len(correct)
    return acc

class Training_module():

    def __init__(self, model, lr, pos_weight, use_cuda, epochs):
        self.model = model
        self.use_cuda = use_cuda
        if self.use_cuda:
            model = model.cuda()
            
        self.epochs = epochs
       
        ##YOUR CODE HERE##
        # Choose an optimizer. optim.Adam is a popular choice
        self.optimizer = optim.Adam(self.model.parameters(), lr=lr)
        self.loss_fn = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
        

    
    def train_epoch(self, iterator):
        '''
        Train the model for one epoch. For this repeat the following, 
        going through all training examples.
        1. Get the next batch of inputs from the iterator.
        2. Determine the predictions using a forward pass.
        3. Compute the loss.
        4. Compute gradients using a backward pass.
        5. Execute one step of the optimizer to update the model paramters.
        '''
        epoch_loss = 0
        epoch_acc = 0
        self.model.train()
        
        for batch in iterator:
          # batch.text has the texts and batch.label has the labels.

            self.optimizer.zero_grad()
                
            ##YOUR CODE HERE##
            text = batch.alj_text
            target = batch.decision_binary
            if self.use_cuda:
                text = text.cuda()
                target = target.cuda()
            predictions = self.model.forward(text).squeeze()
            loss = self.loss_fn(predictions, target)
            accuracy = binary_accuracy(predictions, target)
        
            loss.backward()
            self.optimizer.step()
            epoch_loss += loss.item()
            epoch_acc += accuracy
        
        return epoch_loss / len(iterator), epoch_acc / len(iterator)
    
    def train_model(self, train_iterator, dev_iterator):
        """
        Train the model for multiple epochs, and after each evaluate on the
        development set.  Return the best performing model.
        """  
        dev_accs = [0.]
        best_model = None
        for epoch in range(self.epochs):
            self.train_epoch(train_iterator)
            dev_acc = self.evaluate(dev_iterator)
            print(f"Epoch {epoch}: Dev Accuracy: {dev_acc[1]} Dev Loss:{dev_acc[0]}")
            if dev_acc[1] > max(dev_accs) or best_model is None:
                best_model = copy.deepcopy(self.model)
                #best_model.flatten_parameters() # would be good for RNNs
            dev_accs.append(dev_acc[1])

        return best_model
                
    def evaluate(self, iterator):
        '''
        Evaluate the performance of the model on the given examples.
        '''
        epoch_loss = 0
        epoch_acc = 0
        self.model.eval()
    
        with torch.no_grad():
    
            for batch in iterator:

                ##YOUR CODE HERE##
                text = batch.alj_text
                target = batch.decision_binary
                if self.use_cuda:
                    text = text.cuda()
                    target = target.cuda()
                predictions = self.model.forward(text).squeeze()
                loss = self.loss_fn(predictions, target)
                acc = binary_accuracy(predictions, target)
                epoch_loss += loss.item()
                epoch_acc += acc
        
        return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [15]:
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_SIZE = 100
HIDDEN_SIZE = 100
OUTPUT_SIZE = 1
NUM_LAYERS = 2
DROPOUT = 0.5
BIDIRECTIONAL = False
PADDING_IDX = TEXT.vocab.stoi[TEXT.pad_token]
model = RNN('lstm', INPUT_DIM, EMBEDDING_SIZE, HIDDEN_SIZE, OUTPUT_SIZE,
         NUM_LAYERS, DROPOUT, BIDIRECTIONAL, PADDING_IDX)

LEARNING_RATE = 0.001
train_len = 0
train_pos = 0
for batch in train_it:
    train_len += len(batch.decision_binary)
    train_pos += batch.decision_binary.sum().item()
POS_WEIGHT = torch.tensor([(train_len - train_pos) / train_pos])
if USE_CUDA:
    POS_WEIGHT = POS_WEIGHT.cuda()
EPOCHS = 20
tm = Training_module(model, LEARNING_RATE, POS_WEIGHT, USE_CUDA, EPOCHS)

#Training the model
best_model = tm.train_model(train_it, val_it)

Epoch 0: Dev Accuracy: 0.8291666666666666 Dev Loss:0.7926198244094849
Epoch 1: Dev Accuracy: 0.8125 Dev Loss:0.7885544002056122
Epoch 2: Dev Accuracy: 0.7791666666666667 Dev Loss:0.791813482840856
Epoch 3: Dev Accuracy: 0.7958333333333333 Dev Loss:0.7887606173753738
Epoch 4: Dev Accuracy: 0.7458333333333332 Dev Loss:0.796973263223966
Epoch 5: Dev Accuracy: 0.8624999999999999 Dev Loss:0.6752122516433398
Epoch 6: Dev Accuracy: 0.8458333333333332 Dev Loss:0.7879130740960439
Epoch 7: Dev Accuracy: 0.725 Dev Loss:0.7991801798343658
Epoch 8: Dev Accuracy: 0.12083333333333333 Dev Loss:0.8146655907233556
Epoch 9: Dev Accuracy: 0.6416666666666666 Dev Loss:1.01590450356404
Epoch 10: Dev Accuracy: 0.6833333333333332 Dev Loss:0.7379422361652056
Epoch 11: Dev Accuracy: 0.4583333333333333 Dev Loss:0.8417320549488068
Epoch 12: Dev Accuracy: 0.525 Dev Loss:0.8037772178649902
Epoch 13: Dev Accuracy: 0.5416666666666666 Dev Loss:0.8881389101346334
Epoch 14: Dev Accuracy: 0.5416666666666666 Dev Loss:0.859