### This code was run on Google Colab website using GPU. 

In [3]:
# First, we will load packages necessary to run pytorch
from os.path import exists
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())
cuda_output = !ldconfig -p|grep cudart.so|sed -e 's/.*\.\([0-9]*\)\.\([0-9]*\)$/cu\1\2/'
accelerator = cuda_output[0] if exists('/dev/nvidia0') else 'cpu'

!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.4.1-{platform}-linux_x86_64.whl torchvision
import torch

tcmalloc: large alloc 1073750016 bytes == 0x591d6000 @  0x7f1de77e62a4 0x594e17 0x626104 0x51190a 0x4f5277 0x510c78 0x5119bd 0x4f5277 0x4f3338 0x510fb0 0x5119bd 0x4f5277 0x4f3338 0x510fb0 0x5119bd 0x4f5277 0x4f3338 0x510fb0 0x5119bd 0x4f6070 0x510c78 0x5119bd 0x4f5277 0x4f3338 0x510fb0 0x5119bd 0x4f6070 0x4f3338 0x510fb0 0x5119bd 0x4f6070


In [0]:
# Importing libraries
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset
from collections import Counter
import pickle as pkl
import random
import pdb
random.seed(134)
import pandas as pd
import io
from google.colab import drive
import json

PAD_IDX = 0
UNK_IDX = 1
BATCH_SIZE = 32

# use GPU if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [5]:
#Mount google drive
drive.mount('/content/gdrive') 

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive


In [5]:
!ls 'gdrive/My Drive/mnli/hw2_data.nosync'

crawl-300d-2M.vec	       mnli_val_subset.csv  snli_val.tsv
data_understanding_prep.ipynb  mnli_val.tsv	    wiki-news-300d-1M.vec
mnli_train_subset.csv	       snli_train1.tsv
mnli_train.tsv		       snli_train.tsv


## Help functions

In [0]:
def build_vocab(all_tokens, embedding, max_vocab_size = 10000):

    # save index 1 for unk and 0 for pad
    PAD_IDX = 0
    UNK_IDX = 1
    # Returns:
    # id2token: list of tokens, where id2token[i] returns token that corresponds to token i
    # token2id: dictionary where keys represent tokens and corresponding values represent indices
    all_tokens = [item for sublist in all_tokens for item in sublist]
    max_len = max([len(word) for word in all_tokens])
    
    unique_words = list(embedding.keys())
    
    id2token =  unique_words #list of words available in embedding
    id2token = ['<pad>', '<unk>'] + id2token #add pad and unknown to the beginning
    
    token2id = dict(zip(unique_words, range(2,2+len(unique_words)))) # dictionary of words and indices 
    token2id['<pad>'] = PAD_IDX  #add pad symbol to the dictionary
    token2id['<unk>'] = UNK_IDX  #add unkown symbol to the dictionary
    
    return token2id, id2token, max_len

# convert token to id in the dataset
def token2index_dataset(tokens_data):
    PAD_IDX = 0
    UNK_IDX = 1
    indices_data = []
    for tokens in tokens_data:
        index_list = [list(token2id[token]) if token in token2id else UNK_IDX for token in tokens] #tokenizes 10k words
        indices_data.append(index_list) #list of lists: indices of tokens for each sentence
    return indices_data


In [0]:
def load_embedding(fname, max_count=None):
    fin = io.open(fname, 'r', encoding='utf-8', newline='\n', errors='ignore')
    n, d = map(int, fin.readline().split())
    data = {}
    counter=0
    for line in fin:
        counter+=1
        tokens = line.rstrip().split(' ')
        data[tokens[0]] = map(float, tokens[1:])
        if counter==max_count:
            break
    return data

In [0]:
def read_data(file_loc, sep="\t"):
    #Read in data subsets
    data = pd.read_csv(file_loc, sep=sep, encoding='latin-1')
    return data

def tokenize(data):
    data['input1'] = data.sentence1.str.split()

    data['input2'] = data.sentence2.str.split()
    return data

def assign_target(name):
    if name == 'contradiction':
        return 0
    elif name == 'neutral':
        return 1
    else:
        return 2

In [0]:
class VocabDataset(Dataset):
    """
    Class that represents a train/validation/test dataset that's readable for PyTorch
    Note that this class inherits torch.utils.data.Dataset
    """

    def __init__(self, data_tuple, word2id):
        """
        @param data_list: list of character
        @param target_list: list of targets

        """
        self.data_list1, self.data_list2, self.target_list = zip(*data_tuple)
        assert (len(self.data_list1) == len(self.target_list) == len(self.data_list2))
        self.word2id = word2id

    def __len__(self):
        return len(self.data_list1)

    def __getitem__(self, key):
        """
        Triggered when you call dataset[i]
        """
        word_idx1 = [self.word2id[c] if c in self.word2id.keys() 
                    else UNK_IDX  for c in self.data_list1[key][:MAX_WORD_LENGTH]]
                                                                   
        word_idx2 = [self.word2id[c] if c in self.word2id.keys() 
                    else UNK_IDX  for c in self.data_list2[key][:MAX_WORD_LENGTH]]                                                                   
                                                                   
        label = self.target_list[key]
        return [word_idx1, word_idx2, len(word_idx1), len(word_idx2), label]

def vocab_collate_func(batch):
    """
    Customized function for DataLoader that dynamically pads the batch so that all
    data have the same length
    """
    data_list1 = []
    data_list2 = []
    label_list = []
    length_list1 = []
    length_list2 = []

        
    # padding
    for datum in batch:
        x1 = datum[0]
        x2 = datum[1]
        len1 = datum[2]
        len2 = datum[3]
        label = datum[4]
        
        label_list.append(label)
        length_list1.append(len1)
        length_list2.append(len2)
        #Pad first sentences
        padded_vec1 = np.pad(np.array(x1),
                                pad_width=((0,MAX_WORD_LENGTH-len1)),
                                mode="constant", constant_values=0)
        data_list1.append(padded_vec1)
        
        #Pad second sentences
        padded_vec2 = np.pad(np.array(x2),
                        pad_width=((0,MAX_WORD_LENGTH-len2)),
                        mode="constant", constant_values=0)
        data_list2.append(padded_vec2)
        
    data_list1 = np.array(data_list1)
    data_list2 = np.array(data_list2)
    length_list1 = np.array(length_list1)
    lenth_list2 = np.array(length_list2)
    label_list = np.array(label_list)
    
    return [torch.from_numpy(np.array(data_list1)), 
            torch.from_numpy(np.array(data_list2)),
            torch.LongTensor(length_list1), 
            torch.LongTensor(length_list2),
            torch.LongTensor(label_list)]


## Read and process data

In [0]:
#Read in data subsets
train_data = read_data('gdrive/My Drive/mnli/hw2_data.nosync/snli_train.tsv', sep='\t')
val_data = read_data('gdrive/My Drive/mnli/hw2_data.nosync/snli_val.tsv', sep='\t')

#Tokenize
train_data = tokenize(train_data)
val_data = tokenize(val_data)


#Assign label
train_data['target'] = train_data.label.apply(lambda x: assign_target(x))
val_data['target'] = val_data.label.apply(lambda x: assign_target(x))

In [0]:
#Read in pretrained embedding vectors - subset for now
embeddings_map = load_embedding('gdrive/My Drive/mnli/hw2_data.nosync/wiki-news-300d-1M.vec', max_count=50000)

#Convert embedding values to lists
embeddings = {}

for key, value in embeddings_map.items():
    embeddings[key] = list(value)

In [0]:
#Build vocabulary on train set
token2id, id2token, max_len = build_vocab(train_data['input1'] + train_data['input2'],
                              embeddings)

all_tokens = [item for sublist in train_data['input1'] + train_data['input2'] for item in sublist]
max_len = max([len(word) for word in all_tokens])

In [0]:
# Build train, valid and test dataloaders

#Embed each input and create loaders

MAX_WORD_LENGTH = max_len

train_dataset = VocabDataset(zip(train_data.input1,train_data.input2, 
                                           train_data.target), token2id)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=BATCH_SIZE,
                                           collate_fn=vocab_collate_func,
                                           shuffle=True)

val_dataset = VocabDataset(zip(val_data.input1,val_data.input2, 
                                           val_data.target), token2id)

val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                           batch_size=BATCH_SIZE,
                                           collate_fn=vocab_collate_func,
                                           shuffle=True)

In [0]:
#Convert embedding to tensor
import numpy as np

y=np.array([np.array(list(xi)) for xi in embeddings.values()])
padding = np.zeros((1, y.shape[1]))
unknown = np.random.rand(1, y.shape[1]) # to account for Padding and Unknown
full_size = np.concatenate([padding, unknown, y], axis=0)
emb_weights = torch.from_numpy(full_size)

## Modeling

### Now lets implement bidirectional GRU Recurrent Neural Net model

In [0]:
class GRU(nn.Module):
    def __init__(self, emb_weights, emb_size, hidden_size, num_layers, num_classes, vocab_size, dropout=0):
        # RNN Accepts the following hyperparams:
        # emb_size: Embedding Size
        # hidden_size: Hidden Size of layer in RNN
        # num_layers: number of layers in RNN
        # num_classes: number of output classes
        # vocab_size: vocabulary size
        super(GRU, self).__init__()

        self.num_layers, self.hidden_size = num_layers, hidden_size
        self.embedding =  nn.Embedding(vocab_size, emb_size, 
                                       padding_idx=PAD_IDX).from_pretrained(emb_weights, 
                                                                freeze=True).to(device) #load preset

        self.gru = nn.GRU(emb_size, hidden_size, num_layers, batch_first=True,
                         bidirectional=True, dropout=dropout).to(device) #creates bidirectional GRU
        self.linear1 = nn.Linear(hidden_size*2*2, hidden_size*2*2).to(device) #2 for bidirectional, 2 for concatenated
        self.linear2 = nn.Linear(hidden_size*2*2, num_classes).to(device) #2 for bidirectional, 2 for concatenated
        #self.linear1 = nn.Linear(hidden_size*2*2, num_classes).to(device)

        
    def forward(self, x1, x2, len1, len2):
        
        # sorts in each forward pass

        batch_size, seq_len = x1.size()
 
        # get embedding of characters - make sure pretrained weights do not get updated
        embed1 = self.embedding(x1.to(device))
        embed2 = self.embedding(x2.to(device))
        
      
        # fprop though RNN
        rnn_out1, h1 = self.gru(embed1.float())
        rnn_out2, h2 = self.gru(embed2.float())
        
        # [num_dir, batch_size, dim] => [batch_size, dim x num_dir]
        num_dir, batch_size, dim = h1.shape
        h1 = h1.transpose(0, 1).contiguous().view(batch_size, -1)
        h2 = h2.transpose(0, 1).contiguous().view(batch_size, -1)
        
        #Concatenate two vectors
        combined_vector = torch.cat([h1, h2], dim=1)
        
        logits1 = self.linear1(combined_vector) #FC layer
        logits2 = self.linear2(logits1) #second FC layer
        
        return logits2



In [0]:
def test_model(loader, model):
    """
    Help function that tests the model's performance on a dataset
    @param: loader - data loader for the dataset to test against
    """
    correct = 0
    total = 0
    model.eval()
    for sentence1, sentence2, lengths1, lengths2, labels in loader:

        outputs = F.softmax(model(sentence1, sentence2, lengths1, lengths2), dim=1)
        predicted = outputs.max(1, keepdim=True)[1].to(device)

        total += labels.size(0)
        correct += predicted.eq(labels.to(device).view_as(predicted)).sum().item()
    return (100 * correct / total)

In [0]:
def calculate_loss(loader, model, criterion):
    '''Calculate loss for evaluation'''
    model.eval()
    loss_hist = []
    for x1, x2, len1, len2, labels in loader:
      
        y_hat = model(x1, x2, len1, len2).to(device)
        loss = criterion(y_hat, labels.to(device))
        loss_hist.append(loss.item())
    average_loss = np.mean(loss_hist)
    return average_loss

In [0]:

def train(train_loader, val_loader, model, num_epochs, learning_rate, decay=0):
    '''Train model'''
    
    # Criterion and Optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=decay)

    train_losses = []
    train_accs = []
    val_losses = []
    val_accs = []

    for epoch in range(num_epochs):
        for i, (sentence1, sentence2, lengths1, lengths2, labels) in enumerate(train_loader):
            model.train()
            optimizer.zero_grad()

            # Forward pass
            outputs = model(sentence1, sentence2, lengths1, lengths2)
            #print(outputs)
            loss = criterion(outputs, labels.to(device))

            # Backward and optimize
            loss.backward()

            optimizer.step()

            # validate every 1000 iterations
            if i > 0 and i % 1000 == 0:
                # validate
                val_acc = test_model(val_loader, model)
                print('Epoch: [{}/{}], Step: [{}/{}], Validation Acc: {}, Training Loss: {}'.format(
                           epoch+1, num_epochs, i+1, len(train_loader), val_acc, loss.item()))
                
                
                train_acc = test_model(train_loader, model)
                train_accs.append(train_acc)
                val_accs.append(val_acc)

                train_losses.append(calculate_loss(train_loader, model, criterion))
                val_losses.append(calculate_loss(val_loader, model, criterion))
                
    return train_losses, train_accs, val_losses, val_accs 

In [0]:
#Single layer bidirectional GRU
gru_model = GRU(emb_weights, emb_size=emb_weights.shape[1], 
            hidden_size=200, num_layers=1, num_classes=3, 
            vocab_size=len(id2token))

gru_train_losses, gru_train_acc, gru_val_losses, gru_val_acc = train(train_loader, 
                                                                     val_loader, 
                                                                     gru_model, num_epochs=1, 
                                                                     learning_rate = 3e-4)

### GRU Tuning: hidden size

In [0]:
#Tuning hidden size

import json 

hidden_sizes = [50, 100, 200, 300, 500]

hidden_results = {}

for h in hidden_sizes:
    
    gru_model = GRU(emb_weights, emb_size=emb_weights.shape[1], 
                hidden_size=h, num_layers=1, num_classes=3, 
                vocab_size=len(id2token))

    gru_train_losses, gru_train_acc, gru_val_losses, gru_val_acc  = train(train_loader, 
                                                                          val_loader, gru_model, 
                                                                          num_epochs=10, learning_rate = 3e-4)
    
    hidden_results[h] = [gru_train_losses, gru_train_acc, gru_val_losses, gru_val_acc]
    
    #Save results
    with open('/content/gdrive/My Drive/mnli/gru_hidden500.txt', 'w') as f:
        f.write(json.dumps(hidden_results))
    
    




### GRU Tuning: weight decay

In [0]:
#Tuning Weight decay

decay_results = {}

decays = [1e-5, 1e-3, 1e-1, 0]

for d in decays:
    
    gru_model = GRU(emb_weights, emb_size=emb_weights.shape[1], 
                hidden_size=200, num_layers=1, num_classes=3, 
                vocab_size=len(id2token))

    gru_train_losses, gru_train_acc, gru_val_losses, gru_val_acc  = train(train_loader, 
                                                                          val_loader, gru_model, 
                                                                          num_epochs=10, learning_rate = 3e-4,
                                                                         decay = d)
    
    decay_results[d] = [gru_train_losses, gru_train_acc, gru_val_losses, gru_val_acc]
    
    with open('/content/gdrive/My Drive/mnli/gru_decay.txt', 'w') as f:
        f.write(json.dumps(decay_results))

### GRU Tuning: learning rate

In [None]:
# Tuning learning rate

learning_rates = [3e-5, 3e-4, 3e-3, 3e-2]

learning_results = {}

for l in learning_rates:
    
    gru_model = GRU(emb_weights, emb_size=emb_weights.shape[1], 
                hidden_size=200, num_layers=1, num_classes=3, 
                vocab_size=len(id2token))

    gru_train_losses, gru_train_acc, gru_val_losses, gru_val_acc  = train(train_loader, 
                                                                          val_loader, gru_model, 
                                                                          num_epochs=10, learning_rate = l,
                                                                         decay = 0)
    
    learning_results[l] = [gru_train_losses, gru_train_acc, gru_val_losses, gru_val_acc]
    
    with open('/content/gdrive/My Drive/mnli/gru_learning.txt', 'w') as f:
         f.write(json.dumps(learning_results))

### Now lets implement 2-layer Convolutional Neural Net model for text


In [0]:
class CNN(nn.Module):
    def __init__(self, emb_weights, emb_size, hidden_size, num_layers, num_classes, vocab_size,
                kernel_size=3, dropout=0):

        super(CNN, self).__init__()

        self.num_layers, self.hidden_size = num_layers, hidden_size
        self.embedding =  nn.Embedding(vocab_size, emb_size, 
                                       padding_idx=PAD_IDX).from_pretrained(emb_weights, 
                                                                freeze=True).to(device) #load preset
    
        self.conv1 = nn.Conv1d(emb_size, hidden_size, kernel_size=kernel_size, padding=1).to(device)
        self.conv2 = nn.Conv1d(hidden_size, hidden_size, kernel_size=kernel_size, padding=1).to(device)

        self.linear1 = nn.Linear(hidden_size*2, hidden_size).to(device) #2 for concatenated
        self.linear2 = nn.Linear(hidden_size, num_classes).to(device) # 2 for concatenated
    
     

    def forward(self, x1, x2, len1, len2):
        
        batch_size, seq_len = x1.size()
            
        embed1 = self.embedding(x1.to(device))
        hidden = self.conv1(embed1.float().transpose(1,2)).transpose(1,2)
        hidden = F.relu(hidden.contiguous().view(-1, hidden.size(-1))).view(batch_size, seq_len, hidden.size(-1))

        hidden = self.conv2(hidden.transpose(1,2)).transpose(1,2)
        hidden = F.relu(hidden.contiguous().view(-1, hidden.size(-1))).view(batch_size, seq_len, hidden.size(-1))
        #max pool over time
        hidden1 = torch.max(hidden, dim=1)[0]
        
        #Second sentence pass
        
        embed2 = self.embedding(x2.to(device))
        hidden2 = self.conv1(embed2.float().transpose(1,2)).transpose(1,2)
        hidden2 = F.relu(hidden2.contiguous().view(-1, hidden2.size(-1))).view(batch_size, seq_len, hidden.size(-1))

        hidden2 = self.conv2(hidden2.transpose(1,2)).transpose(1,2)
        hidden2 = F.relu(hidden2.contiguous().view(-1, hidden2.size(-1))).view(batch_size, seq_len, hidden.size(-1))
        #max pool over time
        hidden2 = torch.max(hidden2, dim=1)[0]
        
        #Concatenate two vectors
        combined_vector = torch.cat([hidden1, hidden2], dim=1)
        
        logits1 = self.linear1(combined_vector)
        logits2 = self.linear2(logits1)
        
        return logits2

In [0]:
#CNN
cnn_model = CNN(emb_weights, emb_size=emb_weights.shape[1], 
            hidden_size=200, num_layers=2, num_classes=3, 
            vocab_size=len(id2token))

cnn_losses, cnn_accuracies = train(train_loader, val_loader, gru_model, num_epochs=50, learning_rate = 3e-4)

### CNN Tuning: Hidden Dimension Size

In [None]:
#CNN Tuning: hidden size
hidden_sizes = [50, 100, 200, 300, 500]

hidden_results = {}

for d in hidden_sizes:
    
    cnn_model = CNN(emb_weights, emb_size=emb_weights.shape[1], 
            hidden_size=d, num_layers=2, num_classes=3, 
            vocab_size=len(id2token), kernel_size=3)

    cnn_train_losses, cnn_train_acc, cnn_val_losses, cnn_val_acc  = train(train_loader, 
                                                                          val_loader, cnn_model, 
                                                                          num_epochs=10, learning_rate = 3e-4)
    
    hidden_results[d] = [cnn_train_losses, cnn_train_acc, cnn_val_losses, cnn_val_acc]
    
    with open('/content/gdrive/My Drive/mnli/cnn_hidden.txt', 'w') as f:
        f.write(json.dumps(hidden_results))

### CNN Tuning: weight decay

In [None]:
#CNN Tuning: regularization
decay_results = {}

decays = [0, 1e-7, 1e-5, 1e-3, 1e-1]

for d in decays:
    
    cnn_model = CNN(emb_weights, emb_size=emb_weights.shape[1], 
            hidden_size=200, num_layers=2, num_classes=3, 
            vocab_size=len(id2token), kernel_size=3)

    cnn_train_losses, cnn_train_acc, cnn_val_losses, cnn_val_acc  = train(train_loader, 
                                                                          val_loader, cnn_model, 
                                                                          num_epochs=10, learning_rate = 3e-4,
                                                                         decay = d)
    
    decay_results[d] = [cnn_train_losses, cnn_train_acc, cnn_val_losses, cnn_val_acc]
    
    with open('/content/gdrive/My Drive/mnli/cnn_decay.txt', 'w') as f:
        f.write(json.dumps(decay_results))

### CNN Tuning: dropout

In [None]:
#CNN Tuning: dropout
dropout_results = {}

dropouts = [0, 0.25, 0.5, 0.75]

for d in dropouts:
    
    cnn_model = CNN(emb_weights, emb_size=emb_weights.shape[1], 
            hidden_size=200, num_layers=2, num_classes=3, 
            vocab_size=len(id2token), dropout=d)

    cnn_train_losses, cnn_train_acc, cnn_val_losses, cnn_val_acc  = train(train_loader, 
                                                                          val_loader, cnn_model, 
                                                                          num_epochs=10, learning_rate = 3e-4,
                                                                         decay = 0)
    
    dropout_results[d] = [cnn_train_losses, cnn_train_acc, cnn_val_losses, cnn_val_acc]
    
    with open('/content/gdrive/My Drive/mnli/cnn_dropout.txt', 'w') as f:
      f.write(json.dumps(dropout_results))

In [None]:
#Best model: CNN with weight decay 0.001
cnn_model_best = CNN(emb_weights, emb_size=emb_weights.shape[1], 
        hidden_size=200, num_layers=2, num_classes=3, 
        vocab_size=len(id2token))

cnn_train_losses, cnn_train_acc, cnn_val_losses, cnn_val_acc  = train(train_loader, 
                                                                      val_loader, cnn_model_best, 
                                                                      num_epochs=10, learning_rate = 3e-4,
                                                                     decay= 0.001)

In [None]:
#Evaluate on validation set
test_model(val_loader, cnn_model_best)

In [None]:
#Best model: GRU with hidden size 200
gru_model_best = GRU(emb_weights, emb_size=emb_weights.shape[1], 
            hidden_size=200, num_layers=1, num_classes=3, 
            vocab_size=len(id2token))

gru_train_losses, gru_train_acc, gru_val_losses, gru_val_acc  = train(train_loader, 
                                                                      val_loader, gru_model_best, 
                                                                      num_epochs=10, learning_rate = 3e-4)

In [None]:
#Evaluate on validation set
test_model(val_loader, gru_model_best)

### 3 correct and 3 incorrect predictions

In [0]:
#Output 3 correct and 3 incorrect predictions

#Create loader that doesn't shuffle input so we can recover actual text
val_loader_unshuffled = torch.utils.data.DataLoader(dataset=val_dataset,
                                           batch_size=BATCH_SIZE,
                                           collate_fn=vocab_collate_func,
                                           shuffle=False)

def output_correct_incorrect(loader, model):
  
    '''Output indices of 3 correct and 3 incorrect predictions'''
    
    predictions = []
  
    correct_ind = []
    for sentence1, sentence2, lengths1, lengths2, labels in loader:

        outputs = F.softmax(model(sentence1, sentence2, lengths1, lengths2), dim=1)
        predicted = outputs.max(1, keepdim=True)[1]#.to(device)

        #Find correct and incorrect sentences
        pred = predicted.view(predicted.numel()).cpu().numpy()
        predictions.extend(pred)
        labs = labels.numpy()
        ind_correct = np.equal(pred,labs)
        correct_ind.extend(ind_correct)


    corrects = [i for i, x in enumerate(correct_ind) if x]
    incorrect = [i for i, x in enumerate(correct_ind) if not x]
    
    correct_ind = random.sample(corrects, 3)
    incorrect_ind = random.sample(incorrect, 3)
    
    return correct_ind, incorrect_ind, predictions



In [0]:
#3 correct and 3 incorrect predictions by the best CNN model
correct_ind, incorrect_ind, predictions = output_correct_incorrect(val_loader_unshuffled, cnn_model_best)

In [117]:
 print('Correct Predictions')

for i in correct_ind:

    print('\nPrediction: ', str(i))
    print(val_data.iloc[i][['sentence1', 'sentence2', 'label']].values)
    print('Predicted: ', predictions[i])

Correct Predictions

Prediction:  668
['Person doing a trick on a skateboard in a skate park while people watch .'
 'There is a person imitating a skateboard trick from Tony Hawk .'
 'neutral']
Predicted:  1

Prediction:  989
['A scientist with a checkered shirt on is looking into a microscope to learn more about the world .'
 'A scientist is using a microscope .' 'entailment']
Predicted:  2

Prediction:  310
['A blond woman wearing a blue and pink floral tie-front bikini on a beach , readying to put a flagpole in the sand .'
 'A person in a bikini' 'entailment']
Predicted:  2


In [118]:
#0=contradiction, 1=neutral, 2=entailment
print('Incorrect Predictions')

for i in incorrect_ind:

    print('\nPrediction: ', str(i))
    print(val_data.iloc[i][['sentence1', 'sentence2', 'label']].values)
    print('Predicted: ', predictions[i])

Incorrect Predictions

Prediction:  382
['A boy wearing a yellow jersey is accepting the baton from a female wearing a yellow jersey in a relay race .'
 'A boy accepting a baton in a race .' 'entailment']
Predicted:  1

Prediction:  222
['A woman in a blue shirt and black workout pants practicing martial arts in front of a house .'
 'A woman has a white shirt .' 'contradiction']
Predicted:  2

Prediction:  993
['An old bearded man plays a hand flute on the side of a sidewalk .'
 'An old bearded man plays a board game on the side of a sidewalk .'
 'contradiction']
Predicted:  1


### Test performance of the best models on MNLI by genre

In [0]:
#Test performance on NMLI by genre
mnli_data = read_data('gdrive/My Drive/mnli/hw2_data.nosync/mnli_val.tsv', sep='\t')
mnli_train = read_data('gdrive/My Drive/mnli/hw2_data.nosync/mnli_train.tsv', sep='\t')

#Tokenize
mnli_data = tokenize(mnli_data)
mnli_train = tokenize(mnli_train)


#Assign label
mnli_data['target'] = mnli_data.label.apply(lambda x: assign_target(x))
mnli_train['target'] = mnli_train.label.apply(lambda x: assign_target(x))

#Save genres
genres = mnli_data.genre.unique().tolist()


In [0]:
#Calculate accuracy by genre
acc_by_genre_cnn = {}

for genre in genres:

  #Create loaders
    mnli_genre = mnli_data[mnli_data.genre == genre]
    mnli_dataset = VocabDataset(zip(mnli_genre.input1,mnli_genre.input2, 
                                             mnli_genre.target), token2id)

    mnli_loader = torch.utils.data.DataLoader(dataset=mnli_dataset,
                                             batch_size=BATCH_SIZE,
                                             collate_fn=vocab_collate_func,
                                             shuffle=False)
  
  
  
    accuracy = test_model(mnli_loader, cnn_model_best)
    acc_by_genre_cnn[genre] = accuracy

In [121]:
#Accuracy by CNN model
acc_by_genre_cnn

{'fiction': 42.814070351758794,
 'government': 44.19291338582677,
 'slate': 40.818363273453095,
 'telephone': 43.28358208955224,
 'travel': 45.010183299389}

In [0]:
#Repeat with the best GRU model
acc_by_genre_gru = {}

for genre in genres:

  #Create loaders
    mnli_genre = mnli_data[mnli_data.genre == genre]
    mnli_dataset = VocabDataset(zip(mnli_genre.input1,mnli_genre.input2, 
                                             mnli_genre.target), token2id)

    mnli_loader = torch.utils.data.DataLoader(dataset=mnli_dataset,
                                             batch_size=BATCH_SIZE,
                                             collate_fn=vocab_collate_func,
                                             shuffle=False)
  
    accuracy = test_model(mnli_loader, gru_model_best)
    acc_by_genre_gru[genre] = accuracy

In [123]:
#Accuracy by GRU model
acc_by_genre_gru

{'fiction': 41.70854271356784,
 'government': 40.15748031496063,
 'slate': 39.920159680638726,
 'telephone': 40.995024875621894,
 'travel': 41.34419551934827}

### Optional: additional training on MNLI data

In [44]:
#Optional: fine-tuning on MNLI data
best_model = cnn_model_best
torch.save(best_model.state_dict(), 'gdrive/My Drive/mnli/best_model.pt')




acc_by_genre_extra = {}

for genre in genres:
  
    print(genre)
  
    #Create loaders
    mnli_genre = mnli_data[mnli_data.genre == genre]
    mnli_dataset = VocabDataset(zip(mnli_genre.input1,mnli_genre.input2, 
                                              mnli_genre.target), token2id)

    mnli_loader = torch.utils.data.DataLoader(dataset=mnli_dataset,
                                             batch_size=BATCH_SIZE,
                                             collate_fn=vocab_collate_func,
                                             shuffle=False)
  
  
    mnli_train_dataset = mnli_train[mnli_train.genre == genre]
    mnli_train_dataset = VocabDataset(zip(mnli_train_dataset.input1,mnli_train.input2, 
                                             mnli_train.target), token2id)

    mnli_train_loader = torch.utils.data.DataLoader(dataset=mnli_train_dataset,
                                             batch_size=BATCH_SIZE,
                                             collate_fn=vocab_collate_func,
                                             shuffle=True)
  
    best_model.load_state_dict(torch.load('gdrive/My Drive/mnli/best_model.pt'))
  
    gru_train_losses, gru_train_acc, gru_val_losses, gru_val_acc  = train(mnli_train_loader, 
                                                                      mnli_loader, best_model, 
                                                                      num_epochs=5, learning_rate = 3e-4, 
                                                                     decay = 0.001)
  
    accuracy = test_model(mnli_loader, best_model)
    print(accuracy)
    acc_by_genre_extra[genre] = accuracy

fiction
44.92462311557789
telephone
47.56218905472637
slate
44.510978043912175
government
47.83464566929134
travel
44.5010183299389
