WiC Project \
Kaleb Moore, Maria Garcia, Garrett Hite, Huajian Qiao


In [1]:
import torch
from torch import nn
from torch import optim
import os
import string
import copy
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from pytorch_transformers import *
import numpy as np
import json
import collections
import transformers
import pandas as pd
import random
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score

In [2]:
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

TRAINING PARAMETERS

In [3]:
#Determine how many elements we want to train during each iteration
BATCH_SIZE = 32
EPOCHS = 15
PATIENCE = 10
# Prepare Torch to use GPU, and use CPU when it's not available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") #torch.device("cpu")
n_gpu = torch.cuda.device_count()
#Get the GPU device name
torch.cuda.get_device_name(0)

'NVIDIA GeForce GTX 1080'

Helper functions for processing

In [4]:
#Readfile function to take all the objects out of jsonl files
def parse_file_to_JSON(filename):
    serparated_json_objs = []
    
    #grab each line and add it as an element in json objs arr
    with open(filename, mode = "r") as jsonl_file:
        for i in jsonl_file:
            serparated_json_objs.append(json.loads(i))

    return serparated_json_objs

#Take a list of words (strings) and a sentence (as a RoBERTa tokenized ID list) and returns a list
#of pairs indicating the tokens' start and end positions in the sentence for each word
#Create a function that matches word in tokenized sentence
def find_word_in_tokenized_sentence(word,token_ids):
    decomposedWord = tokenizer.encode(word)
   #Iterate through to find a matching sublist of the token_ids
    for i in range(len(token_ids)):
        if token_ids[i] == decomposedWord[0] and token_ids[i:i+len(decomposedWord)] == decomposedWord:
            return (i,i+len(decomposedWord)-1)
    #finalize the output if there is no matching pattern found
    return (-1,-1)
  
def find_words_in_tokenized_sentences(wordList,token_ids):
    #Create a intList that marks the positions of words
    intList = []
    #if intList is empty, call the previous function as no matching pattern found
    for word in wordList:
        if len(intList) == 0:
            intList.append(find_word_in_tokenized_sentence(word,token_ids))
        else:
            afterLastInterval = intList[-1][1]+1
            interv = find_word_in_tokenized_sentence(word,token_ids[afterLastInterval:])
            actualPositions = (interv[0] + afterLastInterval,interv[1]+afterLastInterval)
            intList.append(actualPositions)
    return intList

In [5]:
def preprocessing(json_objects, training = True):
    
    wic_sentences, wic_encoded, wic_labels, wic_word_locs, wic_indexes = [], [], [] ,[] ,[]
    
    for index, example in enumerate(json_objects):
        
        wic_indexes.append(index)
        sentence = f"<s>{example['sentence1']}</s><s>{example['sentence2']}</s>"
        wic_sentences.append(sentence)

        wic_encoded.append(tokenizer.encode(sentence, add_special_tokens=False))
        
        # locate word in context
        word = example['word']
        location_of_word = (-1, -1)
        sent1_split = example['sentence1'].split(' ')
        sent2_split = example['sentence2'].split(' ')
        
        # wic indx
        sent1_word_char_loc = (example['start1'], example['end1'])
        sent2_word_char_loc = (example['start2'], example['end2'])
        
        num_characters = 0
        
        i, j = 0, 0
        word1_not_found, word2_not_found = True, True
        
        #locate word one
        while word1_not_found and i < len(sent1_split):
            word_len = len(sent1_split[i])
            if num_characters >= sent1_word_char_loc[0] or num_characters + word_len >= sent1_word_char_loc[1]:
                location_of_word = (i, -1) # Found the word in the sentence
                word1_not_found = False
            elif num_characters > sent1_word_char_loc[1]:
                location_of_word = (i - 1, -1)
                word1_not_found = False
            else:
                num_characters += word_len + 1 
                i += 1
                
        #locate word two
        num_characters = 0
        
        while word2_not_found and j < len(sent2_split):
            word_len = len(sent2_split[j])
            if num_characters >= sent2_word_char_loc[0] or num_characters + word_len >= sent2_word_char_loc[1]:
                location_of_word = (i, j)
                word2_not_found = False
            elif num_characters > sent2_word_char_loc[1]:
                location_of_word = (i, j - 1)
                word2_not_found = False
            else:
                num_characters += word_len + 1
                j += 1
                
        # Now to find the word in the tokenized sentences
        word1 = sent1_split[location_of_word[0]].translate(str.maketrans('', '', string.punctuation)) #Remove punctuation (See https://stackoverflow.com/questions/265960/best-way-to-strip-punctuation-from-a-string)
        word2 = sent2_split[location_of_word[1]].translate(str.maketrans('', '', string.punctuation)) #Remove punctuation
        token_word_locs = find_words_in_tokenized_sentences([word1, word2], wic_encoded[-1])
        wic_word_locs.append(token_word_locs)
        
        # Get the label if we expect it to be there
        if training:
            if example['label']:
                wic_labels.append(1)
            else:
                wic_labels.append(0)
                
    # Pad the sequences and find the encoded word location in the combined input
    max_len = np.array([len(ex) for ex in wic_encoded]).max()
    wic_padded = {"input_ids" : [], "attention_mask" : [], "token_type_ids" : [], "word1_locs": [], "word2_locs" : [], "index" : wic_indexes}
    for i in range(0, len(wic_encoded)):
        enc_sentence = wic_encoded[i]
        location_of_word = wic_word_locs[i]
        # Pad the sequences
        ex_len = len(enc_sentence)
        padded_sentence = enc_sentence.copy()
        padded_sentence.extend([0]*(max_len - ex_len))
        wic_padded["input_ids"].append(padded_sentence)
        padded_mask = [1] * ex_len
        padded_mask.extend([0]*(max_len - ex_len))
        wic_padded["attention_mask"].append(padded_mask)
        # Create the vector to get back the words after RoBERTa
        token_word_locs = wic_word_locs[i]
        first_word_loc = []
        second_word_loc = []
        len_first_word = token_word_locs[0][1] - token_word_locs[0][0] + 1
        len_second_word = token_word_locs[1][1] - token_word_locs[1][0] + 1
        for j in range(0, max_len):
            if j >= token_word_locs[0][0] and j <= token_word_locs[0][1]:
                #Part of the first word
                first_word_loc.append(1.0 / len_first_word)
            else:
                first_word_loc.append(0.0)
            if j >= token_word_locs[1][0] and j <= token_word_locs[1][1]:
                #Part of the second word
                second_word_loc.append(1.0 / len_second_word)
            else:
                second_word_loc.append(0.0)
        #We want to append a [1, max_len] vector instead of a [max_len] vector so wrap in an array
        wic_padded["word1_locs"].append([first_word_loc])
        wic_padded["word2_locs"].append([second_word_loc])
        #token_type_ids is a mask that tells where the first and second sentences are
        token_type_id = []
        first_sentence = True
        sentence_start = True
        for token in padded_sentence:
            if first_sentence and sentence_start and token == 0:
                #Allows 0 at the start of the first sentence
                token_type_id.append(0)
            elif first_sentence and token > 0:
                if sentence_start:
                    sentence_start = False
                token_type_id.append(0)
            elif first_sentence and not sentence_start and token == 0:
                first_sentence = False
                #Start of second sentence
                token_type_id.append(1)
            else:
                #Second sentence
                token_type_id.append(1)
        wic_padded["token_type_ids"].append(token_type_id)
        
    if training:
        for_tensor = {"input_ids": wic_padded["input_ids"], "token_type_ids": wic_padded["token_type_ids"],"attention_mask": wic_padded["attention_mask"], "labels": wic_labels, "index" : wic_padded["index"],"word1_locs": wic_padded["word1_locs"], "word2_locs" : wic_padded["word2_locs"]}
    else:
        for_tensor = {"input_ids": wic_padded["input_ids"], "token_type_ids": wic_padded["token_type_ids"], "attention_mask": wic_padded["attention_mask"], "index" : wic_padded["index"], "word1_locs": wic_padded["word1_locs"], "word2_locs" : wic_padded["word2_locs"]}

    return for_tensor


Read in jsonl, process our data for creating PyTorch dataset for model.

In [6]:
#Data processing
train_json_objs = parse_file_to_JSON("WiC/train.jsonl")
raw_train_set = preprocessing(train_json_objs)
print(len(raw_train_set["labels"])/BATCH_SIZE)

169.625


In [7]:
#This makes our training data set for the training loop
train_data = TensorDataset(
    torch.tensor(raw_train_set["input_ids"]).to(device),
    torch.tensor(raw_train_set["token_type_ids"]).to(device),
    torch.tensor(raw_train_set["attention_mask"]).to(device),
    torch.tensor(raw_train_set["labels"]).to(device),
    torch.tensor(raw_train_set["word1_locs"]).to(device),
    torch.tensor(raw_train_set["word2_locs"]).to(device),
    torch.tensor(raw_train_set["index"]).to(device)
)

#This makes the sampler and data loader for our training loop
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=BATCH_SIZE)

In [8]:
#This loads the jsonl files and make json using the helper functions
test_json_objs = parse_file_to_JSON("WiC/test.jsonl")
valid_json_objs = parse_file_to_JSON("WiC/val.jsonl")

#This does the preprocessing step for our json objects
raw_test_set = preprocessing(test_json_objs, training = False)
raw_valid_set = preprocessing(valid_json_objs)

#These are out test and validation data sets to be used to get our final accuracy and our results
#for the test.jsonl file.
test_data = TensorDataset(
    torch.tensor(raw_test_set["input_ids"]).to(device),
    torch.tensor(raw_test_set["token_type_ids"]).to(device),
    torch.tensor(raw_test_set["attention_mask"]).to(device),
    torch.tensor(raw_test_set["word1_locs"]).to(device),
    torch.tensor(raw_test_set["word2_locs"]).to(device),
    torch.tensor(raw_test_set["index"]).to(device)
)
validation_data = TensorDataset(
    torch.tensor(raw_valid_set["input_ids"]).to(device),
    torch.tensor(raw_valid_set["token_type_ids"]).to(device),
    torch.tensor(raw_valid_set["attention_mask"]).to(device),
    torch.tensor(raw_valid_set["labels"]).to(device),
    torch.tensor(raw_valid_set["word1_locs"]).to(device),
    torch.tensor(raw_valid_set["word2_locs"]).to(device),
    torch.tensor(raw_valid_set["index"]).to(device)
)

#This makes the sampler and data loader for the end of our program
test_sampler = RandomSampler(test_data)
test_dataloader = DataLoader(test_data, sampler=test_sampler, batch_size=BATCH_SIZE)
validation_sampler = SequentialSampler(validation_data)
validation_dataloader = DataLoader(validation_data, sampler=validation_sampler, batch_size=BATCH_SIZE)

Model Definition

In [9]:
#Loading in model
model = RobertaForMaskedLM.from_pretrained('roberta-base')

class WiC_Head(torch.nn.Module):
    def __init__(self, roberta_based_model, embedding_size = 768):
        """
        We are using a roBERTa model, adding a linear layer to take the distance between two 
        """
        super(WiC_Head, self).__init__()
        self.embedding_size = embedding_size
        self.embedder = roberta_based_model
        self.linear_diff = torch.nn.Linear(embedding_size, 250, bias = True)
        self.linear_seperator = torch.nn.Linear(250, 2, bias = True)
        self.loss = torch.nn.CrossEntropyLoss()
        self.activation = torch.nn.ReLU()
        self.softmax = torch.nn.Softmax()

    def forward(self, input_ids=None, attention_mask=None, labels=None,
                word1_locs = None, word2_locs = None):
        """
        same parameters as RoBERTa forward adding two tensors for the location of the 2 words to compare them
        """
        batch_size = word1_locs.shape[0]
        # get the embeddings (numerical representation)
        embs, _ = self.embedder.roberta(input_ids=input_ids, attention_mask=attention_mask)
        # The words from the sentences
        word1s = torch.matmul(word1_locs, embs).view(batch_size, self.embedding_size)
        word2s = torch.matmul(word2_locs, embs).view(batch_size, self.embedding_size)
        
        # seeing how different are the words by substracting the numbers that represent the words
        diff = word1s - word2s
        
        # Calculate outputs using activation
        layer1_results = self.activation(self.linear_diff(diff))
        logits = self.softmax(self.linear_seperator(layer1_results))
        outputs = logits
        
        # Calculate prediction label
        if labels is not None:
            loss = self.loss(logits.view(-1, 2), labels.view(-1))
            outputs = (loss, logits)
        return outputs

In [10]:
#The model to be used
class_model = WiC_Head(model, embedding_size = 768)

In [11]:
#Testing the accuracy of our model
def flat_accuracy(preds, labels, return_predict_correctness = False):
    pred_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    if return_predict_correctness:
        return np.sum(pred_flat == labels_flat) / len(labels_flat), pred_flat == labels_flat
    else:
        return np.sum(pred_flat == labels_flat) / len(labels_flat)

Training

In [12]:
#Accuracy desired for an A
MIN_ACCURACY = 0.70
REACHED_MIN_ACCURACY = False
best_weights = class_model.state_dict()
max_val_acc = (0, 0)
#Put the model in the GPU
class_model.cuda()

#Optimizer: changing the weights to make the moder optimal
param_optimizer = list(class_model.named_parameters())
no_decay = ['bias', 'gamma', 'beta']
optimizer_grouped_parameters = [
    {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
     'weight_decay_rate': 0.01},
    {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
     'weight_decay_rate': 0.0}
]
#Optimizer comes from hugging bert models
optimizer = AdamW(optimizer_grouped_parameters, lr=1e-5)

#Storing accuracy and loss
fit_history = {"loss": [],  "accuracy": [], "val_loss": [], "val_accuracy": []}
epoch_number = 0
epoch_since_max = 0
continue_learning = True

#This loop goes through the training process for each of the epochs
while epoch_number < EPOCHS and continue_learning:
    epoch_number += 1
    print(f"Training epoch #{epoch_number}")
    #Tracking variables
    tr_loss, tr_accuracy = 0, 0
    nb_tr_examples, nb_tr_steps = 0, 0
    eval_loss, eval_accuracy = 0, 0
    nb_eval_steps, nb_eval_examples = 0, 0
    #Set the model to training mode so we can train it
    class_model.train()
    #Set the final weights
    class_model.embedder.requires_grad_ = False
    
    #This for loop goes through each of the batches in the epochs for training 
    #This loop trains each batch
    for step, batch in enumerate(train_dataloader):
        #Use the GPU to train the batch
        batch = tuple(t.cuda() for t in batch)
        #Get the items to be used from the data loader
        b_input_ids, b_token_ids, b_input_mask, b_labels, b_word1, b_word2, b_index = batch
        #Clear out the gradients
        optimizer.zero_grad()
        #Forward training
        loss, logits = class_model(b_input_ids, attention_mask=b_input_mask, labels=b_labels, word1_locs = b_word1, word2_locs = b_word2) 
        #Backward training
        loss.backward()
        #Update parameters
        optimizer.step()
        #Update data to the CPU
        logits = logits.detach().cpu().numpy()
        label_ids = b_labels.cpu().numpy()
        #Calculate the accuracy
        b_accuracy = flat_accuracy(logits, label_ids)
        #Append to fit history
        fit_history["loss"].append(loss.item()) 
        fit_history["accuracy"].append(b_accuracy) 
        #Update tracking variables
        tr_loss += loss.item()
        tr_accuracy += b_accuracy
        nb_tr_examples += b_input_ids.size(0)
        nb_tr_steps += 1
        #This prints the current batch's loss and accuracy
        if nb_tr_steps%10 == 0:
            print("\t\tTraining Batch {}: Loss: {}; Accuracy: {}".format(nb_tr_steps, loss.item(), b_accuracy))
    print("Training:\n\tLoss: {}; Accuracy: {}".format(tr_loss/nb_tr_steps, tr_accuracy/nb_tr_steps))
    #Set model to evaluation mode so we can evaluate without training
    
    class_model.eval()
    #Evaluate data for one epoch
    for batch in validation_dataloader:
        #Add batch to GPU
        batch = tuple(t.cuda() for t in batch)
        #Unpack the inputs from our dataloader
        b_input_ids, b_token_ids, b_input_mask, b_labels, b_word1, b_word2, b_index = batch
        #not computing gradients
        with torch.no_grad():
            #Forward pass, calculate logit predictions
            loss, logits = class_model(b_input_ids, attention_mask=b_input_mask, labels=b_labels, word1_locs = b_word1, word2_locs = b_word2)
        #Move logits and labels to CPU
        logits = logits.detach().cpu().numpy()
        label_ids = b_labels.cpu().numpy()
        #Calculate the accuracy
        b_accuracy = flat_accuracy(logits, label_ids)
        #Append to fit history
        fit_history["val_loss"].append(loss.item()) 
        fit_history["val_accuracy"].append(b_accuracy) 
        #Update tracking variables
        eval_loss += loss.item()
        eval_accuracy += b_accuracy
        nb_eval_examples += b_input_ids.size(0)
        nb_eval_steps += 1
        if nb_eval_steps%10 == 0:
            print("\t\tValidation Batch {}: Loss: {}; Accuracy: {}".format(nb_eval_steps, loss.item(), b_accuracy))
            
    #This section of the code is to determine whether we need to keep training or not
    #i.e. if we exceed the min acuracy needed we stop training or if we meet the epoch number we specified previously
    eval_acc = eval_accuracy/nb_eval_steps
    if eval_acc >= max_val_acc[0]:
        max_val_acc = (eval_acc, epoch_number)
        continue_learning = True
        epoch_since_max = 0
        #This records the best weights to be added to the trained model
        best_weights = copy.deepcopy(class_model.state_dict())
        #See if we have reached min_accuracy
        if eval_acc >= MIN_ACCURACY:
            REACHED_MIN_ACCURACY = True
        #When it has reached min accuracy we want to end the learning process
        if REACHED_MIN_ACCURACY:
            continue_learning = False # No necessary to continue learning
    else:
        epoch_since_max += 1
        #If the desired accuracy isn't met, then we stop it with the patience value
        if epoch_since_max > PATIENCE:
            continue_learning = False
    print("Validation:\n\tLoss={}; Accuracy: {}".format(eval_loss/nb_eval_steps, eval_accuracy/nb_eval_steps))
print(f"Best accuracy ({max_val_acc[0]}) obtained at epoch #{max_val_acc[1]}.")
#Reload the best weights (from memory)
class_model.load_state_dict(best_weights)

Training epoch #1


  logits = self.softmax(self.linear_seperator(layer1_results))
	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha) (Triggered internally at  ..\torch\csrc\utils\python_arg_parser.cpp:882.)
  exp_avg.mul_(beta1).add_(1.0 - beta1, grad)


		Training Batch 10: Loss: 0.690142810344696; Accuracy: 0.5
		Training Batch 20: Loss: 0.6857261657714844; Accuracy: 0.625
		Training Batch 30: Loss: 0.6938253045082092; Accuracy: 0.4375
		Training Batch 40: Loss: 0.6842079758644104; Accuracy: 0.65625
		Training Batch 50: Loss: 0.6925121545791626; Accuracy: 0.40625
		Training Batch 60: Loss: 0.6883546113967896; Accuracy: 0.53125
		Training Batch 70: Loss: 0.7006716728210449; Accuracy: 0.5
		Training Batch 80: Loss: 0.6901718378067017; Accuracy: 0.40625
		Training Batch 90: Loss: 0.6735632419586182; Accuracy: 0.625
		Training Batch 100: Loss: 0.6774916052818298; Accuracy: 0.78125
		Training Batch 110: Loss: 0.6818719506263733; Accuracy: 0.5
		Training Batch 120: Loss: 0.7062422633171082; Accuracy: 0.46875
		Training Batch 130: Loss: 0.6584559082984924; Accuracy: 0.65625
		Training Batch 140: Loss: 0.6782318949699402; Accuracy: 0.5
		Training Batch 150: Loss: 0.6595319509506226; Accuracy: 0.65625
		Training Batch 160: Loss: 0.66404420137

		Training Batch 10: Loss: 0.3861722946166992; Accuracy: 0.90625
		Training Batch 20: Loss: 0.3558417558670044; Accuracy: 0.9375
		Training Batch 30: Loss: 0.3901459276676178; Accuracy: 0.9375
		Training Batch 40: Loss: 0.35818907618522644; Accuracy: 0.96875
		Training Batch 50: Loss: 0.353480726480484; Accuracy: 0.96875
		Training Batch 60: Loss: 0.4527171552181244; Accuracy: 0.84375
		Training Batch 70: Loss: 0.3874188959598541; Accuracy: 0.90625
		Training Batch 80: Loss: 0.37252724170684814; Accuracy: 0.9375
		Training Batch 90: Loss: 0.40480276942253113; Accuracy: 0.90625
		Training Batch 100: Loss: 0.31988731026649475; Accuracy: 1.0
		Training Batch 110: Loss: 0.4233754575252533; Accuracy: 0.875
		Training Batch 120: Loss: 0.4624600112438202; Accuracy: 0.84375
		Training Batch 130: Loss: 0.39940282702445984; Accuracy: 0.90625
		Training Batch 140: Loss: 0.4493832588195801; Accuracy: 0.84375
		Training Batch 150: Loss: 0.36934810876846313; Accuracy: 0.9375
		Training Batch 160: Lo

<All keys matched successfully>

In [13]:
#helper to normilize our predictions
def normalize(preds):
    pred_flat = np.argmax(preds, axis=1).flatten()
    return pred_flat == 1


Validation

In [14]:
validation_predictions_correctness = {}
# Validation

eval_loss, eval_accuracy = 0, 0
nb_eval_steps, nb_eval_examples = 0, 0

# Put model in evaluation mode
class_model.eval()

#Evaluation loop
for batch in validation_dataloader:
    # Add batch to GPU
    batch = tuple(t.cuda() for t in batch)
    # Unpack the inputs from our dataloader
    b_input_ids, b_token_ids, b_input_mask, b_labels, b_word1, b_word2, b_index = batch
    
    #Adapted gradient optimizer
    with torch.no_grad():
        loss, logits = class_model(b_input_ids, attention_mask=b_input_mask, 
                                    labels=b_labels, word1_locs = b_word1, word2_locs = b_word2)

    #Use CPU for accuracy calcs
    logits = logits.detach().cpu().numpy()
    label_ids = b_labels.cpu().numpy()
    b_accuracy, b_pred_correctness = flat_accuracy(logits, label_ids, return_predict_correctness = True)
    indexes = b_index.detach().cpu().numpy()

    for index, pred in zip(indexes, b_pred_correctness):
        validation_predictions_correctness[index] = pred

    eval_loss += loss.item()
    eval_accuracy += b_accuracy
    nb_eval_examples += b_input_ids.size(0)
    nb_eval_steps += 1

    if nb_eval_steps%10 == 0:
        print("\t\tValidation Batch {}: Loss: {}; Accuracy: {}".format(nb_eval_steps, loss.item(), b_accuracy))
        
print("Validation:\n\tLoss={}; Accuracy: {}".format(eval_loss/nb_eval_steps, eval_accuracy/nb_eval_steps))
validation_predictions_correctness = collections.OrderedDict(sorted(validation_predictions_correctness.items()))
print(validation_predictions_correctness)

  logits = self.softmax(self.linear_seperator(layer1_results))


		Validation Batch 10: Loss: 0.6408935189247131; Accuracy: 0.65625
		Validation Batch 20: Loss: 0.6085684299468994; Accuracy: 0.6666666666666666
Validation:
	Loss=0.6007347524166107; Accuracy: 0.7036458333333333
OrderedDict([(0, True), (1, True), (2, False), (3, False), (4, True), (5, True), (6, True), (7, False), (8, True), (9, False), (10, True), (11, True), (12, True), (13, False), (14, False), (15, True), (16, True), (17, False), (18, True), (19, True), (20, True), (21, False), (22, True), (23, True), (24, True), (25, True), (26, False), (27, False), (28, True), (29, True), (30, True), (31, True), (32, True), (33, True), (34, False), (35, True), (36, False), (37, True), (38, True), (39, True), (40, False), (41, True), (42, True), (43, True), (44, True), (45, False), (46, False), (47, True), (48, False), (49, True), (50, True), (51, True), (52, False), (53, True), (54, True), (55, True), (56, False), (57, False), (58, False), (59, False), (60, True), (61, True), (62, True), (63, Fal

Final Testing

In [15]:
test_predictions = {}
test_loss, test_accuracy = 0, 0
nb_test_examples, nb_test_steps = 0, 0
#Put model in evaluation mode to evaluate loss on the validation set
class_model.eval()

#We only need to evaluate on one epoch so this lopp will go through the batches on one epoch
for batch in test_dataloader:
    #Add batch to GPU
    batch = tuple(t.cuda() for t in batch)
    #Unpack the inputs from our dataloader
    b_input_ids, b_token_ids, b_input_mask, b_word1, b_word2, b_index = batch
    with torch.no_grad():
        # Forward pass, calculate logit predictions
        logits = class_model(b_input_ids, attention_mask=b_input_mask, word1_locs = b_word1, word2_locs = b_word2)
    #Move logits and labels to CPU
    logits = logits.detach().cpu().numpy()
    # Get the predictions
    b_preds = normalize(logits)
    indexes = b_index.detach().cpu().numpy() # Get the indexes
    for index, pred in zip(indexes, b_preds):
        test_predictions[index] = pred
    #Update tracking variables
    test_loss += loss.item()
    test_accuracy += b_accuracy
    nb_test_examples += b_input_ids.size(0)
    nb_test_steps += 1
    if nb_test_steps%10 == 0:
        print("\t\tTest Batch {}".format(nb_test_steps))

#Print final results for the test.jsonl file 
print("Testing results:")
test_predictions = collections.OrderedDict(sorted(test_predictions.items()))
print(test_predictions)

  logits = self.softmax(self.linear_seperator(layer1_results))


		Test Batch 10
		Test Batch 20
		Test Batch 30
		Test Batch 40
Testing results:
OrderedDict([(0, True), (1, False), (2, True), (3, True), (4, False), (5, False), (6, False), (7, True), (8, False), (9, True), (10, True), (11, True), (12, False), (13, False), (14, True), (15, False), (16, True), (17, False), (18, True), (19, True), (20, False), (21, False), (22, True), (23, True), (24, False), (25, False), (26, True), (27, False), (28, True), (29, True), (30, True), (31, True), (32, True), (33, False), (34, False), (35, False), (36, False), (37, False), (38, True), (39, False), (40, True), (41, False), (42, True), (43, True), (44, True), (45, True), (46, False), (47, False), (48, False), (49, True), (50, True), (51, True), (52, True), (53, True), (54, True), (55, False), (56, True), (57, False), (58, True), (59, False), (60, True), (61, False), (62, False), (63, False), (64, True), (65, False), (66, False), (67, False), (68, True), (69, True), (70, False), (71, True), (72, True), (73, T