# Evaluation template for masked language models (MLM)

In [1]:
import sys
import os
import random
import uuid

import numpy as np
import torch
import transformers
import datasets

# Pick the compute device once: use the GPU when one is visible, otherwise fall
# back to the CPU so the notebook still runs end-to-end.  The name `cuda` is
# kept because every later cell refers to the device by that name.
cuda = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Loading Model

In [2]:
# Fetch the pretrained BERT tokenizer and masked-LM model; both share the same
# local cache directory.
tokenizer = transformers.AutoTokenizer.from_pretrained(
    "bert-base-uncased", cache_dir='../bert_base_cache')
model = transformers.AutoModelForMaskedLM.from_pretrained(
    "bert-base-uncased", cache_dir='../bert_base_cache'
).to(device=cuda)

# Show which device the weights ended up on.
print(model.device)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


cuda:0


## Loading SemEval Data

In [3]:
# Build the data-file paths with os.path.join so the separators match the host
# OS.  The original hard-coded Windows backslashes, which POSIX systems treat
# as ordinary characters inside a single file name.
_semeval_dir = os.path.join('dataset_files', 'semeval_2014')
semeval_laptop_splits = datasets.load_dataset(
    '../dataset_scripts/semeval2014_task4/semeval2014_task4.py',
    data_files={
        'test': os.path.join(_semeval_dir, 'Laptops_Test_Gold.xml'),
        'train': os.path.join(_semeval_dir, 'Laptop_Train_v2.xml'),
    },
    cache_dir='../dataset_cache')

# Only the "test" split is used from here on; it keeps the name
# `semeval_laptop` that the later cells reference.
semeval_laptop = semeval_laptop_splits["test"]
print(semeval_laptop[0])

Using custom data configuration default-9b6d847dca00ea02
Reusing dataset sem_eval2014_task4_dataset (../dataset_cache\sem_eval2014_task4_dataset\default-9b6d847dca00ea02\0.0.1\f33ba7108331ad17be3f9fb710ca001edb383fba797c6ed0938354e6812ca969)


{'aspect': 'Boot time', 'sentiment': 0, 'text': 'Boot time is super fast, around anywhere from 35 seconds to 1 minute.'}


## Manually created Prompts

In [4]:
# Hand-written cloze templates.  `{aspect}` is substituted per review via
# str.format; the [MASK] token is left in place for the masked-LM to fill.
sentiment_prompts = [
    "The {aspect} is [MASK].",
    "I [MASK] the {aspect}.",
]

## Adding prompts to review text

In [5]:
def add_prompts(reviews, prompts):
    """Pair every review in a batch with each prompt template.

    Args:
        reviews: Column-oriented batch with parallel "aspect", "text" and
            "sentiment" sequences.
        prompts: Iterable of format strings containing an `{aspect}` field.

    Returns:
        A dict of four parallel lists ("text", "prompt", "label",
        "review_aspect_id").  Each input row contributes one output row per
        prompt, in prompt order, and all rows derived from the same input row
        share one freshly generated UUID string.
    """
    expanded = {"text": [], "prompt": [], "label": [], "review_aspect_id": []}

    for row_idx, aspect in enumerate(reviews["aspect"]):
        review_text = reviews["text"][row_idx]
        review_label = reviews["sentiment"][row_idx]

        # One id per input row, reused for every prompt generated from it.
        shared_id = str(uuid.uuid1())

        for template in prompts:
            filled_prompt = template.format(aspect=aspect)

            expanded["text"].append(review_text)
            expanded["label"].append(review_label)
            expanded["review_aspect_id"].append(shared_id)
            expanded["prompt"].append(filled_prompt)

    return expanded

In [6]:
# Expand each review into one row per prompt template.  The original columns
# are dropped so only the keys returned by add_prompts remain.
prompt_dataset = semeval_laptop.map(
    lambda batch: add_prompts(batch, sentiment_prompts),
    batched=True,
    remove_columns=semeval_laptop.column_names)

# Sanity check: first expanded row and total row count.
print(prompt_dataset[0])
print(len(prompt_dataset))

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


{'label': 0, 'prompt': 'The Boot time is [MASK].', 'review_aspect_id': '6d5fd487-8224-11eb-b3ea-7085c2c04498', 'text': 'Boot time is super fast, around anywhere from 35 seconds to 1 minute.'}
1276


## Tokenize and Run model

In [7]:
def run_model(reviews, tokenizer, model, device):
    """Run the masked-LM on a batch and return the logits at each [MASK].

    Args:
        reviews: Column-oriented batch with "text", "prompt", "label" and
            "review_aspect_id" sequences.
        tokenizer: Callable tokenizer exposing `mask_token_id`; it is called
            with (text, prompt) sentence pairs.
        model: Masked-LM whose output exposes a "logits" entry.
        device: Device the encoded batch is moved to before the forward pass.

    Returns:
        Dict with "logit_tensor" (one 1-D CPU tensor per row: the logits at
        that row's mask position) plus the untouched "label" and
        "review_aspect_id" columns.

    Raises:
        ValueError: If any encoded row does not contain exactly one mask
            token, since the mask position would be ambiguous.
    """
    # Only the review text (first segment) may be truncated, so the prompt and
    # its [MASK] are never cut off; every row is padded to 256 tokens.
    batch_tokens = tokenizer(reviews["text"], reviews["prompt"],
                             truncation='only_first', padding='max_length',
                             max_length=256, return_tensors="pt")
    batch_tokens = batch_tokens.to(device)

    # Locate the mask token in every row at once and validate the count.
    mask_hits = batch_tokens["input_ids"] == tokenizer.mask_token_id
    mask_counts = mask_hits.sum(dim=1)
    if bool((mask_counts != 1).any()):
        raise ValueError(
            "each input must contain exactly one mask token; "
            "per-row mask counts: {}".format(mask_counts.tolist()))
    row_index, mask_index = mask_hits.nonzero(as_tuple=True)

    # Pure inference: skip autograd bookkeeping so no graph is retained.
    with torch.no_grad():
        outputs = model(**batch_tokens)

    # Gather the vocabulary logits at each row's mask position, then move them
    # to the CPU so downstream storage does not hold device tensors.
    mask_logits = outputs["logits"][row_index, mask_index].cpu()
    output_list = list(mask_logits)

    return {"logit_tensor": output_list, "label": reviews["label"], "review_aspect_id": reviews["review_aspect_id"]}

In [8]:
# Feed the prompt dataset through the masked-LM two rows at a time, keeping
# only the columns that run_model returns.
model_output = prompt_dataset.map(
    lambda batch: run_model(batch, tokenizer, model, cuda),
    batched=True,
    batch_size=2,
    num_proc=None,
    remove_columns=prompt_dataset.column_names)

#print(len(model_output["logit_tensor"][0]))

HBox(children=(FloatProgress(value=0.0, max=638.0), HTML(value='')))




## Run Logistic Regression on mask logits

In [9]:
class LogisticRegression(torch.nn.Module):
    """Single linear layer mapping `input_dim` features to `output_dim` scores.

    No softmax or sigmoid is applied: `forward` returns the raw linear outputs.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # The attribute name `linear` fixes the state-dict keys
        # ("linear.weight", "linear.bias").
        self.linear = torch.nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """Apply the linear layer to `x` and return the result."""
        return self.linear(x)

# The classifier consumes one logit per vocabulary entry, so take its input
# width from the loaded MLM's config rather than hard-coding 30522.  The 3
# outputs are the sentiment classes.
LR_model = LogisticRegression(model.config.vocab_size, 3)
# map_location places the checkpoint tensors on the device in use, so a
# checkpoint saved from a GPU also loads when `cuda` refers to another device.
LR_model.load_state_dict(
    torch.load("logit_models/MLM_BERT-Base_Trained_on_Restaurant.pt", map_location=cuda))
LR_model.eval()
LR_model.to(device=cuda)

def run_LR(example, LR_model, device):
    """Score a batch of mask-logit vectors with the classifier.

    Args:
        example: Column-oriented batch with "logit_tensor" (one float vector
            per row), "label" and "review_aspect_id".
        LR_model: Callable module applied to the stacked float tensor.
        device: Device the input tensor is created on.

    Returns:
        Dict with "class_probs" (one 1-D CPU tensor per row holding whatever
        `LR_model` returned for that row; no normalisation is applied here)
        plus the untouched "label" and "review_aspect_id" columns.
    """
    logit_tensors = torch.tensor(
        example["logit_tensor"], dtype=torch.float32, device=device)

    # Pure inference: no autograd graph, and results are moved to the CPU so
    # downstream storage does not hold device tensors.
    with torch.no_grad():
        class_probs = LR_model(logit_tensors)
    class_probs_list = list(class_probs.cpu())

    return {"class_probs": class_probs_list, "label": example["label"], "review_aspect_id": example["review_aspect_id"]}

# Score the stored mask logits with the classifier in batches of 32, keeping
# only the columns that run_LR returns.
LR_output = model_output.map(
    lambda batch: run_LR(batch, LR_model, cuda),
    batched=True,
    batch_size=32,
    num_proc=None,
    remove_columns=model_output.column_names)

HBox(children=(FloatProgress(value=0.0, max=40.0), HTML(value='')))




## Convert prompt class probs to predictions

In [10]:
def make_predictions(example, device):
    """Collapse the per-prompt scores of one review into a single prediction.

    The batch is expected to contain only rows that belong to the same review,
    i.e. every row carries the same id and the same label.

    Args:
        example: Column-oriented batch with "review_aspect_id", "label" and
            "class_probs" (one score vector per row).
        device: Unused; kept so existing callers do not need to change.

    Returns:
        Dict of single-element lists: "prediction" (index of the largest
        entry of the row-wise mean score vector, as a plain int), "label" and
        "review_aspect_id".

    Raises:
        AssertionError: If the rows do not all share one id and one label.
    """
    ids = example["review_aspect_id"]
    labels = example["label"]

    # Comparing every row with the first one is enough to prove all are equal.
    assert all(row_id == ids[0] for row_id in ids), \
        "batch mixes rows from different reviews: {}".format(ids)
    assert all(row_label == labels[0] for row_label in labels), \
        "rows of one review disagree on the label: {}".format(labels)

    class_probs = torch.FloatTensor(example["class_probs"])
    # Average over the prompt rows (dim 0), then pick the best class.
    prediction_mean = torch.mean(class_probs, 0)
    # Return a plain int rather than a 0-dim tensor.
    prediction = int(torch.argmax(prediction_mean))

    return {"prediction": [prediction], "label": [labels[0]], "review_aspect_id": [ids[0]]}

# add_prompts emits len(sentiment_prompts) consecutive rows per review, so a
# batch of that size holds exactly the rows of one review.
predictions = LR_output.map(
    lambda batch: make_predictions(batch, cuda),
    batched=True,
    batch_size=len(sentiment_prompts),
    num_proc=None,
    remove_columns=LR_output.column_names)

HBox(children=(FloatProgress(value=0.0, max=638.0), HTML(value='')))




In [11]:
# Accuracy: share of rows whose predicted class equals the label.
outcomes = [row["prediction"] == row["label"] for row in predictions]
total = len(outcomes)
correct = sum(1 for hit in outcomes if hit)

print(correct/total)

0.6802507836990596
