# Setup

In [32]:
import transformers
from transformers import pipeline, AutoTokenizer, AutoModelForMaskedLM
import numpy as np
import torch
from pprint import pprint
import logging
import re

transformers.logging.set_verbosity_error()

def print_header(header):
    print("\n\n----------------------------------------------------------")
    print(header)
    print("----------------------------------------------------------")

models = {
    # 'BERT': 'bert-base-uncased',
    'BERT (whole word)': 'bert-large-uncased-whole-word-masking',
    'GPT': 'gpt2',
}

sentence_dict = [
    {
        'sentences': [
            "The horse raced past the barn [MASK]",
            # "The horse raced past the barn [MASK]."
            ],
        'fillers': [
            'fell',
            'is',
            'was',
            'and',
            '.',
        ]
    },
    {
        'sentences': [
            "The butter melted in the pan [MASK]",
            # "The butter melted in the pan [MASK].",
        ],
        'fillers': [
            'smelled',
            'smells',
            'is',
            'was',
            'and',
            '.',
        ]
    }
]


# Behavior Exploration -- BERT

In [33]:
def setup(model):
    tokenizer = AutoTokenizer.from_pretrained(models[model])
    model = AutoModelForMaskedLM.from_pretrained(models[model])
    bert = pipeline("fill-mask", model=model, tokenizer=tokenizer)
    mask = bert.tokenizer.mask_token
    return bert, mask

def runBERT(sentence_dict):
    bert, mask = setup('BERT (whole word)')
    filler_results = {}
    top_preds = {}
    for sentence_type in sentence_dict:
        for sentence in sentence_type['sentences']:
            # sentence = sentence.replace("[MASK]", mask)
            filler_results[sentence] = {}
            outputs = bert(sentence, top_k=12)
            top_preds[sentence] = [(output["token_str"], output['score']) for output in outputs]
            for filler in sentence_type['fillers']:
                filler_results[sentence][filler] = bert(sentence, targets=[filler])[0]["score"]
    return filler_results, top_preds
    

In [34]:
filler_results, top_preds = runBERT(sentence_dict)

print_header('Filler Results')
pprint(filler_results, sort_dicts=False)

print_header('Model Predictions')
pprint(top_preds, sort_dicts=False)




----------------------------------------------------------
Filler Results
----------------------------------------------------------
{'The horse raced past the barn [MASK]': {'fell': 1.0337573463914396e-08,
                                          'is': 5.357605559197509e-08,
                                          'was': 3.807201380823244e-07,
                                          'and': 0.00020737976592499763,
                                          '.': 0.9778836965560913},
 'The butter melted in the pan [MASK]': {'smelled': 7.43062429364727e-08,
                                         'smells': 2.818679689653436e-08,
                                         'is': 1.2275096139546804e-07,
                                         'was': 7.456794151039503e-07,
                                         'and': 0.00023132975911721587,
                                         '.': 0.9349180459976196}}


----------------------------------------------------------
Model Predictions

# Behavior Exploration -- GPT2

In [88]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import numpy as np 

def score(model, tokens_tensor):
    loss=model(tokens_tensor, labels=tokens_tensor)[0]
    return np.exp(loss.cpu().detach().numpy())

def runGPT(sentence_dict):
    model = GPT2LMHeadModel.from_pretrained(models['GPT'])
    tokenizer = GPT2Tokenizer.from_pretrained(models['GPT'])
    filler_results = {}
    top_preds = {}
    model.eval()
    for sentence_type in sentence_dict:
        for sentence in sentence_type['sentences']:
            # text = sentence.replace('[MASK]', '')
            # with torch.no_grad():
                # outputs = model(tokenizer.encode(text, add_special_tokens=False, return_tensors="pt"))
                # predictions = outputs[0][0, -1, :]
                # print(len(predictions))
                # print([tokenizer.decode([pred.item()]) for pred in predictions])

            # next_token_logits = outputs[0]
            # print(next_token_logits)
            
            filler_results[sentence] = {}
            for filler in sentence_type['fillers']:
                tokens_tensor = tokenizer.encode(sentence.replace('[MASK]', filler), add_special_tokens=False, return_tensors="pt")
                filler_results[sentence][filler] = score(model, tokens_tensor)
    return filler_results, top_preds

In [89]:
filler_results, top_preds = runGPT(sentence_dict)
print_header("Filler Results")
pprint(filler_results, sort_dicts=False)
print_header("Model Predictions")
pprint(top_preds, sort_dicts=False)



----------------------------------------------------------
Filler Results
----------------------------------------------------------
{'The horse raced past the barn [MASK]': {'fell': 721.4816,
                                          'is': 333.83664,
                                          'was': 323.6483,
                                          'and': 109.708084,
                                          '.': 486.77307},
 'The butter melted in the pan [MASK]': {'smelled': 362.05237,
                                         'smells': 339.34354,
                                         'is': 128.78635,
                                         'was': 131.93286,
                                         'and': 53.049614,
                                         '.': 159.93929}}


----------------------------------------------------------
Model Predictions
----------------------------------------------------------
{}
