# Setup

In [16]:
import transformers
from transformers import pipeline, AutoTokenizer, AutoModelForMaskedLM
import numpy as np
import torch
from pprint import pprint
import logging
import re

# Restrict the transformers library's own logger to error-level messages so
# model-loading notices do not clutter cell output.
transformers.logging.set_verbosity_error()

def print_header(header):
    """Print ``header`` framed by two dashed rules, preceded by two blank lines.

    Args:
        header: Value to show between the rules (printed via ``print``).
    """
    rule = "----------------------------------------------------------"
    print("\n\n" + rule, header, rule, sep="\n")

# Display name -> pretrained model identifier. Entries are looked up with
# models[...] and passed to from_pretrained() when a model is loaded.
models = {
    # 'BERT': 'bert-base-uncased',
    'BERT (whole word)': 'bert-large-uncased-whole-word-masking',
    'GPT': 'gpt2',
}

# Each *_sentence_dict maps a construction label to a pair of sentence frames:
# an 'ambiguous' variant and an 'un-ambiguated' variant, both ending in a
# literal "[MASK]" slot where a filler word is scored.
# NOTE(review): the labels ('NP/S', 'NP/Z', 'MVRR') look like garden-path
# construction types -- confirm. The horse sentence is usually cited as a
# reduced-relative example but is labelled 'NP/S' here; verify that is intended.
horse_sentence_dict = {
    'NP/S': {
        'ambiguous': "The horse raced past the barn [MASK]",
        'un-ambiguated': "The horse raced past the barn, [MASK]"
    }
}

# Candidate continuations for the horse frames. This is a flat list, unlike
# politician_fillers below, which groups its fillers in a dict.
horse_fillers = [
        'fell',
        'is',
        'was',
        'and',
        '.',
    ]

butter_sentence_dict = {
    'NP/S': {
        'ambiguous': "The butter melted in the pan [MASK]",
        'un-ambiguated': "The butter melted in the pan, [MASK]",
    }
}

# Candidate continuations for the butter frames (flat list, as for the horse set).
butter_fillers = [
        'smelled',
        'smells',
        'is',
        'was',
        'and',
        '.',
    ]

# Three construction types built around the same "corrupt politician" material.
politician_sentence_dict = {
    'NP/S': {
        'ambiguous': 'The corrupt politician mentioned the bill [MASK]',
        'un-ambiguated': 'The corrupt politician that mentioned the bill [MASK]',
    },
    'NP/Z': {
        'ambiguous': 'After the corrupt politician signed the bill [MASK]',
        'un-ambiguated': 'After the corrupt politician signed, the bill [MASK]',
    },
    'MVRR': {
        'ambiguous': 'The corrupt politician handed the bill [MASK]',
        'un-ambiguated': 'The corrupt politician who was handed the bill [MASK]',
    },
}

# Fillers grouped under 'incorrect' / 'correct' keys (a dict of lists, not a
# flat list like horse_fillers and butter_fillers).
politician_fillers = {
    'incorrect': [
            'and',
            '.',
            'to',
    ],
    'correct': [
        'is',
        'was',
        'received'
    ],
}

# Single filler whose un-ambiguated/ambiguous score ratio is reported later
# via get_ratios().
politician_filler = 'received'

# Behavior Exploration -- BERT

In [3]:
def setup(model):
    """Build a fill-mask pipeline for one entry of the ``models`` table.

    Args:
        model: Key into the module-level ``models`` dict (e.g.
            ``'BERT (whole word)'``); the mapped value is handed to
            ``from_pretrained``.

    Returns:
        A ``(fill_mask_pipeline, mask_token)`` tuple, where ``mask_token`` is
        read from the pipeline's tokenizer.
    """
    checkpoint = models[model]
    # Tokenizer first, then weights -- same load order as before.
    tok = AutoTokenizer.from_pretrained(checkpoint)
    masked_lm = AutoModelForMaskedLM.from_pretrained(checkpoint)
    fill_mask = pipeline("fill-mask", model=masked_lm, tokenizer=tok)
    return fill_mask, fill_mask.tokenizer.mask_token

def runBERT(bert, sentence_dict, fillers):
    """Score candidate fillers and collect top predictions for each sentence.

    Args:
        bert: Callable fill-mask pipeline. It is invoked as
            ``bert(sentence, top_k=12)`` and ``bert(sentence, targets=[filler])``
            and must return a list of dicts with ``"token_str"`` and
            ``"score"`` keys.
        sentence_dict: ``{sentence_type: {'ambiguous': str, 'un-ambiguated': str}}``;
            each sentence contains the mask slot the pipeline fills.
        fillers: Candidate words to score. Accepts a dict of groups
            (``{'correct': [...], 'incorrect': [...]}``), a flat iterable of
            words (``['fell', 'is', ...]``), or a single word as a string.

    Returns:
        ``(filler_results, top_preds)`` where
        ``filler_results[sentence_type][clarity][filler]`` is the pipeline
        score of ``filler`` and ``top_preds[sentence_type][clarity]`` is a
        list of ``(token_str, score)`` tuples for the 12 top predictions.
    """
    # Normalise every supported filler shape to one flat, ordered list.
    # Previously only the grouped-dict shape worked; a flat list raised
    # TypeError on ``fillers[accuracy]``.
    if isinstance(fillers, str):
        filler_list = [fillers]
    elif isinstance(fillers, dict):
        filler_list = [filler for group in fillers.values() for filler in group]
    else:
        filler_list = list(fillers)

    filler_results = {}
    top_preds = {}
    for sentence_type in sentence_dict:
        top_preds[sentence_type] = {}
        filler_results[sentence_type] = {}
        for clarity in ['ambiguous', 'un-ambiguated']:
            sentence = sentence_dict[sentence_type][clarity]
            outputs = bert(sentence, top_k=12)
            top_preds[sentence_type][clarity] = [
                (output["token_str"], output["score"]) for output in outputs
            ]
            # One targeted query per filler: with a single target the first
            # (only) result is that filler's score.
            filler_results[sentence_type][clarity] = {
                filler: bert(sentence, targets=[filler])[0]["score"]
                for filler in filler_list
            }
    return filler_results, top_preds
    

In [4]:
# Load the whole-word-masking BERT pipeline and score the politician fillers
# in both variants of every politician sentence. `filler_results` is reused by
# the ratio cell further down; `mask` is not used in the cells shown here.
bert, mask = setup('BERT (whole word)')
filler_results, top_preds = runBERT(bert, politician_sentence_dict, politician_fillers)

print_header('Filler Results')
pprint(filler_results, sort_dicts=False)

# Printing of the collected top predictions is currently disabled.
# print_header('Model Predictions')
# pprint(top_preds, sort_dicts=False)




----------------------------------------------------------
Filler Results
----------------------------------------------------------
{'NP/S': {'ambiguous': {'and': 0.001315354835242033,
                        '.': 0.7417080998420715,
                        'to': 0.002144616562873125,
                        'received': 2.3071579562383704e-06},
          'un-ambiguated': {'and': 0.00040642396197654307,
                            '.': 0.22318482398986816,
                            'to': 0.0011608798522502184,
                            'received': 2.1722256860812195e-05}},
 'NP/Z': {'ambiguous': {'and': 0.0007047720719128847,
                        '.': 0.7722906470298767,
                        'to': 0.0003126610827166587,
                        'received': 8.955280463851523e-06},
          'un-ambiguated': {'and': 0.0014642988098785281,
                            '.': 0.17061227560043335,
                            'to': 0.006601774133741856,
                            'r

In [20]:
def get_ratios(filler_results, filler):
    """Compare a filler's score between the two variants of each sentence type.

    Args:
        filler_results: ``{sentence_type: {'ambiguous': {filler: score},
            'un-ambiguated': {filler: score}}}`` as produced by ``runBERT``.
        filler: The filler word whose scores are compared.

    Returns:
        ``{sentence_type: un_ambiguated_score / ambiguous_score}``. When the
        ambiguous score is exactly zero the ratio is ``inf`` (positive
        un-ambiguated score) or ``nan`` (otherwise) instead of raising
        ``ZeroDivisionError``.

    Raises:
        KeyError: If a variant or the filler is missing for a sentence type.
    """
    ratios = {}
    for sentence_type, by_clarity in filler_results.items():
        unambig = by_clarity['un-ambiguated'][filler]
        ambig = by_clarity['ambiguous'][filler]
        if ambig == 0:
            # A score that underflowed to 0 should not abort the whole table.
            ratios[sentence_type] = float('inf') if unambig > 0 else float('nan')
        else:
            ratios[sentence_type] = unambig / ambig
    return ratios


In [22]:
# Per sentence type, divide the score of `politician_filler` in the
# un-ambiguated sentence by its score in the ambiguous one; a value above 1
# means the filler scored higher in the un-ambiguated variant.
ratios = get_ratios(filler_results, politician_filler)
pprint(ratios, sort_dicts=False)

{'NP/S': 9.415158074494618,
 'NP/Z': 51.23381853432232,
 'MVRR': 15.754554639597483}


# Behavior Exploration -- GPT2

In [7]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import numpy as np 

def score(model, tokens_tensor):
    """Return ``exp(loss)`` of ``model`` on ``tokens_tensor`` used as its own labels.

    Args:
        model: Callable invoked as ``model(tokens_tensor, labels=tokens_tensor)``
            whose first output element is a scalar loss tensor.
        tokens_tensor: Token-id tensor for one sentence.

    Returns:
        NumPy scalar equal to ``exp(loss)``. NOTE(review): for Hugging Face
        causal-LM heads the loss is presumably the mean token cross-entropy,
        which would make this the sentence perplexity (lower = more expected)
        -- confirm against the model docs.
    """
    # Only the loss value is needed, so skip building the autograd graph.
    with torch.no_grad():
        loss = model(tokens_tensor, labels=tokens_tensor)[0]
    return np.exp(loss.cpu().detach().numpy())

def runGPT(sentence_dict, fillers=None):
    """Score each sentence with GPT-2 after substituting every candidate filler.

    Two input layouts are supported:

    * ``fillers`` given -- ``sentence_dict`` uses the same layout as the BERT
      experiments, ``{sentence_type: {clarity: sentence}}``, and ``fillers``
      is a flat iterable of words, a dict of word groups, or a single word.
      Results are nested as ``filler_results[sentence_type][clarity][filler]``.
    * ``fillers`` omitted (legacy) -- ``sentence_dict`` is an iterable of
      groups, each a dict with ``'sentences'`` and ``'fillers'`` keys.
      Results are nested as ``filler_results[sentence][filler]``.

    Args:
        sentence_dict: Sentences containing the literal ``[MASK]`` placeholder,
            in one of the two layouts above.
        fillers: Optional candidate words; selects the layout as described.

    Returns:
        ``(filler_results, top_preds)``. Each result value is whatever
        ``score`` returns for the filled-in sentence. ``top_preds`` is always
        an empty dict; it is returned only to mirror ``runBERT``.
        TODO: populate it from the model's next-token logits.
    """
    model = GPT2LMHeadModel.from_pretrained(models['GPT'])
    tokenizer = GPT2Tokenizer.from_pretrained(models['GPT'])
    model.eval()

    def _score_filled(sentence, filler):
        # Replace the placeholder textually, then score the whole sentence.
        tokens_tensor = tokenizer.encode(
            sentence.replace('[MASK]', filler),
            add_special_tokens=False,
            return_tensors="pt",
        )
        with torch.no_grad():  # inference only
            return score(model, tokens_tensor)

    filler_results = {}
    top_preds = {}

    if fillers is None:
        # Legacy layout: each group carries its own sentences and fillers.
        for group in sentence_dict:
            for sentence in group['sentences']:
                filler_results[sentence] = {
                    filler: _score_filled(sentence, filler)
                    for filler in group['fillers']
                }
        return filler_results, top_preds

    # Notebook layout: flatten the fillers once, then walk type -> clarity.
    if isinstance(fillers, str):
        filler_list = [fillers]
    elif isinstance(fillers, dict):
        filler_list = [filler for group in fillers.values() for filler in group]
    else:
        filler_list = list(fillers)

    for sentence_type, variants in sentence_dict.items():
        filler_results[sentence_type] = {
            clarity: {
                filler: _score_filled(sentence, filler) for filler in filler_list
            }
            for clarity, sentence in variants.items()
        }
    return filler_results, top_preds

In [8]:
# filler_results, top_preds = runGPT(sentence_dict)
# print_header("Filler Results")
# pprint(filler_results, sort_dicts=False)
# print_header("Model Predictions")
# pprint(top_preds, sort_dicts=False)