In [1]:
import pickle as pkl

import numpy as np
import torch

from transformers import AutoModelForMultipleChoice, AutoTokenizer
from datasets import load_dataset

from src.utils_multiple_choice import convert_examples_to_features, InputExample

from src.bertviz.bertviz import model_view_question

In [2]:
def softmax(x, axis=None):
    """Compute a numerically stable softmax of the scores in ``x``.

    Parameters
    ----------
    x : array_like
        Scores (logits).
    axis : int or None, optional
        Axis along which the scores are normalised. With the default
        ``None`` the softmax is taken over *all* elements of ``x`` (the
        historical behaviour of this helper). Pass e.g. ``axis=-1`` to
        normalise every row of a 2-D array independently.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``x`` whose entries sum to 1 along
        ``axis`` (or over the whole array when ``axis`` is ``None``).
    """
    x = np.asarray(x)
    # Subtracting the maximum leaves the result unchanged mathematically but
    # keeps np.exp from overflowing on large scores.
    e_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return e_x / e_x.sum(axis=axis, keepdims=True)

In [3]:
%%javascript
// Tell RequireJS where to fetch the `d3` and `jquery` modules from.
// NOTE(review): presumably these are needed by the bertviz view rendered
// later in this notebook — confirm against src/bertviz.
require.config({
  paths: {
      // Protocol-relative CDN URLs, pinned to d3 5.7.0 and jquery 2.0.0.
      // The paths are given without a ".js" suffix.
      d3: '//cdnjs.cloudflare.com/ajax/libs/d3/5.7.0/d3.min',
    jquery: '//ajax.googleapis.com/ajax/libs/jquery/2.0.0/jquery.min',
  }
});

<IPython.core.display.Javascript object>

In [4]:
# Constants shared by the cells below: one label per answer option, the
# index -> letter mapping used when printing a prediction, and the sequence
# length handed to convert_examples_to_features.
max_seq_length = 128
label_list = [str(i) for i in range(4)]
label_map = dict(enumerate("ABCD"))

# Model weights and tokenizer are both read from the local bb_race_m directory.
model = AutoModelForMultipleChoice.from_pretrained("../assets/models/bb_race_m/")
tokenizer = AutoTokenizer.from_pretrained("../assets/models/bb_race_m")

# RACE, "middle" configuration; the cells below only use the test split.
dataset = load_dataset("race", "middle")
test = dataset['test']

Reusing dataset race (/home/marcos/.cache/huggingface/datasets/race/middle/0.1.0/a7d1fac780e70c0e75bca35e9f2f8cfc1411edd18ffd6858ddce56f70dfb1e7c)


In [5]:
def predict(article, question, options, real_label=None, return_result=False):
    """Run the multiple-choice model on a single (article, question, options) item.

    Parameters
    ----------
    article : str
        Passage text; it is paired with every option as that option's context.
    question : str
        Question text.
    options : sequence of str
        Candidate answers. Exactly the first four entries are used; fewer
        than four raises ``IndexError``.
    real_label : str, optional
        Gold answer letter (``"A"``..``"D"``). It is converted to a
        zero-based index string; when omitted (or falsy) ``"0"`` is used as
        a placeholder label.
    return_result : bool, optional
        When true, return the full model output (attentions and hidden
        states included, as a dict-style output) instead of the predicted
        index.

    Returns
    -------
    numpy.int64 or model output
        Index of the highest-scoring option (usable as a key into
        ``label_map``), or the raw model output when ``return_result`` is
        true.
    """
    examples = [InputExample(
        example_id="pred",
        question=question,
        contexts=[article, article, article, article],  # this is not efficient but convenient
        endings=[options[0], options[1], options[2], options[3]],
        label=str(ord(real_label) - ord("A")) if real_label else "0"
    )]

    feature = convert_examples_to_features(
        examples,
        label_list,
        max_seq_length,
        tokenizer
    )[0]

    # Wrap each per-option list in an outer list to add the batch dimension.
    features = {
        'input_ids': torch.tensor([feature.input_ids]),
        'attention_mask': torch.tensor([feature.attention_mask]),
        'token_type_ids': torch.tensor([feature.token_type_ids]),
    }

    # Call the module itself (not .forward) so hooks run, and pass tensors by
    # keyword so the call does not depend on the positional order of a
    # particular architecture's forward() signature.
    if return_result:
        # Left attached to the autograd graph, as before, so callers can
        # still inspect gradients of the returned tensors if they want to.
        return model(**features, output_attentions=True,
                     output_hidden_states=True, return_dict=True)

    with torch.no_grad():  # plain inference: no gradients needed
        logits = model(**features)[0][0]

    # The prediction is the option with the highest logit. Taking abs() first
    # (as the previous version did) would pick the largest-magnitude score,
    # which is wrong whenever the most negative logit dominates.
    return logits.cpu().numpy().argmax()

In [6]:
# Take the first test item and split it into the pieces predict() expects.
ex = test[0]
article, question, options, real_label = (
    ex[field] for field in ('article', 'question', 'options', 'answer')
)

result = predict(article, question, options, real_label)

# Show the question, its candidate answers and the letter the model chose.
print(f"Question: {question}")
print(f"Options: {options}")
print(f"Result: {label_map[result]}")

convert examples to features: 1it [00:00, 57.08it/s]


Question: A discipline leader is supposed to  _  .
Options: ['take care of the whole group', 'make sure that everybody finishes homework', 'make sure that nobody chats in class', 'collect all the homework and hand it in to teachers']
Result: C


In [7]:
# Same example again, but keep the full model output instead of just the
# predicted index (note: this rebinds `result` to a different kind of object).
result = predict(
    article=article,
    question=question,
    options=options,
    real_label=real_label,
    return_result=True,
)

convert examples to features: 1it [00:00, 85.04it/s]


In [8]:
def show_model_view(ex):
    """Display the question/option attention view for one dataset example.

    Parameters
    ----------
    ex : mapping
        Example with the keys ``'article'``, ``'question'``, ``'options'``
        (at least four entries; the first four are used) and ``'answer'``
        (gold letter ``"A"``..``"D"``; a falsy value falls back to label
        ``"0"``).

    Returns
    -------
    tuple of (dict, dict, dict)
        ``(atts, tokens_, options)``, each keyed by ``'a'``..``'d'``:

        * ``atts[k]`` - one tensor per attention layer, with a leading batch
          axis of size 1 and both token axes cut to start at the first
          position whose token type id is 1;
        * ``tokens_[k]`` - the tokens of that same span;
        * ``options[k]`` - the offset handed to ``model_view_question`` as
          the option start.

    Raises
    ------
    ValueError
        If an option's token type ids contain no ``1``.
    """
    examples = [InputExample(
        example_id="pred",
        question=ex['question'],
        contexts=[ex['article']] * 4,  # same article paired with every option
        endings=[ex['options'][i] for i in range(4)],
        label=str(ord(ex['answer']) - ord("A")) if ex['answer'] else "0"
    )]

    feature = convert_examples_to_features(
        examples,
        label_list,
        max_seq_length,
        tokenizer
    )[0]

    # Wrap each per-option list in an outer list to add the batch dimension.
    features = {
        'input_ids': torch.tensor([feature.input_ids]),
        'attention_mask': torch.tensor([feature.attention_mask]),
        'token_type_ids': torch.tensor([feature.token_type_ids]),
    }

    # A single forward pass yields the attentions for all four options, so
    # there is no need to re-run the model once per option. Inference only,
    # hence no_grad; keyword arguments keep the call independent of the
    # positional order of the architecture's forward() signature.
    with torch.no_grad():
        attention = model(**features, output_attentions=True, return_dict=True)['attentions']

    # Computed from the example's own question rather than whatever global
    # `question` happens to be defined in the notebook.
    # NOTE(review): the [:-2] presumably drops the trailing blank marker and
    # punctuation of RACE cloze questions - confirm.
    option_start = len(tokenizer.tokenize(ex['question'])[:-2])

    atts, tokens_, options = {}, {}, {}
    for idx, key in enumerate(('a', 'b', 'c', 'd')):
        # First position of the second segment (token type id 1).
        seg_start = feature.token_type_ids[idx].index(1)

        tokens_[key] = tokenizer.convert_ids_to_tokens(feature.input_ids[idx])[seg_start:]
        # layer[idx] is this option's attention for one layer; unsqueeze adds
        # the size-1 batch axis without hard-coding head count or sequence
        # length (previously reshape(1, 12, 128, 128)).
        atts[key] = [
            layer[idx].unsqueeze(0)[:, :, seg_start:, seg_start:]
            for layer in attention
        ]
        options[key] = option_start

    model_view_question(atts, tokens_, options)

    return atts, tokens_, options

In [9]:
# Render the view for the example selected earlier and keep the three
# dictionaries show_model_view returns (attentions, tokens, option offsets).
atts, tokens, option_start = show_model_view(ex)

convert examples to features: 1it [00:00, 59.70it/s]


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>