In [1]:
import numpy as np
import torch
from tqdm import tqdm

In [2]:
%%capture
from transformers import AutoTokenizer, AutoModelForMultipleChoice

my_token = "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"

model = AutoModelForMultipleChoice.from_pretrained("danlou/albert-xxlarge-v2-finetuned-csqa", use_auth_token = my_token)

In [3]:
%%capture
tokenizer = AutoTokenizer.from_pretrained("danlou/albert-xxlarge-v2-finetuned-csqa", use_auth_token = my_token)



In [21]:
# prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
# choice0 = "It is eaten with a fork and a knife."
# choice1 = "It is eaten while held in the hand."
# labels = torch.tensor(0).unsqueeze(0)  # choice0 is correct (according to Wikipedia ;)), batch size 1

# encoding = tokenizer([prompt, prompt], [choice0, choice1], return_tensors="pt", padding=True)
# outputs = model(**{k: v.unsqueeze(0) for k, v in encoding.items()}, labels=labels)  # batch size is 1
# torch.softmax(outputs.logits, dim=1)

## Setup Logging

In [4]:
import logging
logging.basicConfig(filename='std_albert_script.log', filemode='w', format='%(asctime)s : %(name)s : %(levelname)s : %(message)s')
logger=logging.getLogger() 
logger.setLevel(logging.INFO)

## Import Riddle Sense

In [5]:
import jsonlines
riddle_sense = []
with jsonlines.open('riddle_sense/rs_train.jsonl') as f:
    for line in f:
        riddle_sense.append(line)

In [6]:
options = {0: "A", 1: "B", 2: "C", 3: "D", 4: "E"}

In [7]:
numbers_form_opt =  { "A": 0, "B": 1, "C":2, "D":3, "E": 4}

In [8]:
# riddle_sense[0][ 'answerKey']

In [11]:
device = "cuda" if torch.cuda.is_available() else "cpu"
logger.info("Current Device: " + device)

In [12]:
_ = model.to(device)

In [56]:
# model_inference = model.copy()

In [12]:
def inference(riddle):
    
    choices = riddle['choices']    
    
    # create k(number of options)  duplicates of the question 
    option_count = len(choices)
    questions = ((riddle['question'] + "~~") * option_count).split("~~")[:-1]


    encoding = tokenizer(questions, choices, return_tensors="pt", padding=True).to(device)
    outputs = model(**{k: v.unsqueeze(0) for k, v in encoding.items()})
    probability = torch.softmax(outputs.logits, dim=1) 
    prediction = torch.argmax(probability, dim=1).item()
    probability = probability.tolist()[0]

    proba_dist = " "
    
    for i in range(len(probability)):
        proba_dist = proba_dist + choices[i]+ ":"
        proba_dist = proba_dist + str(round(probability[i], 4))
        proba_dist = proba_dist + " -> "
        
    
    return "Prediction : "+ str(prediction) + "  || " +"Probability Distribution --> " + proba_dist

In [9]:
# riddle = {
#     'question': 'A man is incarcerated in prison, and as his punishment he has to carry a one tonne bag of sand backwards and forwards across a field the size of a football pitch.  What is the one thing he can put in it to make it lighter?'
# , 'choices': ['throw', 'bit', 'gallon', 'mouse', 'hole']}





In [10]:
# inference(riddle)

In [16]:
# proba_dist

### Finetuned on CSQA

In [18]:
predictions = []
answer_key = []

corrected_count = 0


for riddle_num in tqdm(range(len(riddle_sense))):
    questions = []
    choices = []

    for i in range(0,5):
        questions.append(riddle_sense[riddle_num]['question']['stem'])
        choices.append(riddle_sense[riddle_num]['question']['choices'][i]["text"])

    encoding = tokenizer(questions, choices, return_tensors="pt", padding=True).to(device)
    outputs = model(**{k: v.unsqueeze(0) for k, v in encoding.items()})
    probability = torch.softmax(outputs.logits, dim=1)
    prediction = options[torch.argmax(probability, dim=1).item()]
    current_answer = riddle_sense[riddle_num][ 'answerKey']
    
    if not prediction == current_answer:
        pred_index = numbers_form_opt[prediction]
        answer_index = numbers_form_opt[current_answer]

        
        masked_choices = choices.copy()
        masked_choices[pred_index] = "XXXXXXX"
        
        ablated_answer = choices[answer_index]
        

        
        encoding = tokenizer(questions, masked_choices, return_tensors="pt", padding=True).to(device)
        outputs_ablated = model(**{k: v.unsqueeze(0) for k, v in encoding.items()})
        probability_ablated = torch.softmax(outputs_ablated.logits, dim=1)
        prediction_ablated = masked_choices[torch.argmax(probability_ablated, dim=1).item()]
        
#         logger.info("Trail.  Question: " + riddle_sense[riddle_num]['question']['stem']+ " | Choices: "+ ", ".join(choices)+ "| Initial Prediction  :" + choices[numbers_form_opt[prediction]] + " | Ablated Prediction: " + prediction_ablated + " | Answer: " +ablated_answer)
        
        if prediction_ablated == ablated_answer:
            corrected_count = corrected_count + 1

    predictions.append(prediction)
    answer_key.append(current_answer)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3510/3510 [02:52<00:00, 20.37it/s]


In [20]:
riddle_sense[riddle_num]['question']['choices'][]

[{'label': 'A', 'text': 'crowd'},
 {'label': 'B', 'text': 'characteristic'},
 {'label': 'C', 'text': 'sportsmanlike'},
 {'label': 'D', 'text': 'all the fans left'},
 {'label': 'E', 'text': 'shade'}]

In [19]:
corrected_count

441

In [21]:
len(predictions[:4000])

3510

In [22]:
num = len(predictions)

## Accuracy

In [31]:
## Initial Accuracy
initial_accuracy = ((sum(np.array(predictions[:num])==np.array(answer_key[:num])))/len(predictions[:num]))
initial_accuracy

0.5262108262108263

In [32]:
mask_accuracy = ((sum(np.array(predictions[:num])==np.array(answer_key[:num]))+corrected_count)/len(predictions[:num]))
mask_accuracy

0.6518518518518519

In [30]:
mask_accuracy-initial_accuracy

0.12564102564102564

In [None]:
{"id": "xzc81234ioioucv-1052",
 "question": {"stem": "What comes in many different sizes but is always only 1 foot long?",
              "choices": [{"label": "A", "text": "object"}, {"label": "B", "text": "shoe"}, {"label": "C", "text": "footing"},
                          {"label": "D", "text": "grave"}, {"label": "E", "text": "column"}]}, "answerKey": "B"}

### Finetuned on riddlesense

In [15]:
import evaluate

metric = evaluate.load("accuracy")

In [16]:
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

In [17]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(output_dir="test_trainer", evaluation_strategy="epoch")

In [35]:
import os
os.environ["TOKENIZERS_PARALLELISM"] = "true"

In [20]:
%%capture

from datasets import load_dataset

csqa_dataset = load_dataset("commonsense_qa")

In [21]:
csqa_dataset

DatasetDict({
    train: Dataset({
        features: ['id', 'question', 'question_concept', 'choices', 'answerKey'],
        num_rows: 9741
    })
    validation: Dataset({
        features: ['id', 'question', 'question_concept', 'choices', 'answerKey'],
        num_rows: 1221
    })
    test: Dataset({
        features: ['id', 'question', 'question_concept', 'choices', 'answerKey'],
        num_rows: 1140
    })
})

In [14]:
%%capture
from datasets import load_dataset
riddle_sense_dataset = load_dataset('json',  data_files={'train': ['riddle_sense/rs_train.jsonl'], 'validation': ['riddle_sense/rs_dev.jsonl']} )



In [34]:
riddle_sense_dataset['train'][0]['question']['choices'][i]

{'label': 'A', 'text': 'throw'}

In [41]:
riddle_sense[0]

{'id': 'cxzvpoiuzckf-3123',
 'question': {'stem': 'A man is incarcerated in prison, and as his punishment he has to carry a one tonne bag of sand backwards and forwards across a field the size of a football pitch.  What is the one thing he can put in it to make it lighter?',
  'choices': [{'label': 'A', 'text': 'throw'},
   {'label': 'B', 'text': 'bit'},
   {'label': 'C', 'text': 'gallon'},
   {'label': 'D', 'text': 'mouse'},
   {'label': 'E', 'text': 'hole'}]},
 'answerKey': 'E'}

In [74]:
def preprocess(examples):

    questions = []
    choices = []

    for i in range(0,5):
#         questions.append(riddle_sense_dataset['train'][0]['question']['stem'])
#         choices.append(riddle_sense_dataset['train'][0]['question']['choices'][i]["text"])
        questions.append(examples['question']['stem'])
        choices.append(examples['question']['choices'][i]["text"])


    encoding = tokenizer(questions, choices, return_tensors="pt",  truncation=True, padding=True).to(device)
    return {k: v.unsqueeze(0) for k, v in encoding.items()}

In [75]:
tokenized_datasets = riddle_sense_dataset.map(preprocess)

  0%|          | 0/3510 [00:00<?, ?ex/s]

  0%|          | 0/1021 [00:00<?, ?ex/s]

In [76]:
tokenized_datasets['train']

Dataset({
    features: ['id', 'question', 'answerKey', 'input_ids', 'token_type_ids', 'attention_mask'],
    num_rows: 3510
})

In [71]:
# def tokenize_function(examples):
#     return tokenizer(examples["question"], padding="max_length", truncation=True)


# tokenized_datasets = riddle_sense_dataset.map(tokenizer, batched=True)

In [73]:
# train_encodings = tokenizer(riddle_sense_dataset['train'], truncation=True, padding=True)
# val_encodings = tokenizer(riddle_sense_dataset['validation'], truncation=True, padding=True)
# test_encodings = tokenizer(test_texts, truncation=True, padding=True)

In [86]:
from dataclasses import dataclass
from transformers.tokenization_utils_base import PreTrainedTokenizerBase, PaddingStrategy
from typing import Optional, Union
import torch


@dataclass
class DataCollatorForMultipleChoice:
    """
    Data collator that will dynamically pad the inputs for multiple choice received.
    """

    tokenizer: PreTrainedTokenizerBase
    padding: Union[bool, str, PaddingStrategy] = True
    max_length: Optional[int] = None
    pad_to_multiple_of: Optional[int] = None

    def __call__(self, features):
        print(features[0].keys())
        label_name = "answerKey" if "answerKey" in features[0].keys() else "answerKey"
        labels = [feature.pop(label_name) for feature in features]
        batch_size = len(features)
        num_choices = len(features[0]["input_ids"])
        flattened_features = [
            [{k: v[i] for k, v in feature.items()} for i in range(num_choices)] for feature in features
        ]
        flattened_features = sum(flattened_features, [])

        batch = self.tokenizer.pad(
            flattened_features,
            padding=self.padding,
            max_length=self.max_length,
            pad_to_multiple_of=self.pad_to_multiple_of,
            return_tensors="pt",
        )

        batch = {k: v.view(batch_size, num_choices, -1) for k, v in batch.items()}
        batch["labels"] = torch.tensor(labels, dtype=torch.int64)
        return batch

In [87]:
tokenized_datasets['train'][0].keys()

dict_keys(['id', 'question', 'answerKey', 'input_ids', 'token_type_ids', 'attention_mask'])

In [88]:
trainer = Trainer(
    model=model,
    tokenizer = tokenizer,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation'],
    compute_metrics=compute_metrics,
    data_collator=DataCollatorForMultipleChoice(tokenizer=tokenizer),

)

In [89]:
trainer.train()

dict_keys(['input_ids', 'token_type_ids', 'attention_mask'])


KeyError: 'answerKey'

In [91]:
%%capture
swag = load_dataset("swag", "regular")

In [92]:
ending_names = ["ending0", "ending1", "ending2", "ending3"]


def preprocess_function(examples):
    first_sentences = [[context] * 4 for context in examples["sent1"]]
    question_headers = examples["sent2"]
    second_sentences = [
        [f"{header} {examples[end][i]}" for end in ending_names] for i, header in enumerate(question_headers)
    ]

    first_sentences = sum(first_sentences, [])
    second_sentences = sum(second_sentences, [])

    tokenized_examples = tokenizer(first_sentences, second_sentences, truncation=True)
    return {k: [v[i : i + 4] for i in range(0, len(v), 4)] for k, v in tokenized_examples.items()}

In [94]:
%%capture
tokenized_swag = swag.map(preprocess_function, batched=True)


In [102]:
training_args = TrainingArguments(
    output_dir="my_awesome_swag_model",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    learning_rate=5e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    push_to_hub=True,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_swag["train"],
    eval_dataset=tokenized_swag["validation"],
    tokenizer=tokenizer,
    data_collator=DataCollatorForMultipleChoice(tokenizer=tokenizer),
    compute_metrics=compute_metrics,
)

trainer.train()

OSError: Token is required (`token=True`), but no token found. You need to provide a token or be logged in to Hugging Face with `huggingface-cli login` or `huggingface_hub.login`. See https://huggingface.co/settings/tokens.