### Imports

In [None]:
# Enable IPython's autoreload extension in mode 2 so that edited project modules
# are re-imported automatically instead of requiring a kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
import datasets
from functools import partial
import pandas as pd
from transformers import (
    GPT2Tokenizer,
    GPTNeoForSequenceClassification,
    GPTNeoForCausalLM,
    AutoTokenizer,
    OPTForCausalLM,
)
import torch
import numpy as np

In [None]:
import os
import sys

# Put the sibling ``src`` directory on the import path (once) so the project
# packages imported in the following cells can be resolved from this notebook.
module_path = os.path.abspath("../src")
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [None]:
from models.sft_training import (
    train_judge_for_multirc,
    train_judge_for_multirc_with_lm_head,
)
from data.create_qa_dataloaders import create_multirc_lm_dataloaders

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [None]:
from utils import set_seed

# Fix the random seed so runs are repeatable.
# NOTE(review): which RNGs set_seed covers is defined in utils — confirm there.
set_seed(62)

In [None]:
from constants import FALSE_LABEL_STR, TRUE_LABEL_STR

# Class index <-> label string maps; both are handed to the classification model
# when it is loaded further down.
id2label = dict(enumerate((FALSE_LABEL_STR, TRUE_LABEL_STR)))
label2id = {label: index for index, label in id2label.items()}

# Train Judge

In [None]:
int8_training = True  # Load/train with 8-bit quantized weights (forwarded as `load_in_8bit` when the models are loaded below)
lora_training = True  # https://github.com/microsoft/LoRA
autocast_training = True  # Automatic mixed precision: https://pytorch.org/blog/accelerating-training-on-nvidia-gpus-with-pytorch-automatic-mixed-precision/ (meant as the option for hardware that doesn't support int8_training — NOTE(review): both flags are True here, confirm that is intended)

### Classification Head

In [None]:
# Alternative (smaller) checkpoint:
# model_name = "xhyi/PT_GPTNEO350_ATG"
model_name = "EleutherAI/gpt-neo-1.3B"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
# GPT-Neo with a two-label sequence-classification head; the label maps are
# passed through to the model config. Both 8-bit loading and low_cpu_mem_usage
# are switched on/off together by the int8_training flag.
model = GPTNeoForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2,
    id2label=id2label,
    label2id=label2id,
    load_in_8bit=int8_training,
    low_cpu_mem_usage=int8_training,
)

In [None]:
# Register a dedicated "<PAD>" token on the tokenizer and tell the model which id it has.
# NOTE(review): presumably needed because this tokenizer ships without a pad token — confirm.
tokenizer.add_special_tokens({"pad_token": "<PAD>"})
model.config.pad_token_id = tokenizer.pad_token_id
# Resize the embedding matrix to the tokenizer's new vocabulary size.
model.resize_token_embeddings(len(tokenizer))

In [None]:
# Run identity and hyperparameters for the classification-head judge.
# Every value here is passed as a keyword argument to train_judge_for_multirc.
run_name = "gpt-neo-1.3B"
project_name = "MultiRC-Judge"
store_locally = False  # Set False if you want to delete any config + checkpoint files in models/ (doesn't delete from subdirectories)
upload_to_wandb = True

batch_size = 16
lr = 5e-5
lr_scheduler = None  # "cosine-annealing" | None

epochs = 10
# NOTE(review): the three cadences below are presumably counted in batches /
# epochs as their names suggest — confirm against models.sft_training.
acc_every_batch = 250
eval_every_batch = 250
save_every_epoch = 1

In [12]:
# Launch training of the classification-head judge with the model, tokenizer,
# flags and hyperparameters defined in the cells above.
# NOTE(review): balance=True presumably balances the True/False classes —
# confirm in models.sft_training.
train_judge_for_multirc(
    model=model,
    tokenizer=tokenizer,
    model_name=model_name,
    run_name=run_name,
    project_name=project_name,
    device=device,
    lr=lr,
    lr_scheduler=lr_scheduler,
    autocast_training=autocast_training,
    int8_training=int8_training,
    lora_training=lora_training,
    batch_size=batch_size,
    store_locally=store_locally,
    upload_to_wandb=upload_to_wandb,
    epochs=epochs,
    acc_every_batch=acc_every_batch,
    eval_every_batch=eval_every_batch,
    save_every_epoch=save_every_epoch,
    balance=True,
)

In [None]:
import wandb

# Close the Weights & Biases run opened for the classification-head training
# before the next experiment starts.
wandb.finish()

### LM Head

In [None]:
# Reload the same checkpoint, this time with the causal-LM head, replacing the
# `tokenizer` and `model` globals used by the classification-head experiment.
model_name = "EleutherAI/gpt-neo-1.3B"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
# 8-bit loading and low_cpu_mem_usage both follow the int8_training flag.
model = GPTNeoForCausalLM.from_pretrained(
    model_name, load_in_8bit=int8_training, low_cpu_mem_usage=int8_training
)

In [None]:
# Register a dedicated "<PAD>" token on the fresh tokenizer and record its id on the model config.
# NOTE(review): presumably needed because this tokenizer ships without a pad token — confirm.
tokenizer.add_special_tokens({"pad_token": "<PAD>"})
model.config.pad_token_id = tokenizer.pad_token_id
# Resize the embedding matrix to the tokenizer's new vocabulary size.
model.resize_token_embeddings(len(tokenizer))

In [None]:
# Run identity and hyperparameters for the LM-head judge; these overwrite the
# values used for the classification-head run and are passed to
# train_judge_for_multirc_with_lm_head.
run_name = "gpt-neo-1.3B-LM-Head"
project_name = "MultiRC-Judge"
store_locally = False  # Set False if you want to delete any config + checkpoint files in models/ (doesn't delete from subdirectories)

batch_size = 16
lr = 5e-5
lr_scheduler = None  # "cosine-annealing" | None

epochs = 10
# NOTE(review): the three cadences below are presumably counted in batches /
# epochs as their names suggest — confirm against models.sft_training.
acc_every_batch = 250
eval_every_batch = 250
save_every_epoch = 1

In [None]:
# Build the train/validation loaders for the LM-head judge from the tokenizer
# (which already carries the added pad token at this point).
train_loader, val_loader = create_multirc_lm_dataloaders(tokenizer)

In [None]:
# Launch training of the LM-head judge. Unlike the classification-head run, the
# data loaders are built beforehand and passed in; the validation loader is
# supplied as `test_loader`, and the tokenizer's EOS id is forwarded explicitly.
train_judge_for_multirc_with_lm_head(
    model=model,
    tokenizer=tokenizer,
    train_loader=train_loader,
    test_loader=val_loader,
    model_name=model_name,
    run_name=run_name,
    project_name=project_name,
    device=device,
    lr=lr,
    lr_scheduler=lr_scheduler,
    autocast_training=autocast_training,
    int8_training=int8_training,
    lora_training=lora_training,
    batch_size=batch_size,
    store_locally=store_locally,
    epochs=epochs,
    acc_every_batch=acc_every_batch,
    eval_every_batch=eval_every_batch,
    save_every_epoch=save_every_epoch,
    eos_token_id=tokenizer.eos_token_id,
)

In [None]:
import wandb

# Close the Weights & Biases run opened for the LM-head training.
wandb.finish()

In [None]:
from models.evaluation import generate_completion

In [None]:
# Validation examples for the manual accuracy check; the evaluation loop reads
# the "prompt" and "label" columns of this frame.
val_data = pd.read_csv("../data/processed/easy_mrc_val.csv")

In [None]:
from peft import PeftModel

# Attach the adapter saved under ../models/multirc_lm_judge to the base model,
# then merge the adapter weights into it so `judge` is a plain model again.
judge = PeftModel.from_pretrained(model, model_id="../models/multirc_lm_judge")
judge = judge.merge_and_unload()

In [None]:
# Instruction and answer cue wrapped around every example shown to the judge.
prompt_prefix = "Reply True if the answer is a correct answer to the question and False otherwise.\n\n"
prompt_suffix = "\n\nTrue or False:"

# Hand-written probe: the proposed answer ("Eraser") is not among the objects
# the context mentions.
prompt = (
    prompt_prefix
    + "Context:\nAllan sat down at his desk and pulled the chair in close. Opening a side drawer, he took out a piece of paper and his inkpot. After filling his pen, Allan looked at his paper in the orange glow from the lantern set back in the desk's right - hand corner."
    + "\n\nQuestion:\nName few objects said to be in or on Allan's desk"
    + "\n\nAnswer:\nEraser"
    + prompt_suffix
)

In [None]:
# Show the assembled prompt with its newlines rendered, to check the layout by eye.
print(prompt)

In [None]:
# Move the merged judge onto the selected device.
# NOTE(review): the base model is loaded with load_in_8bit=int8_training; some
# transformers versions reject .to() on 8-bit models — confirm this cell runs
# with int8_training=True.
judge.to(device)

In [None]:
# Run the judge on the hand-written prompt and keep the generated text for inspection.
evaluation = generate_completion(judge, tokenizer, prompt)

In [None]:
# Score the judge on (at most) the first 2000 validation rows: `score` counts
# the rows whose generated verdict matches the gold label.
model.eval()
score = 0

# Gold label encoded by each verdict word the judge may emit.
verdict_to_label = {"True": 1, "False": 0}

# head(2000) caps the rows by position, so the limit does not depend on the
# frame's index labels happening to equal row positions.
for _, row in val_data.head(2000).iterrows():
    prompt = prompt_prefix + row["prompt"] + prompt_suffix
    completion = generate_completion(model, tokenizer, prompt, max_new_tokens=50)
    # The verdict is the last space-separated token of the completion, with any
    # trailing end-of-text marker cut off.
    verdict = completion.split(" ")[-1].split("<|endoftext|>")[0]
    # Anything other than a clean True/False verdict counts as a miss.
    if verdict in verdict_to_label and verdict_to_label[verdict] == row["label"]:
        score += 1

In [None]:
# Accuracy over the rows that were actually scored: the evaluation loop stops
# after 2000 rows but sees fewer when the validation file is shorter, so divide
# by the real count (max(1, ...) keeps an empty file from dividing by zero).
score / max(1, min(len(val_data), 2000))

In [None]:
# Instruction and answer cue wrapped around every example shown to the judge.
prompt_prefix = "Reply True if the answer is a correct answer to the question and False otherwise.\n\n"
prompt_suffix = "\n\nTrue or False:"

# Hand-written probe whose answer ("Lantern") is an object the context does
# mention. Each section is laid out as "<Header>:\n<text>", with a blank line
# between sections.
prompt = (
    "Context:\n"
    + "Allan sat down at his desk and pulled the chair in close. Opening a side drawer, he took out a piece of paper and his inkpot. After filling his pen, Allan looked at his paper in the orange glow from the lantern set back in the desk's right - hand corner."
    + "\n\nQuestion:"
    + "\nName few objects said to be in or on Allan's desk"
    + "\n\nAnswer:"
    # A newline escape belongs here; "\L" is not a valid escape and would put a
    # literal backslash in front of the answer.
    + "\nLantern"
)
prompt = prompt_prefix + prompt + prompt_suffix

In [None]:
# generate_completion returns the decoded text as a string, so indexing it with
# [-1] would display only its final character. Show the verdict word instead:
# the last space-separated token with any trailing end-of-text marker removed.
generate_completion(model, tokenizer, prompt).split(" ")[-1].split("<|endoftext|>")[0]