# Set up (RUN EVERY TIME)


In [None]:
# Troubleshooting only: run this cell when the Drive mount misbehaves, then
# re-run the mount cell below.
from google.colab import drive
drive.flush_and_unmount()

In [None]:
# Mount Google Drive at /content/drive; force_remount=True requests a fresh
# mount even if Drive is already mounted in this session.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
import os

# NOTE 1: A leading '%' marks an IPython/Jupyter "magic" command.
# NOTE 2: `%cd` changes the working directory of the notebook kernel itself,
#         so relative paths used in later cells resolve against it.
# NOTE(review): the saved output of this cell shows .../DATASCI266/SQuAD, which
#         does not match the path below -- confirm which directory is intended.
%cd /content/drive/MyDrive/DATASCI266
# `%cd` already echoes the new directory, so printing os.getcwd() is unnecessary.

/content/drive/MyDrive/DATASCI266/SQuAD


# Inspect Data (DONE)

## Inspect RACE structure (DONE)

In [None]:
import json

# The file is JSON Lines: one JSON object per line. Decode explicitly as UTF-8
# (instead of relying on the platform default) and skip blank lines, which
# would otherwise make json.loads raise.
with open("/content/drive/MyDrive/DATASCI266/RACE_preproc/race/train.jsonl", "r", encoding="utf-8") as f:
  race = [json.loads(line) for line in f if line.strip()]

In [None]:
len(race)  # number of records read from the RACE train.jsonl file

87866

In [None]:
race[0] # Peek at the first RACE record to see which fields an example carries

{'context': 'Last week I talked with some of my students about what they wanted to do after they graduated, and what kind of job prospects  they thought they had.\nGiven that I teach students who are training to be doctors, I was surprised do find that most thought that they would not be able to get the jobs they wanted without "outside help". "What kind of help is that?" I asked, expecting them to tell me that they would need a   or family friend to help them out.\n"Surgery ," one replied.\nI was pretty alarmed by that response. It seems that the graduates of today are increasingly willing to go under the knife to get ahead of others when it comes to getting a job .\nOne girl told me that she was considering surgery to increase her height. "They break your legs, put in special extending screws, and slowly expand the gap between the two ends of the bone as it re-grows, you can get at least 5 cm taller!"\nAt that point, I was shocked. I am short, I can\'t deny that, but I don\'t think I

## Inspect SQuAD structure (DONE)

In [None]:
import json

# Use the absolute path so this cell does not depend on the kernel's current
# working directory (a bare "train-v2.0.json" only resolves when the working
# directory happens to be the SQuAD folder). Decode explicitly as UTF-8.
with open("/content/drive/MyDrive/DATASCI266/SQuAD/train-v2.0.json", "r", encoding="utf-8") as f:
    squad = json.load(f)

In [None]:
squad.keys()  # top-level keys of the loaded SQuAD JSON object

dict_keys(['version', 'data'])

In [None]:
# The "version" entry is not needed here; only the "data" entry is explored.
squad_data = squad["data"]
type(squad_data)  # check the container type before indexing into it

list

In [None]:
squad_data[0].keys()  # keys of the first entry in the "data" list

dict_keys(['title', 'paragraphs'])

In [None]:
# Title of the first entry in the "data" list.
element_title = squad_data[0]["title"]
element_title

'Beyoncé'

In [None]:
# Paragraph records of the first entry. The list itself is not displayed here;
# the following cells look at its length and at a single paragraph instead.
element_paragraphs = squad_data[0]["paragraphs"]

In [None]:
len(element_paragraphs)  # number of paragraph records under the first entry

66

In [None]:
element_paragraphs[0].keys()  # keys of a single paragraph record

dict_keys(['qas', 'context'])

In [None]:
element_paragraphs[0]["qas"]  # question/answer records attached to the first paragraph

[{'question': 'When did Beyonce start becoming popular?',
  'id': '56be85543aeaaa14008c9063',
  'answers': [{'text': 'in the late 1990s', 'answer_start': 269}],
  'is_impossible': False},
 {'question': 'What areas did Beyonce compete in when she was growing up?',
  'id': '56be85543aeaaa14008c9065',
  'answers': [{'text': 'singing and dancing', 'answer_start': 207}],
  'is_impossible': False},
 {'question': "When did Beyonce leave Destiny's Child and become a solo singer?",
  'id': '56be85543aeaaa14008c9066',
  'answers': [{'text': '2003', 'answer_start': 526}],
  'is_impossible': False},
 {'question': 'In what city and state did Beyonce  grow up? ',
  'id': '56bf6b0f3aeaaa14008c9601',
  'answers': [{'text': 'Houston, Texas', 'answer_start': 166}],
  'is_impossible': False},
 {'question': 'In which decade did Beyonce become famous?',
  'id': '56bf6b0f3aeaaa14008c9602',
  'answers': [{'text': 'late 1990s', 'answer_start': 276}],
  'is_impossible': False},
 {'question': 'In what R&B group

In [None]:
element_paragraphs[0]["context"]  # passage text of the first paragraph

'Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ bee-YON-say) (born September 4, 1981) is an American singer, songwriter, record producer and actress. Born and raised in Houston, Texas, she performed in various singing and dancing competitions as a child, and rose to fame in the late 1990s as lead singer of R&B girl-group Destiny\'s Child. Managed by her father, Mathew Knowles, the group became one of the world\'s best-selling girl groups of all time. Their hiatus saw the release of Beyoncé\'s debut album, Dangerously in Love (2003), which established her as a solo artist worldwide, earned five Grammy Awards and featured the Billboard Hot 100 number-one singles "Crazy in Love" and "Baby Boy".'

# Convert to RACE-style Multiple Choice (MC)

In [None]:
# GOAL: CONVERT TO FUNCTION
# STEP 1: READ IN SQuAD
import json

# Decode explicitly as UTF-8 rather than relying on the platform default
# encoding (the passages contain non-ASCII characters).
with open("/content/drive/MyDrive/DATASCI266/SQuAD/train-v2.0.json", "r", encoding="utf-8") as f:
    squad_dataset = json.load(f)

# STEP 2: CREATE FUNCTION
import random
def convert_squad_to_race_style(squad_dataset, seed=None):
    """Convert a SQuAD-format dataset into RACE-style 4-option multiple-choice examples.

    For every paragraph, the answer texts of all answerable questions are
    pooled (stripped, de-duplicated, in order of appearance). Each answerable
    question then becomes one example whose options are its own first listed
    answer plus the first three pooled answers that differ from it, shuffled.

    Args:
        squad_dataset: Parsed SQuAD JSON, i.e. a dict with a "data" list whose
            entries hold "paragraphs", each with a "context" string and a
            "qas" list of question records.
        seed: Optional seed for the option shuffling. When given, a private
            ``random.Random(seed)`` is used so the output is reproducible and
            the module-level generator is left untouched. When ``None``
            (default), the module-level ``random`` generator is used, exactly
            as before.

    Returns:
        A list of dicts with the keys "context", "query", "option_0" ..
        "option_3" and "label" (index of the correct option).

    Notes:
        * Questions flagged "is_impossible" are skipped. A missing flag is
          treated as answerable, so SQuAD files without that key also work.
        * Answerable questions with an empty "answers" list are skipped
          instead of raising IndexError.
        * Questions whose paragraph offers fewer than three other distinct
          answers are skipped, because four options cannot be filled.
        * Distractors are filtered by exact string equality only, so an answer
          that textually overlaps the correct one (e.g. 'late 1990s' vs.
          'in the late 1990s') can still be used as a distractor.
    """
    num_distractors = 3
    rng = random if seed is None else random.Random(seed)
    processed_examples = []

    for element in squad_dataset["data"]:
        for element_paragraph in element["paragraphs"]:
            context = element_paragraph["context"]

            # Keep only questions that are answerable AND actually list an answer.
            answerable_qas = [
                qa for qa in element_paragraph["qas"]
                if not qa.get("is_impossible", False) and qa.get("answers")
            ]

            # Pool of candidate options for this paragraph: every distinct
            # answer text, in first-seen order (dict keys preserve insertion order).
            answer_pool = list(dict.fromkeys(
                ans["text"].strip()
                for qa in answerable_qas
                for ans in qa["answers"]
            ))

            for qa in answerable_qas:
                correct_answer = qa["answers"][0]["text"].strip()

                # First three pooled answers that are not the correct one.
                wrong_answers = [
                    candidate for candidate in answer_pool
                    if candidate != correct_answer
                ][:num_distractors]

                # Not enough distractors to build a 4-option example.
                if len(wrong_answers) < num_distractors:
                    continue

                options = [correct_answer] + wrong_answers
                rng.shuffle(options)

                processed_examples.append({
                    "context": context,
                    "query": qa["question"],
                    "option_0": options[0],
                    "option_1": options[1],
                    "option_2": options[2],
                    "option_3": options[3],
                    "label": options.index(correct_answer),
                })

    return processed_examples

# STEP 3: USE FUNCTION
# Seed the module-level RNG before converting: by default the converter
# shuffles each example's options with the module-level `random` generator, so
# without a seed the option order (and therefore every label) differs between runs.
random.seed(26)
mc_squad = convert_squad_to_race_style(squad_dataset)
print("Total MC examples:", len(mc_squad))
print(mc_squad[0])

Total MC examples: 75902
{'context': 'Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ bee-YON-say) (born September 4, 1981) is an American singer, songwriter, record producer and actress. Born and raised in Houston, Texas, she performed in various singing and dancing competitions as a child, and rose to fame in the late 1990s as lead singer of R&B girl-group Destiny\'s Child. Managed by her father, Mathew Knowles, the group became one of the world\'s best-selling girl groups of all time. Their hiatus saw the release of Beyoncé\'s debut album, Dangerously in Love (2003), which established her as a solo artist worldwide, earned five Grammy Awards and featured the Billboard Hot 100 number-one singles "Crazy in Love" and "Baby Boy".', 'query': 'When did Beyonce start becoming popular?', 'option_0': 'Houston, Texas', 'option_1': 'in the late 1990s', 'option_2': 'singing and dancing', 'option_3': '2003', 'label': 1}


In [None]:
mc_squad[0] # Rich display of one converted record to check its keys and label

{'context': 'Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ bee-YON-say) (born September 4, 1981) is an American singer, songwriter, record producer and actress. Born and raised in Houston, Texas, she performed in various singing and dancing competitions as a child, and rose to fame in the late 1990s as lead singer of R&B girl-group Destiny\'s Child. Managed by her father, Mathew Knowles, the group became one of the world\'s best-selling girl groups of all time. Their hiatus saw the release of Beyoncé\'s debut album, Dangerously in Love (2003), which established her as a solo artist worldwide, earned five Grammy Awards and featured the Billboard Hot 100 number-one singles "Crazy in Love" and "Baby Boy".',
 'query': 'When did Beyonce start becoming popular?',
 'option_0': 'Houston, Texas',
 'option_1': 'in the late 1990s',
 'option_2': 'singing and dancing',
 'option_3': '2003',
 'label': 1}

In [None]:
import random

# Seed for reproducibility + Shuffle.
# Shuffle a COPY with a private, seeded generator instead of shuffling
# `mc_squad` in place: an in-place shuffle makes this cell non-idempotent
# (re-running it would shuffle the already shuffled list and yield a different
# split). random.Random(26) starts from the same state as random.seed(26), so
# the first-run split is unchanged.
SPLIT_SEED = 26
mc_squad_shuffled = list(mc_squad)
random.Random(SPLIT_SEED).shuffle(mc_squad_shuffled)

# 80% train / 10% validation / remainder test.
# NOTE(review): the split is done per example, and examples built from the same
# paragraph share one context, so a paragraph can end up in several splits.
# Confirm that this overlap is acceptable for the intended evaluation.
num_examples = len(mc_squad_shuffled)
train_end = int(0.8 * num_examples)
val_end = int(0.9 * num_examples)

train_data = mc_squad_shuffled[:train_end]
val_data   = mc_squad_shuffled[train_end:val_end]
test_data  = mc_squad_shuffled[val_end:]

# The three slices must cover every example exactly once.
assert len(train_data) + len(val_data) + len(test_data) == num_examples

print("Total:", num_examples)
print("Train:", len(train_data))
print("Val:", len(val_data))
print("Test:", len(test_data))


Total: 75902
Train: 60721
Val: 7590
Test: 7591


# Finetuning (DONE)

In [None]:
# Switch the kernel's working directory to the lrqa folder, presumably so that
# `python -m lrqa.run_lrqa` in the next cell resolves the package from here.
%cd /content/drive/MyDrive/DATASCI266/lrqa

/content/drive/MyDrive/DATASCI266/lrqa


In [None]:
# Fine-tune with lrqa in multiple-choice mode (--model_mode mc) on the custom
# task stored under .../DATASCI266/SQuAD.
# - Starting weights are read from models/roberta_base_0; checkpoints are
#   written to --output_dir.
# - Per-device batch size 8 with 4 gradient-accumulation steps, i.e. presumably
#   32 examples per optimizer step per device.
# - Evaluation and checkpointing both happen every 1000 steps; the two
#   intervals match, which --load_best_model_at_end relies on.
# - The env vars disable W&B logging, reduce TensorFlow log noise, and unbuffer
#   Python output.
# NOTE(review): assumes the train/validation files for the custom task already
# exist under --task_base_path; no visible cell writes them -- confirm.
!export WANDB_DISABLED=true TF_CPP_MIN_LOG_LEVEL=2 PYTHONUNBUFFERED=1; \
python -u -m lrqa.run_lrqa \
  --model_name_or_path "/content/drive/MyDrive/DATASCI266/models/roberta_base_0" \
  --model_mode mc \
  --max_seq_length 512 \
  --task_name custom \
  --task_base_path "/content/drive/MyDrive/DATASCI266/SQuAD" \
  --output_dir "/content/drive/MyDrive/DATASCI266/SQuAD/models/roberta_base_2_finetuning/" \
  --learning_rate 1e-5 \
  --num_train_epochs 3 \
  --warmup_ratio 0.1 \
  --eval_steps 1000 \
  --save_steps 1000 \
  --save_total_limit 5 \
  --save_strategy steps \
  --eval_strategy steps \
  --load_best_model_at_end \
  --per_device_train_batch_size 8 \
  --per_device_eval_batch_size 8 \
  --gradient_accumulation_steps 4 \
  --do_train --do_eval --do_predict --predict_phases validation \
  --logging_strategy steps --logging_steps 50

2025-11-25 04:20:29.790145: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1764044429.810286  245956 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1764044429.816817  245956 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1764044429.832307  245956 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1764044429.832332  245956 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1764044429.832336  245956 computation_placer.cc:177] computation placer alr

# Evaluation with BBQ Dataset (DONE)

In [None]:
# Change the kernel's working directory to the lrqa folder.
# NOTE(review): the saved output of this cell shows /content/drive/MyDrive/DATASCI266,
# which does not match the path below -- confirm the cell was re-run after editing.
# The %%bash cell that follows performs its own `cd` into the same folder.
%cd /content/drive/MyDrive/DATASCI266/lrqa

/content/drive/MyDrive/DATASCI266


In [None]:
%%bash
# Evaluate the fine-tuned multiple-choice model on each BBQ category with lrqa.
# One lrqa run is launched per category; its results are written to
# ${RUN_RESULTS_LOCATION}/<category>. A failing category is reported and the
# loop continues; the cell exits non-zero at the end if any category failed.

LRQA_LOCATION=/content/drive/MyDrive/DATASCI266/lrqa
MODEL_LOCATION=/content/drive/MyDrive/DATASCI266/SQuAD/models/roberta_base_2_finetuning/checkpoint-last
BBQ_LOCATION=/content/drive/MyDrive/DATASCI266/RACE_preproc/bbq
RUN_RESULTS_LOCATION=/content/drive/MyDrive/DATASCI266/SQuAD/bbq_runs
BATCH_SIZE=8

# Abort right away if the lrqa folder is unreachable (e.g. Drive not mounted);
# otherwise every category would run from the wrong directory.
cd "${LRQA_LOCATION}" || { echo "Cannot cd to ${LRQA_LOCATION}" >&2; exit 1; }

# Fail early, with a clear message, when the checkpoint path does not exist.
if [ ! -e "${MODEL_LOCATION}" ]; then
  echo "Model checkpoint not found: ${MODEL_LOCATION}" >&2
  exit 1
fi

# To avoid warnings
export TF_CPP_MIN_LOG_LEVEL=2
export WANDB_DISABLED=true

CATEGORIES=(
  Age Disability_status Gender_identity Nationality Physical_appearance
  Race_ethnicity Race_x_SES Race_x_gender Religion SES Sexual_orientation
)
FAILED_CATEGORIES=()

for CATEGORY in "${CATEGORIES[@]}"; do
  echo "Running BBQ category: ${CATEGORY}"
  mkdir -p "${RUN_RESULTS_LOCATION}/${CATEGORY}"

  if ! python -m lrqa.run_lrqa \
    --model_name_or_path "${MODEL_LOCATION}" \
    --model_mode mc \
    --max_seq_length 512 \
    --task_name custom \
    --task_base_path "${BBQ_LOCATION}/${CATEGORY}" \
    --output_dir "${RUN_RESULTS_LOCATION}/${CATEGORY}" \
    --per_device_eval_batch_size "${BATCH_SIZE}" \
    --do_eval --do_predict --predict_phases validation \
    --report_to none; then
    echo "BBQ category failed: ${CATEGORY}" >&2
    FAILED_CATEGORIES+=("${CATEGORY}")
  fi
done

# Surface failures to the notebook instead of finishing silently.
if [ "${#FAILED_CATEGORIES[@]}" -gt 0 ]; then
  echo "Failed BBQ categories: ${FAILED_CATEGORIES[*]}" >&2
  exit 1
fi

Running BBQ category: Age
🚀 Starting fresh (no checkpoint detected).
{
  "eval_loss": 4.836638927459717,
  "eval_model_preparation_time": 0.0027,
  "eval_accuracy": 0.2956521809101105,
  "eval_runtime": 293.7205,
  "eval_samples_per_second": 12.529,
  "eval_steps_per_second": 1.566
}
Running BBQ category: Disability_status
🚀 Starting fresh (no checkpoint detected).
{
  "eval_loss": 3.6246469020843506,
  "eval_model_preparation_time": 0.0025,
  "eval_accuracy": 0.3129819929599762,
  "eval_runtime": 128.5371,
  "eval_samples_per_second": 12.105,
  "eval_steps_per_second": 1.517
}
Running BBQ category: Gender_identity
🚀 Starting fresh (no checkpoint detected).
{
  "eval_loss": 4.390811920166016,
  "eval_model_preparation_time": 0.0024,
  "eval_accuracy": 0.3321579694747925,
  "eval_runtime": 464.4576,
  "eval_samples_per_second": 12.212,
  "eval_steps_per_second": 1.527
}
Running BBQ category: Nationality
🚀 Starting fresh (no checkpoint detected).
{
  "eval_loss": 3.8673694133

2025-12-03 05:36:23.304114: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1764740183.324173    4324 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1764740183.330164    4324 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1764740183.345132    4324 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1764740183.345157    4324 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1764740183.345160    4324 computation_placer.cc:177] computation placer alr