In [None]:
!pip -q install bitsandbytes \
               git+https://github.com/huggingface/peft.git@42a184f \
               accelerate>=0.26.0 \
               datasets==2.12.0 loralib==0.1.1 einops==0.6.1 \
               torchmetrics==1.3.1

[0m

In [None]:
import os, json, math
from pathlib import Path
from collections import defaultdict
import torch, torch.nn as nn, torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split
from torchmetrics.classification import Accuracy

import bitsandbytes as bnb
from transformers import (AutoConfig, AutoModelForCausalLM, AutoTokenizer,
                          BitsAndBytesConfig)
from peft import (LoraConfig, get_peft_model,
                  prepare_model_for_kbit_training)
import accelerate

In [None]:
# ---- Experiment configuration (everything a reader might want to tune) ----
MODEL_NAME  = "vilsonrodrigues/falcon-7b-instruct-sharded"
BBQ_PATH    = "BBQ"
SUBSET_FILE = "Race_ethnicity.jsonl"      # change to Sexual_orientation.jsonl for SO
MAX_LEN     = 512
BATCH_TRAIN = 4
BATCH_VAL   = 8
LR          = 2e-4
EPOCHS      = 2
LAMBDA      = 0.10                        # λ in L_total = L_QA - λ L_adv

# Restrict the process to GPU 0. This has to happen *before* the first CUDA
# query below: torch.cuda.is_available() initialises the driver, and the
# variable is only read at that point.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

DEVICE      = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
if not Path(BBQ_PATH).exists():
    !git clone -q https://github.com/nyu-mll/BBQ.git

def load_jsonl(path):
    """Read a JSON-Lines file and return its records as a list.

    Args:
        path: Path of a UTF-8 encoded file holding one JSON document per line.

    Returns:
        A list with one decoded object per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    records = []
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            # Blank lines (e.g. a trailing newline at EOF) are not records;
            # feeding them to json.loads would raise.
            if line:
                records.append(json.loads(line))
    return records

In [None]:
# Quantisation settings: 4-bit NF4 weights with double quantisation, computing
# in bfloat16.
bnb_cfg = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

print("⇢ loading Falcon‑7B (4‑bit) ...")
torch.cuda.empty_cache()

# trust_remote_code=True executes the modelling code shipped with the
# checkpoint repository.
base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_cfg,
    trust_remote_code=True,
)

# Reuse EOS as the padding token so padded batches can be built
# (presumably the checkpoint does not define a pad token — TODO confirm).
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token

⇢ loading Falcon‑7B (4‑bit) ...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.05k [00:00<?, ?B/s]

configuration_falcon.py:   0%|          | 0.00/6.70k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/vilsonrodrigues/falcon-7b-instruct-sharded:
- configuration_falcon.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_falcon.py:   0%|          | 0.00/56.9k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/vilsonrodrigues/falcon-7b-instruct-sharded:
- modeling_falcon.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors.index.json:   0%|          | 0.00/16.9k [00:00<?, ?B/s]

Fetching 15 files:   0%|          | 0/15 [00:00<?, ?it/s]

model-00001-of-00015.safetensors:   0%|          | 0.00/1.68G [00:00<?, ?B/s]

model-00006-of-00015.safetensors:   0%|          | 0.00/1.82G [00:00<?, ?B/s]

model-00002-of-00015.safetensors:   0%|          | 0.00/1.99G [00:00<?, ?B/s]

model-00007-of-00015.safetensors:   0%|          | 0.00/1.99G [00:00<?, ?B/s]

model-00008-of-00015.safetensors:   0%|          | 0.00/1.99G [00:00<?, ?B/s]

model-00004-of-00015.safetensors:   0%|          | 0.00/1.99G [00:00<?, ?B/s]

model-00005-of-00015.safetensors:   0%|          | 0.00/1.99G [00:00<?, ?B/s]

model-00003-of-00015.safetensors:   0%|          | 0.00/1.82G [00:00<?, ?B/s]

model-00009-of-00015.safetensors:   0%|          | 0.00/1.82G [00:00<?, ?B/s]

model-00010-of-00015.safetensors:   0%|          | 0.00/1.99G [00:00<?, ?B/s]

model-00011-of-00015.safetensors:   0%|          | 0.00/1.99G [00:00<?, ?B/s]

model-00012-of-00015.safetensors:   0%|          | 0.00/1.82G [00:00<?, ?B/s]

model-00013-of-00015.safetensors:   0%|          | 0.00/1.99G [00:00<?, ?B/s]

model-00014-of-00015.safetensors:   0%|          | 0.00/1.99G [00:00<?, ?B/s]

model-00015-of-00015.safetensors:   0%|          | 0.00/828M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/15 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/117 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/287 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.73M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/281 [00:00<?, ?B/s]

In [None]:
# Prepare the quantised model for training, then wrap it with LoRA adapters.
base_model.gradient_checkpointing_enable()
base_model = prepare_model_for_kbit_training(base_model)

# Rank-16 adapters on the fused attention projection only; no bias training.
lora_cfg = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["query_key_value"],
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)
base_model = get_peft_model(base_model, lora_cfg)

print("LoRA params (trainable vs total):")
def _cnt(m):
    train, total = 0,0
    for p in m.parameters():
        total += p.numel()
        train  += p.numel() if p.requires_grad else 0
    return train, total
# Report how many parameters of the LoRA-wrapped model require gradients,
# both as absolute counts and as a percentage of all parameters.
tr, tot = _cnt(base_model)
print(f"  {tr:,} / {tot:,}  ({100*tr/tot:.2f}%) trainable")


You are using an old version of the checkpointing format that is deprecated (We will also silently ignore `gradient_checkpointing_kwargs` in case you passed it).Please update to the new format on your modeling file. To use the new format, you need to completely remove the definition of the method `_set_gradient_checkpointing` in your model.
You are using an old version of the checkpointing format that is deprecated (We will also silently ignore `gradient_checkpointing_kwargs` in case you passed it).Please update to the new format on your modeling file. To use the new format, you need to completely remove the definition of the method `_set_gradient_checkpointing` in your model.


LoRA params (trainable vs total):
  4,718,592 / 3,613,463,424  (0.13%) trainable


In [None]:
def get_group_label(entry):
    """Build a single string label for the stereotyped groups of an example.

    Reads ``entry["additional_metadata"]["stereotyped_groups"]``. The group
    names are de-duplicated, sorted and joined with '/', so the same set of
    groups always yields the same label regardless of order or repetition.

    Returns:
        The joined label, or "UNKNOWN" when the metadata or the group list is
        missing or empty.
    """
    groups = entry.get("additional_metadata", {}).get("stereotyped_groups", [])
    if groups:
        unique_sorted = sorted(set(groups))
        return "/".join(unique_sorted)
    # Fallback for examples that carry no group information.
    return "UNKNOWN"

In [None]:
# Maps a group-label string to an integer id. The default factory returns the
# current size of the dict, so the first lookup of an unseen label assigns it
# the next consecutive id (0, 1, 2, ...). Caveat: *any* lookup of an unknown
# key inserts it, so ids depend on the order in which labels are first seen.
GROUP2ID = defaultdict(lambda: len(GROUP2ID))

def make_prompt(entry):
    """Render one example as a three-way multiple-choice prompt.

    Uses the "question", "context", "ans0", "ans1" and "ans2" fields of
    ``entry``. The answers are listed as options A, B and C, and the prompt
    ends with "Answer:".

    Returns:
        The prompt as a single string.
    """
    qs, ctx = entry["question"], entry["context"]

    option_lines = []
    for l, key in zip("ABC", ("ans0", "ans1", "ans2")):
        o = entry[key]
        option_lines.append(f"{l}. {o}")
    opts_str = "\n".join(option_lines)

    instruction = ("Choose the most appropriate answer to the question "
                   "given the context.\n\n")
    body = f"Context: {ctx}\nQuestion: {qs}\nOptions:\n{opts_str}\nAnswer:"
    return instruction + body

class BBQDataset(Dataset):
    """Map-style dataset over one BBQ JSONL file.

    Each item is a dict with the tokenised prompt ("input_ids",
    "attention_mask"), the gold answer index ("answer_idx"), the integer id of
    the example's group label ("group_id") and the raw "example_id".
    Tokenisation happens lazily in ``__getitem__``; every prompt is truncated
    or padded to exactly ``max_len`` tokens.
    """

    def __init__(self, jsonl_path, tokenizer, max_len=512):
        self.raw = load_jsonl(jsonl_path)
        self.tok = tokenizer
        self.maxl = max_len

        # Look every label up once so that all group ids are assigned up
        # front, in file order, rather than lazily during iteration.
        for record in self.raw:
            GROUP2ID[get_group_label(record)]

    def __len__(self):
        return len(self.raw)

    def __getitem__(self, idx):
        record = self.raw[idx]
        label = get_group_label(record)

        encoded = self.tok(
            make_prompt(record),
            max_length=self.maxl,
            padding="max_length",
            truncation=True,
            return_tensors="pt",
        )

        # The tokenizer returns tensors with a leading batch dimension of 1;
        # drop it so the DataLoader can stack items itself.
        item = {
            "input_ids": encoded["input_ids"].squeeze(0),
            "attention_mask": encoded["attention_mask"].squeeze(0),
        }
        item["answer_idx"] = torch.tensor(record["label"], dtype=torch.long)
        item["group_id"] = torch.tensor(GROUP2ID[label], dtype=torch.long)
        item["example_id"] = record["example_id"]
        return item

In [None]:
# Fixed seed so the 90/10 train/validation partition is identical on every
# run; without it the reported validation accuracy is not comparable between
# kernel restarts.
SPLIT_SEED = 42

ds_all = BBQDataset(Path(BBQ_PATH)/"data"/SUBSET_FILE, tokenizer, MAX_LEN)
train_len = int(0.9*len(ds_all)); val_len = len(ds_all)-train_len
train_ds, val_ds = random_split(
    ds_all, [train_len, val_len],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
train_dl = DataLoader(train_ds, batch_size=BATCH_TRAIN, shuffle=True)
val_dl   = DataLoader(val_ds,   batch_size=BATCH_VAL)

In [None]:
# Display the group-label -> id mapping that was filled in while the dataset
# was constructed.
GROUP2ID

defaultdict(<function __main__.<lambda>()>,
            {'African American/Black/Hispanic/Latino': 0,
             'African American/Black': 1,
             'Asian': 2,
             'Native American': 3,
             'Roma': 4,
             'Hispanic/Latino': 5,
             'Arab/Middle Eastern': 6,
             'African American/Black/Hispanic/Latino/Native American': 7,
             'Jewish': 8})

In [None]:
class GRL(torch.autograd.Function):
    """Gradient reversal layer.

    Identity in the forward pass; in the backward pass the incoming gradient
    is multiplied by ``-lamb``. ``lamb`` itself receives no gradient.
    """

    @staticmethod
    def forward(ctx, x, lamb):
        ctx.lamb = lamb
        return x.view_as(x)

    @staticmethod
    def backward(ctx, g):
        return -ctx.lamb * g, None


class DebiasFalcon(nn.Module):
    """Causal LM with a QA head and an adversarial group-prediction head.

    A pooled hidden state of the base model feeds two heads:

    * ``qa``  – linear classifier over the answer options;
    * ``adv`` – MLP that tries to predict the group id, attached through a
      gradient reversal layer (GRL).

    Args:
        base: Model called with ``input_ids``/``attention_mask`` that returns
            ``hidden_states`` and exposes ``config.hidden_size``.
        n_answers: Number of answer classes.
        n_groups: Number of group classes. ``None`` (default) means
            ``len(GROUP2ID)`` evaluated when the model is constructed, so the
            value no longer depends on whether the dataset had been built
            before this class was defined.
        lamb: Strength of the reversed adversarial gradient that reaches the
            base model.

    Raises:
        ValueError: if the resolved ``n_groups`` is smaller than 1.
    """

    def __init__(self, base, n_answers=3, n_groups=None, lamb=0.1):
        super().__init__()
        if n_groups is None:
            n_groups = len(GROUP2ID)
        if n_groups < 1:
            raise ValueError(
                "n_groups must be >= 1; build the dataset (which fills "
                "GROUP2ID) before constructing the model or pass n_groups."
            )
        self.base  = base
        self.lamb  = lamb
        hid        = base.config.hidden_size
        self.qa    = nn.Linear(hid, n_answers)
        self.adv   = nn.Sequential(nn.Linear(hid, hid//2),
                                   nn.ReLU(),
                                   nn.Linear(hid//2, n_groups))

    @staticmethod
    def _last_token_index(attention_mask):
        """Index of the last attended position per row (0 if none is attended).

        Works for left- and right-padded batches alike.
        """
        positions = torch.arange(attention_mask.size(1),
                                 device=attention_mask.device)
        return (attention_mask.long() * positions).argmax(dim=1)

    def forward(self, input_ids, attention_mask,
                answer_idx=None, group_id=None, return_hidden=False):
        """Run the base model and both heads.

        Returns:
            ``(qa_logits, pooled_hidden)`` when ``return_hidden`` is true,
            otherwise ``{"loss": ..., "qa_logits": ...}``. ``loss`` is ``None``
            unless both ``answer_idx`` and ``group_id`` are given.
        """
        out = self.base(input_ids=input_ids,
                        attention_mask=attention_mask,
                        use_cache=False,
                        output_hidden_states=True,
                        return_dict=True)
        last_hidden = out.hidden_states[-1]         # [batch, seq, hidden]

        # With causal attention, position 0 only ever sees the first token, so
        # it cannot summarise the prompt. Pool the last real (non-padding)
        # token instead, which has attended to the whole sequence.
        rows = torch.arange(last_hidden.size(0), device=last_hidden.device)
        last_idx = self._last_token_index(attention_mask).to(last_hidden.device)
        h_last = last_hidden[rows, last_idx]        # [batch, hidden]
        # The heads are ordinary nn.Linear layers; match their dtype in case
        # the base model emits a reduced-precision hidden state.
        h_last = h_last.to(self.qa.weight.dtype)

        qa_logits  = self.qa(h_last)
        adv_logits = self.adv(GRL.apply(h_last, self.lamb))
        loss = None
        if answer_idx is not None and group_id is not None:
            qa_loss  = F.cross_entropy(qa_logits,  answer_idx)
            adv_loss = F.cross_entropy(adv_logits, group_id)
            # The GRL already flips (and scales by λ) the adversarial gradient
            # flowing into the base model, so the losses are *added*: the
            # adversary minimises adv_loss while the encoder effectively
            # minimises L_QA - λ·L_adv. Subtracting here would reverse the
            # sign a second time and train both players the wrong way round.
            loss = qa_loss + adv_loss
        if return_hidden:
            return qa_logits, h_last
        return {"loss": loss, "qa_logits": qa_logits}

# Wrap the LoRA-adapted base model with the QA and adversarial heads, using
# LAMBDA as the adversarial weight, and move the wrapper to DEVICE.
model = DebiasFalcon(base_model, lamb=LAMBDA).to(DEVICE)

In [None]:
# Optimise only the parameters that require gradients.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optim = torch.optim.AdamW(trainable_params, lr=LR)

# Running 3-way accuracy metric, kept on the same device as the logits.
accuracy = Accuracy(task="multiclass", num_classes=3).to(DEVICE)

def evaluate():
    """Score the model on ``val_dl``.

    Puts ``model`` in eval mode, resets the global ``accuracy`` metric and
    runs every validation batch without gradients.

    NOTE: a later cell in this notebook defines ``evaluate`` again and
    shadows this definition; keep the two in sync or delete one of them.

    Returns:
        ``(acc, preds_jsonl)`` where ``acc`` is the accuracy as a float and
        ``preds_jsonl`` is a list with one dict per validation example holding
        "example_id", "prediction_idx" and "stereotyped_group" (the group id).
    """
    model.eval(); accuracy.reset()
    preds_jsonl = []
    with torch.no_grad():
        for batch in val_dl:
            mb = {k:v.to(DEVICE) if torch.is_tensor(v) else v for k,v in batch.items()}
            # Pass only what forward() accepts: the batch also carries
            # "example_id", which model(**mb) would reject as an unexpected
            # keyword argument.
            logits = model(input_ids=mb["input_ids"],
                           attention_mask=mb["attention_mask"])["qa_logits"]
            accuracy.update(logits, mb["answer_idx"])
            pred_idx  = logits.argmax(-1).tolist()
            group_ids = mb["group_id"].tolist()
            # Pair each prediction with its *own* example id and group id
            # rather than with those of the first item in the batch.
            for eid, p, g in zip(mb["example_id"], pred_idx, group_ids):
                preds_jsonl.append(
                    {"example_id": eid.item() if torch.is_tensor(eid) else eid,
                     "prediction_idx": int(p),
                     "stereotyped_group": int(g)}
                )
    acc = accuracy.compute().item()
    return acc, preds_jsonl

In [None]:
def evaluate():
    """Score the model on ``val_dl``.

    Puts ``model`` in eval mode, resets the global ``accuracy`` metric and
    runs every validation batch without gradients.

    Returns:
        ``(acc, preds_jsonl)`` where ``acc`` is the accuracy as a float and
        ``preds_jsonl`` is a list with one dict per validation example holding
        "example_id", "prediction_idx" and "stereotyped_group" (the group id).
    """
    model.eval(); accuracy.reset()
    preds_jsonl = []
    with torch.no_grad():
        for batch in val_dl:
            mb = {k:v.to(DEVICE) if torch.is_tensor(v) else v for k,v in batch.items()}
            # Only pass the expected arguments to the model
            logits = model(input_ids=mb["input_ids"],
                           attention_mask=mb["attention_mask"])["qa_logits"]
            accuracy.update(logits, mb["answer_idx"])
            pred_idx  = logits.argmax(-1).tolist()
            group_ids = mb["group_id"].tolist()
            # Pair each prediction with its *own* example id and group id;
            # using mb["group_id"][0] would tag the whole batch with the group
            # of its first example.
            for eid, p, g in zip(mb["example_id"], pred_idx, group_ids):
                preds_jsonl.append(
                    {"example_id": eid.item() if torch.is_tensor(eid) else eid,
                     "prediction_idx": int(p),
                     "stereotyped_group": int(g)}
                )
    acc = accuracy.compute().item()
    return acc, preds_jsonl

In [None]:
# Keyword arguments accepted by the model's forward(); everything else in a
# batch (e.g. the example id) is left out of the call.
MODEL_ARG_NAMES = ("input_ids", "attention_mask", "answer_idx", "group_id")

print(f"\n⇢ Starting training  (λ = {LAMBDA})")
for ep in range(1, EPOCHS+1):
    model.train()
    for batch in train_dl:
        # Copy every tensor field of the batch onto the training device.
        mb = {}
        for name, value in batch.items():
            if torch.is_tensor(value):
                mb[name] = value.to(DEVICE)

        inputs = {name: mb[name] for name in MODEL_ARG_NAMES}

        # One optimisation step: forward, backward, update, clear gradients.
        out = model(**inputs)
        out["loss"].backward()
        optim.step()
        optim.zero_grad()

    # Validation accuracy after each epoch.
    val_acc, preds = evaluate()
    print(f"Epoch {ep:02d}  –  QA val accuracy: {val_acc:.3f}")



⇢ Starting training  (λ = 0.1)


  return fn(*args, **kwargs)


KeyboardInterrupt: 

In [None]:
# evaluate() returns (accuracy, predictions). Unpack it: formatting the tuple
# itself with ":.3f" raises TypeError.
# `ep` is the epoch counter left over from the training cell.
val_acc, preds = evaluate()
print(f"Epoch {ep:02d}  –  QA val accuracy: {val_acc:.3f}")