# Setup


Make sure you use Kaggle's P100 GPU. This notebook has not been tested with any other GPU.


In [8]:
import os
import wandb
from kaggle_secrets import UserSecretsClient
from typing import Literal

TRAIN_TYPE: Literal["SFT", "DPO"] = "DPO"  # SFT or DPO

MAX_STEPS = 1  # edit this after debugging
LIMIT = 1  # edit this after debugging

try:
    user_secrets = UserSecretsClient()
    wandb_api_key = user_secrets.get_secret("WANDB_API_KEY")
    os.environ["WANDB_API_KEY"] = wandb_api_key
    wandb.login(key=wandb_api_key)
except Exception as e:
    print("WANDB_API_KEY not set or failed to load.")
    print("Reason:", str(e))
    print("In Kaggle, add it via Add-ons → Secrets → Add Secret.")

MODEL_BASE_UIDS = [
    "unsloth/llama-3.2-1B-bnb-4bit",
    "unsloth/llama-3.2-3B-bnb-4bit",
    "unsloth/Meta-Llama-3.1-8B-bnb-4bit",
]
# those are our finetuned SFT models
MODEL_SFT_UIDS = ["alextsiak/llama-3.2-1B-bnb-4bit-mix-500st","mzarev/Llama-3.1-8B-bnb-4bit-mix-500st"]
DATASET_SFT_UIDS = [
    "allenai/tulu-3-sft-personas-math-grade",
    "allenai/tulu-3-sft-personas-math",
    "allenai/tulu-3-sft-personas-instruction-following",
    "allenai/tulu-3-sft-personas-algebra",
    "allenai/tulu-3-sft-personas-code",
]
DATASET_SFT_MIXTURE_UIDS = ["allenai/tulu-3-sft-mixture"]
DATASET_DPO_UIDS = [
    "allenai/llama-3.1-tulu-3-8b-preference-mixture",
    "allenai/llama-3.1-tulu-3-70b-preference-mixture",
    "allenai/llama-3.1-tulu-3-405b-preference-mixture",
]
LM_EVAL_UIDS = [
    "hellaswag",
    "gsm8k",
    "arc_easy",
    "truthfulqa",
    "winogrande",
    "humaneval",
]


MODEL_BASE_UID = MODEL_BASE_UIDS[0]  # choose depending on choice of finetuned model (if any)
MODEL_FINETUNED_UID = MODEL_SFT_UIDS[0]  # choose
DATASET_UIDS = DATASET_DPO_UIDS  # make sure this represents the datasets you're currently interested in
DATASET_UID = DATASET_UIDS[0]  # choose your dataset


print("Config done...")

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Config done...


In [2]:
!git clone https://github.com/EleutherAI/lm-evaluation-harness.git
!pip install -e ./lm-evaluation-harness/.
!pip install unsloth transformers datasets wandb pandas

fatal: destination path 'lm-evaluation-harness' already exists and is not an empty directory.
Obtaining file:///kaggle/working/lm-evaluation-harness
  Installing build dependencies ... [?25l[?25hdone
  Checking if build backend supports build_editable ... [?25l[?25hdone
  Getting requirements to build editable ... [?25l[?25hdone
  Preparing editable metadata (pyproject.toml) ... [?25l[?25hdone
Collecting evaluate (from lm_eval==0.4.8)
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting jsonlines (from lm_eval==0.4.8)
  Downloading jsonlines-4.0.0-py3-none-any.whl.metadata (1.6 kB)
Collecting pytablewriter (from lm_eval==0.4.8)
  Downloading pytablewriter-1.2.1-py3-none-any.whl.metadata (38 kB)
Collecting rouge-score>=0.0.4 (from lm_eval==0.4.8)
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting sacrebleu>=1.5.0 (from lm_eval==0.4.8)
  Downloading sacrebleu-2.5.1-py3-none-any.whl.metadata (51 kB)


# Fine-Tuning


In [9]:
from unsloth import FastLanguageModel
import wandb
from transformers import BitsAndBytesConfig
from trl import SFTTrainer, SFTConfig, DPOTrainer, DPOConfig
import torch
from datasets import load_dataset
import json

torch.cuda.empty_cache()

model_name = MODEL_BASE_UID.split("/")[-1]
dataset_name = DATASET_UID.split("/")[-1]

if TRAIN_TYPE == "SFT":
    base_model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=MODEL_BASE_UID, max_seq_length=2048, dtype=None, load_in_4bit=True
    )
    base_model.save_pretrained(model_name)

    model = FastLanguageModel.get_peft_model(
        base_model,
        r=16,
        target_modules=[
            "q_proj",
            "k_proj",
            "v_proj",
            "o_proj",
            "gate_proj",
            "up_proj",
            "down_proj",
        ],
        lora_alpha=16,
        lora_dropout=0,  # Supports any, but = 0 is optimized
        bias="none",  # Supports any, but = "none" is optimized
        use_gradient_checkpointing="unsloth",  # True or "unsloth" for very long context
        max_seq_length=2048,
        use_rslora=False,  # We support rank stabilized LoRA
        loftq_config=None,  # And LoftQ
    )
else:
    # this is the original base model
    ref_model, _ = FastLanguageModel.from_pretrained(
        model_name=MODEL_BASE_UID,
        max_seq_length=2048,
        dtype=None,
        load_in_4bit=True,
    )
    ref_model.save_pretrained(model_name)
    # this is our LoRA-adapted SFT model
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=MODEL_FINETUNED_UID,
        max_seq_length=2048,
        dtype=None,
        load_in_4bit=True,
    )

    tokenizer.chat_template = """<s>[INST] {{ user }} [/INST] {{ assistant }}</s>"""

    # don't want to train the reference model
    for param in ref_model.parameters():
        param.requires_grad = False

model.config.quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=False,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

train_dataset = load_dataset(DATASET_UID, split="train")
print(train_dataset[0].keys())


if TRAIN_TYPE == "SFT":
    print(json.dumps(train_dataset[0]["messages"], indent=2))
    
    def formatting_func(examples):
        messages = examples["messages"]
        texts = [
            "".join([m["content"].strip() + "\n" for m in convo]).strip()
            for convo in messages
        ]
        return {"text": texts}

    train_dataset = train_dataset.map(formatting_func, batched=True)

wandb.login(key=os.environ["WANDB_API_KEY"])
wandb.init(
    project="pm-pt",
    name=f"{model_name}_{dataset_name}",
    config={
        "model": MODEL_BASE_UID,
        "dataset": DATASET_UID,
        "max_steps": MAX_STEPS,
        "learning_rate": 2e-4,
        "batch_size": 2,
        "gradient_accumulation_steps": 4,
    },
)

if TRAIN_TYPE == "SFT":
    trainer = SFTTrainer(
        model=model,
        train_dataset=train_dataset,
        tokenizer=tokenizer,
        args=SFTConfig(
            dataset_text_field="text",
            max_seq_length=2048,
            learning_rate=2e-4,
            per_device_train_batch_size=2,
            gradient_accumulation_steps=4,
            warmup_steps=5,
            max_steps=MAX_STEPS,
            report_to="wandb",
            run_name=f"{model_name}_{dataset_name}",
            output_dir="outputs",
            optim="adamw_8bit",
        ),
    )
else:
    trainer = DPOTrainer(
        model=model,
        ref_model=ref_model,
        args=DPOConfig(
            beta=0.1,
            max_length=2048,
            learning_rate=2e-4,
            per_device_train_batch_size=2,
            gradient_accumulation_steps=4,
            warmup_steps=5,
            max_steps=MAX_STEPS,
            report_to="wandb",
            run_name=f"{model_name}_{dataset_name}",
            output_dir="outputs",
            optim="adamw_8bit",
        ),
        train_dataset=train_dataset,
        tokenizer=tokenizer,
    )

trainer.train()

wandb.finish()

model.save_pretrained(f"{model_name}_{TRAIN_TYPE}_finetuned_{dataset_name}")
tokenizer.save_pretrained(f"{model_name}_{TRAIN_TYPE}_finetuned_{dataset_name}")

==((====))==  Unsloth 2025.4.7: Fast Llama patching. Transformers: 4.51.1.
   \\   /|    Tesla T4. Num GPUs = 2. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
==((====))==  Unsloth 2025.4.7: Fast Llama patching. Transformers: 4.51.1.
   \\   /|    Tesla T4. Num GPUs = 2. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


dict_keys(['id', 'source', 'prompt', 'chosen', 'rejected'])


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 272,898 | Num Epochs = 1 | Total steps = 1
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 4 x 1) = 16
 "-____-"     Trainable parameters = 11,272,192/1,000,000,000 (1.13% trained)


Step,Training Loss,rewards / chosen,rewards / rejected,rewards / accuracies,rewards / margins,logps / chosen,logps / rejected,logits / chosen,logits / rejected,eval_logits / chosen,eval_logits / rejected,nll_loss,aux_loss
1,0.6931,-0.186095,-0.186095,0.0,0.0,-7.196974,-7.196974,2.794866,2.794866,0,0,0,0


0,1
train/epoch,▁▁
train/global_step,▁▁
train/grad_norm,▁
train/learning_rate,▁
train/logits/chosen,▁
train/logits/rejected,▁
train/logps/chosen,▁
train/logps/rejected,▁
train/loss,▁
train/rewards/accuracies,▁

0,1
total_flos,0.0
train/epoch,6e-05
train/global_step,1.0
train/grad_norm,0.0
train/learning_rate,0.0
train/logits/chosen,2.79487
train/logits/rejected,2.79487
train/logps/chosen,-7.19697
train/logps/rejected,-7.19697
train/loss,0.6931


('llama-3.2-1B-bnb-4bit_DPO_finetuned_llama-3.1-tulu-3-8b-preference-mixture/tokenizer_config.json',
 'llama-3.2-1B-bnb-4bit_DPO_finetuned_llama-3.1-tulu-3-8b-preference-mixture/special_tokens_map.json',
 'llama-3.2-1B-bnb-4bit_DPO_finetuned_llama-3.1-tulu-3-8b-preference-mixture/tokenizer.json')

# Evaluation


In [10]:
import os
import subprocess

model_name = MODEL_BASE_UID.split("/")[-1]
dataset_name = DATASET_UID.split("/")[-1]

peft_path = f"./{model_name}_{TRAIN_TYPE}_finetuned_{dataset_name}"

os.environ["HF_ALLOW_CODE_EVAL"] = "1"

tasks_str = ",".join(LM_EVAL_UIDS)

command = [
    "lm_eval",
    "--model",
    "hf",
    "--model_args",
    f"pretrained={model_name},peft={peft_path}",
    "--tasks",
    tasks_str,
    "--confirm_run_unsafe_code",
    "--device",
    "cuda",
    "--batch_size",
    "auto",
    "--limit",
    str(LIMIT),
]

subprocess.run(command)

2025-05-06 19:33:30.881131: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746560010.906012     826 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746560010.913051     826 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Generating train split: 100%|██████████| 2251/2251 [00:00<00:00, 219210.08 examples/s]
Generating test split: 100%|██████████| 2376/2376 [00:00<00:00, 328433.78 examples/s]
Generating validation split: 100%|██████████| 570/570 [00:00<00:00, 175018.54 examples/s]
Generating train split: 100%|██████████| 7473/7473 [00:00<00:00, 361143.83 examples/s]
Generating test split: 100%|██████████| 1319/1319 [00:00<00:00, 219274.16 examples/s]
D

Passed argument batch_size = auto:1. Detecting largest batch size
Determined largest batch size: 64


Running generate_until requests: 100%|██████████| 3/3 [00:41<00:00, 13.92s/it]


Passed argument batch_size = auto. Detecting largest batch size
Determined Largest batch size: 1


fatal: not a git repository (or any parent up to mount point /kaggle)
Stopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).


hf (pretrained=llama-3.2-1B-bnb-4bit,peft=./llama-3.2-1B-bnb-4bit_DPO_finetuned_llama-3.1-tulu-3-8b-preference-mixture), gen_kwargs: (None), limit: 1.0, num_fewshot: None, batch_size: auto (64)
|    Tasks     |Version|     Filter     |n-shot|  Metric   |   | Value |   |Stderr|
|--------------|------:|----------------|-----:|-----------|---|------:|---|------|
|arc_easy      |      1|none            |     0|acc        |↑  | 1.0000|±  |   N/A|
|              |       |none            |     0|acc_norm   |↑  | 1.0000|±  |   N/A|
|gsm8k         |      3|flexible-extract|     5|exact_match|↑  | 0.0000|±  |   N/A|
|              |       |strict-match    |     5|exact_match|↑  | 0.0000|±  |   N/A|
|hellaswag     |      1|none            |     0|acc        |↑  | 0.0000|±  |   N/A|
|              |       |none            |     0|acc_norm   |↑  | 1.0000|±  |   N/A|
|humaneval     |      1|create_test     |     0|pass@1     |   | 0.0000|±  |   N/A|
|truthfulqa_gen|      3|none            |     0|bl

CompletedProcess(args=['lm_eval', '--model', 'hf', '--model_args', 'pretrained=llama-3.2-1B-bnb-4bit,peft=./llama-3.2-1B-bnb-4bit_DPO_finetuned_llama-3.1-tulu-3-8b-preference-mixture', '--tasks', 'hellaswag,gsm8k,arc_easy,truthfulqa,winogrande,humaneval', '--confirm_run_unsafe_code', '--device', 'cuda', '--batch_size', 'auto', '--limit', '1'], returncode=0)

# Creating Excel Sheet Template

In [12]:
import pandas as pd
from itertools import product

columns = ["model_uid", "dataset_uid"] + LM_EVAL_UIDS

model_dataset_pairs = list(product(MODEL_BASE_UIDS, DATASET_UIDS))

empty_eval_df = pd.DataFrame(columns=columns)

for model_uid, dataset_uid in model_dataset_pairs:
    row = {
        "model_uid": model_uid,
        "dataset_uid": dataset_uid,
    }
    for task in LM_EVAL_UIDS:
        row[task] = None
    empty_eval_df.loc[len(empty_eval_df)] = row

empty_eval_df.to_excel("empty_eval_results.xlsx", index=False)

print("Created empty eval_results.xlsx")

Created empty eval_results.xlsx
