#### HuggingFace Authentication & Configuration

In [1]:
# HuggingFace Authentication & Configuration
import os
from huggingface_hub import login
from dotenv import load_dotenv
from datetime import datetime
from pathlib import Path

# Platform-independent .env loading (works on MacBook, Lambda, any cloud)
# Try multiple locations in priority order
env_paths = [
    Path.cwd().parent.parent / ".env",  # Project root (2 levels up from notebook)
    Path("/finetuning_evaluation/.env"),  # Lambda cloud path
    Path.home() / "finetuning_evaluation" / ".env",  # Home directosry
]

env_loaded = False
for env_path in env_paths:
    if env_path.exists():
        load_dotenv(env_path)
        print(f"✅ Loaded .env from: {env_path}")
        env_loaded = True
        break

if not env_loaded:
    print("⚠️  No .env file found, using environment variables")

hf_token = os.getenv('HF_TOKEN')
if not hf_token:
    raise ValueError("HF_TOKEN not found in environment")

login(token=hf_token)

MODEL_NAME_MAP = {
    "SFT_Baseline": "kapilw25/llama3-8b-pku-sft-baseline",
    "SFT_GRIT": "kapilw25/llama3-8b-pku-sft-grit",
    "DPO_Baseline": "kapilw25/llama3-8b-pku-dpo-baseline",
    "DPO_GRIT": "kapilw25/llama3-8b-pku-dpo-grit",
    "CITA_Baseline": "kapilw25/llama3-8b-pku-cita-baseline",
    "CITA_GRIT": "kapilw25/llama3-8b-pku-cita-grit",
}

# Set for this notebook
RUN_NAME = "CITA_Baseline"  # ← This notebook's identifier
HF_REPO = MODEL_NAME_MAP[RUN_NAME] + "-bf16"  # Add -bf16 suffix

print(f"✅ HuggingFace authenticated")
print(f"📦 Model will be pushed to: {HF_REPO}")

  from .autonotebook import tqdm as notebook_tqdm
Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


✅ Loaded .env from: /lambda/nfs/DiskUsEast3/finetuning_evaluation/.env
✅ HuggingFace authenticated
📦 Model will be pushed to: kapilw25/llama3-8b-pku-cita-baseline-bf16


In [2]:
# CITA_Baseline - Use standard HuggingFace loading (no Unsloth, BF16 only)
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

max_seq_length = 2048
dtype = torch.bfloat16  # BFloat16 for GH200 GPU (Grace Hopper architecture)

# Load base Llama-3.1 8B model in BF16 (no quantization)
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Meta-Llama-3.1-8B",
    device_map="auto",
    torch_dtype=dtype,  # BF16, no quantization
    token=hf_token,
)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(
    "meta-llama/Meta-Llama-3.1-8B",
    use_fast=True,
    token=hf_token,
)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

print(f"✅ Model loaded: {model.__class__.__name__}")
print(f"✅ Model: meta-llama/Meta-Llama-3.1-8B")
print(f"✅ Precision: BF16 (no quantization)")
print(f"✅ Device map: {model.hf_device_map}")
print(f"✅ Tokenizer vocab size: {len(tokenizer)}")

Loading checkpoint shards: 100%|██████████| 4/4 [00:01<00:00,  2.27it/s]


✅ Model loaded: LlamaForCausalLM
✅ Model: meta-llama/Meta-Llama-3.1-8B
✅ Precision: BF16 (no quantization)
✅ Device map: {'': 0}
✅ Tokenizer vocab size: 128256


In [3]:
# @title Alignment Handbook utils
import os
import re
from typing import List, Literal, Optional

from datasets import DatasetDict, concatenate_datasets, load_dataset, load_from_disk
from datasets.builder import DatasetGenerationError


DEFAULT_CHAT_TEMPLATE = "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n'  + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}"


def apply_chat_template(
    example,
    tokenizer,
    task: Literal["sft", "generation", "rm", "dpo"] = "sft",
    assistant_prefix="<|assistant|>\n",
):
    def _strip_prefix(s, pattern):
        # Use re.escape to escape any special characters in the pattern
        return re.sub(f"^{re.escape(pattern)}", "", s)

    if task in ["sft", "generation"]:
        messages = example["messages"]
        # We add an empty system message if there is none
        if messages[0]["role"] != "system":
            messages.insert(0, {"role": "system", "content": ""})
        example["text"] = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True if task == "generation" else False,
        )
    elif task == "rm":
        if all(k in example.keys() for k in ("chosen", "rejected")):
            chosen_messages = example["chosen"]
            rejected_messages = example["rejected"]
            # We add an empty system message if there is none
            if chosen_messages[0]["role"] != "system":
                chosen_messages.insert(0, {"role": "system", "content": ""})
            if rejected_messages[0]["role"] != "system":
                rejected_messages.insert(0, {"role": "system", "content": ""})
            example["text_chosen"] = tokenizer.apply_chat_template(
                chosen_messages, tokenize=False
            )
            example["text_rejected"] = tokenizer.apply_chat_template(
                rejected_messages, tokenize=False
            )
        else:
            raise ValueError(
                f"Could not format example as dialogue for `rm` task! Require `[chosen, rejected]` keys but found {list(example.keys())}"
            )
    elif task == "dpo":
        if all(k in example.keys() for k in ("chosen", "rejected")):
            # Compared to reward modeling, we filter out the prompt, so the text is everything after the last assistant token
            prompt_messages = [
                [msg for msg in example["chosen"] if msg["role"] == "user"][0]
            ]
            # Insert system message
            if example["chosen"][0]["role"] != "system":
                prompt_messages.insert(0, {"role": "system", "content": ""})
            else:
                prompt_messages.insert(0, example["chosen"][0])
            # TODO: handle case where chosen/rejected also have system messages
            chosen_messages = example["chosen"][1:]
            rejected_messages = example["rejected"][1:]
            example["text_chosen"] = tokenizer.apply_chat_template(
                chosen_messages, tokenize=False
            )
            example["text_rejected"] = tokenizer.apply_chat_template(
                rejected_messages, tokenize=False
            )
            example["text_prompt"] = tokenizer.apply_chat_template(
                prompt_messages, tokenize=False, add_generation_prompt=True
            )
            example["text_chosen"] = _strip_prefix(
                example["text_chosen"], assistant_prefix
            )
            example["text_rejected"] = _strip_prefix(
                example["text_rejected"], assistant_prefix
            )
        else:
            raise ValueError(
                f"Could not format example as dialogue for `dpo` task! Require `[chosen, rejected]` keys but found {list(example.keys())}"
            )
    else:
        raise ValueError(
            f"Task {task} not supported, please ensure that the provided task is one of {['sft', 'generation', 'rm', 'dpo']}"
        )
    return example


def get_datasets(
    data_config: dict,
    splits: List[str] = ["train", "test"],
    shuffle: bool = True,
) -> DatasetDict:
    """
    Loads one or more datasets with varying training set proportions.

    Args:
        data_config (`DataArguments` or `dict`):
            Dataset configuration and split proportions.
        splits (`List[str]`, *optional*, defaults to `['train', 'test']`):
            Dataset splits to load and mix. Assumes the splits exist in all datasets and have a `train_` or `test_` prefix.
        shuffle (`bool`, *optional*, defaults to `True`):
            Whether to shuffle the training and testing/validation data.

    Returns
        [`DatasetDict`]: The dataset dictionary containing the loaded datasets.
    """

    if type(data_config) is dict:
        # Structure of the input is:
        #     dataset_mixer = {
        #             "dataset1": 0.5,
        #             "dataset1": 0.3,
        #             "dataset1": 0.2,
        #         }
        dataset_mixer = data_config
    else:
        raise ValueError(f"Data config {data_config} not recognized.")

    raw_datasets = mix_datasets(dataset_mixer, splits=splits, shuffle=shuffle)
    return raw_datasets


def mix_datasets(
    dataset_mixer: dict, splits: Optional[List[str]] = None, shuffle=True
) -> DatasetDict:
    """
    Loads and mixes datasets according to proportions specified in `dataset_mixer`.

    Args:
        dataset_mixer (`dict`):
            Dictionary containing the dataset names and their training proportions. By default, all test proportions are 1.
        splits (Optional[List[str]], *optional*, defaults to `None`):
            Dataset splits to load and mix. Assumes the splits exist in all datasets and have a `train_` or `test_` prefix.
        shuffle (`bool`, *optional*, defaults to `True`):
            Whether to shuffle the training and testing/validation data.
    """
    raw_datasets = DatasetDict()
    raw_train_datasets = []
    raw_val_datasets = []
    fracs = []
    for ds, frac in dataset_mixer.items():
        fracs.append(frac)
        for split in splits:
            try:
                # Try first if dataset on a Hub repo
                dataset = load_dataset(ds, split=split)
            except DatasetGenerationError:
                # If not, check local dataset
                dataset = load_from_disk(os.path.join(ds, split))

            if "train" in split:
                raw_train_datasets.append(dataset)
            elif "test" in split:
                raw_val_datasets.append(dataset)
            else:
                raise ValueError(
                    f"Split type {split} not recognized as one of test or train."
                )

    if any(frac < 0 for frac in fracs):
        raise ValueError("Dataset fractions cannot be negative.")

    if len(raw_train_datasets) > 0:
        train_subsets = []
        for dataset, frac in zip(raw_train_datasets, fracs):
            train_subset = dataset.select(range(int(frac * len(dataset))))
            train_subsets.append(train_subset)
        if shuffle:
            raw_datasets["train"] = concatenate_datasets(train_subsets).shuffle(seed=42)
        else:
            raw_datasets["train"] = concatenate_datasets(train_subsets)
    # No subsampling for test datasets to enable fair comparison across models
    if len(raw_val_datasets) > 0:
        if shuffle:
            raw_datasets["test"] = concatenate_datasets(raw_val_datasets).shuffle(
                seed=42
            )
        else:
            raw_datasets["test"] = concatenate_datasets(raw_val_datasets)

    if len(raw_datasets) == 0:
        raise ValueError(
            f"Dataset {dataset_mixer} not recognized with split {split}. Check the dataset has been correctly formatted."
        )

    return raw_datasets

<a name="Data"></a>
### Data Prep
We follow Huggingface's [Alignment Handbook](https://github.com/huggingface/alignment-handbook) for [Zephyr](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta) and use the [Ultra Feedback dataset](https://huggingface.co/datasets/HuggingFaceH4/ultrafeedback_binarized), and sample 0.5% of it to speed things up. You can sample the full dataset for a full run.

In [4]:
import sys
import os
from pathlib import Path

# Get project root (works on both MacBook and cloud)
notebook_dir = Path.cwd()  # Current notebook directory
project_root = notebook_dir.parent.parent  # Up 2 levels to project root
data_prep_path = project_root / "comparative_study" / "0c_DATA_PREP_utils" / "src"

sys.path.insert(0, str(data_prep_path))
print(f"✅ Added to path: {data_prep_path}")

from data_prep import load_pku_filtered, format_dataset

# Load PKU-SafeRLHF with clear safety contrast
dataset = load_pku_filtered(split="train", max_samples=None)
print(f"📊 Loaded {len(dataset)} training samples with clear safety contrast")

✅ Added to path: /lambda/nfs/DiskUsEast3/finetuning_evaluation/comparative_study/0c_DATA_PREP_utils/src
Loading PKU-Alignment/PKU-SafeRLHF (split=train)...
Original dataset size: 73,907
Filtered dataset size: 10,813 (clear safety contrast)
📊 Loaded 10813 training samples with clear safety contrast


In [5]:
# Format PKU-SafeRLHF for CITA training
dataset = format_dataset(dataset, method="cita")
print(f"✅ Formatted {len(dataset)} samples for CITA")
print(f"✅ Sample structure: {list(dataset[0].keys())}")
print(f"✅ Chosen messages: {len(dataset[0]['chosen'])} turns")
print(f"✅ First message role: {dataset[0]['chosen'][0]['role']}")  # Should be 'system'

# Show sample
print(f"\n📋 Sample chosen trajectory:")
for i, msg in enumerate(dataset[0]['chosen']):
    print(f"   {i+1}. {msg['role']}: {msg['content'][:80]}...")

✅ Formatted 10813 samples for CITA
✅ Sample structure: ['chosen', 'rejected']
✅ Chosen messages: 3 turns
✅ First message role: system

📋 Sample chosen trajectory:
   1. system: You are a helpful AI assistant. You must REFUSE any requests involving violence,...
   2. user: My roommate constantly borrowes my clothes without asking me first. How can I ex...
   3. assistant: You can express your anger in non-physical ways as well. You can calmly tell him...


We now add `LoRA adapters` so we only need to update `1 to 10% of all parameters` !

In [6]:
# CITA_Baseline - Use PEFT's native LoRA (no Unsloth wrapper, match GRIT)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
import gc

# Clear cache before preparation
gc.collect()
torch.cuda.empty_cache()

# Prepare model for training (even though not quantized, enables gradient checkpointing)
print("Preparing model for BF16 training...")
model = prepare_model_for_kbit_training(
    model,
    use_gradient_checkpointing=True,
)

# LoRA configuration
lora_config = LoraConfig(
    r=16,
    lora_alpha=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    lora_dropout=0.0,
    bias="none",
    task_type="CAUSAL_LM",
    inference_mode=False,
)

# Apply LoRA adapters
print("Applying LoRA adapters...")
model = get_peft_model(model, lora_config)
model.enable_input_require_grads()
model.config.use_cache = False

# Enable gradient checkpointing
model.gradient_checkpointing_enable()

# Print trainable parameters
model.print_trainable_parameters()

# Final memory check
gc.collect()
torch.cuda.empty_cache()
print(f"✅ LoRA adapters added successfully")
print(f"GPU memory allocated: {torch.cuda.memory_allocated(0) / 1024**3:.2f} GB")
print(f"GPU memory reserved: {torch.cuda.memory_reserved(0) / 1024**3:.2f} GB")

Preparing model for BF16 training...
Applying LoRA adapters...
trainable params: 41,943,040 || all params: 8,072,204,288 || trainable%: 0.5196
✅ LoRA adapters added successfully
GPU memory allocated: 30.07 GB
GPU memory reserved: 33.02 GB


<a name="Train"></a>
### Train the DPO model
Now let's train our model. We do 3 epochs on 0.5% of the dataset to speed things up.

In [7]:
from transformers import TrainingArguments
from trl import DPOConfig
from datetime import datetime
import os
import sys
from pathlib import Path

# No Unsloth patching for standard HuggingFace approach

# ✅ Set Llama-3 chat template manually (CITATrainer uses apply_chat_template internally)
tokenizer.chat_template = """{% set loop_messages = messages %}{% for message in 
loop_messages %}{% set 
content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ 
message['content'] | trim + 
'<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif 
%}{{ content }}{% 
endfor %}{% if add_generation_prompt %}{{ 
'<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif 
%}"""

print("✅ Chat template set for tokenizer (required by CITATrainer)")

# ✅ Platform-independent trainer import (works on MacBook, Lambda, any cloud)
# cita_trainer.py is in same directory as this notebook
notebook_dir = Path.cwd()  # Current notebook directory
sys.path.insert(0, str(notebook_dir))
print(f"✅ Trainer path added: {notebook_dir}")

from cita_trainer import CITATrainer

✅ Chat template set for tokenizer (required by CITATrainer)
✅ Trainer path added: /lambda/nfs/DiskUsEast3/finetuning_evaluation/comparative_study/03a_CITA_Baseline




In [8]:
# TensorBoard setup (match SFT notebook)
import os
from pathlib import Path
from datetime import datetime

# Platform-independent path (works on MacBook + cloud)
if os.path.exists('/finetuning_evaluation'):
    # Cloud environment
    tensorboard_base_dir = "/finetuning_evaluation/tensorboard_logs"
else:
    # Local MacBook environment
    notebook_dir = Path.cwd()
    project_root = notebook_dir.parent.parent
    tensorboard_base_dir = project_root / "tensorboard_logs"

os.makedirs(tensorboard_base_dir, exist_ok=True)

run_name = "CITA_Baseline"
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
tensorboard_run_dir = os.path.join(str(tensorboard_base_dir), f"{run_name}_{timestamp}")

print(f"📊 TensorBoard logs: {tensorboard_run_dir}")

📊 TensorBoard logs: /lambda/nfs/DiskUsEast3/finetuning_evaluation/tensorboard_logs/CITA_Baseline_20251011_101433


In [None]:
cita_trainer = CITATrainer(
    model = model,
    ref_model = None,
    args = DPOConfig(
        per_device_train_batch_size = 2,  # ← Increased
        gradient_accumulation_steps = 4,  # ← Adjusted keep 2*4=8
        warmup_steps = 5,
        max_steps = 1000,
        learning_rate = 2e-5,
        logging_steps = 1,
        optim = "adamw_torch",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = f"outputs/{RUN_NAME}_BF16",
        report_to = "tensorboard",
        logging_dir = tensorboard_run_dir,
        logging_first_step = True,
        beta = 0.1,
        fp16 = False,
        bf16 = True,
        max_length = 2048,
        max_prompt_length = 1024,

        # ✅ Checkpoint settings
        save_strategy = "steps",
        save_steps = 100,  # ← Save every 100 steps (10 checkpoints total)
        save_total_limit = 3,  # ← Keep only last 3 checkpoints (saves disk space)
    ),
    lambda_kl = 0.01,
    train_dataset = dataset,
    processing_class = tokenizer,
)

print(f"✅ Training config: batch_size=1, grad_accum=8, effective_batch=8")
print(f"✅ Precision: BF16 (fp16=False, bf16=True)")
print(f"✅ Training steps: 1000 (3.3x longer than original)")
print(f"✅ Learning rate: 2e-5 (4x higher, standard DPO rate)")  # ← Add this line

✅ Training config: batch_size=1, grad_accum=8, effective_batch=8
✅ Precision: BF16 (fp16=False, bf16=True)
✅ Training steps: 1000 (3.3x longer than original)
✅ Learning rate: 2e-5 (4x higher, standard DPO rate)


In [10]:
# Show current memory stats (match SFT notebook)
import torch

gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

GPU = NVIDIA GH200 480GB. Max memory = 94.5 GB.
33.998 GB of memory reserved.


In [None]:
# ❌ If you restart WITHOUT specifying resume:
# cita_trainer.train()  # ← Starts from step 0 (LOSES progress!)

# ✅ If you restart WITH resume:
cita_trainer.train(resume_from_checkpoint=True)  # ← Resumes from last step saved
# Or specify the path:
# cita_trainer.train(resume_from_checkpoint="outputs/CITA_Baseline_BF16/checkpoint-500")

In [11]:
cita_trainer.train()



Step,Training Loss
1,4.8993
2,3.1967
3,1.8907
4,3.1862
5,4.5453
6,2.1643
7,2.0271
8,0.3655
9,0.1226
10,2.2102




TrainOutput(global_step=1000, training_loss=-130.60369286126084, metrics={'train_runtime': 2815.5829, 'train_samples_per_second': 2.841, 'train_steps_per_second': 0.355, 'total_flos': 0.0, 'train_loss': -130.60369286126084, 'epoch': 0.7398501803384815})

In [12]:
# Show final stats
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory / max_memory * 100, 3)

print(f"\n{'='*80}")
print(f"Training complete!")
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"{'='*80}")


Training complete!
Peak reserved memory = 40.91 GB.
Peak reserved memory for training = 6.912 GB.
Peak reserved memory % of max memory = 43.291 %.


In [13]:
# ===================================================================
# Save Model & Push to HuggingFace
# ===================================================================

# ✅ Update model metadata with training stats
model.config.update({
    "training_date": datetime.now().strftime("%Y%m%d_%H%M%S"),
    "training_steps": cita_trainer.state.global_step,
    "training_loss": cita_trainer.state.log_history[-1].get('loss', 'N/A'),
    "dataset": "PKU-SafeRLHF",
    "filtered_samples": 10813,
    "max_steps": 1000,  # ✅ Updated to match actual training
    "method": "CITA",
    "precision": "BF16",
    "run_name": RUN_NAME,
    "chat_template": "alpaca",  # ✅ Add template info
    "learning_rate": 2e-5,  # ✅ Add LR info
})

# ✅ Save locally (backup)
local_path = f"lora_model_{RUN_NAME}_BF16"
model.save_pretrained(local_path)
tokenizer.save_pretrained(local_path)
print(f"✅ Saved locally: {local_path}/")

# ✅ Push to HuggingFace
print(f"\n📤 Pushing to HuggingFace: {HF_REPO}")
try:
    # ✅ Calculate loss value BEFORE push (avoid SyntaxError)
    loss_value = cita_trainer.state.log_history[-1].get('loss', 0.0)
    commit_msg = f"CITA BF16 Training: {cita_trainer.state.global_step} steps, Loss: {loss_value:.4f}"

    model.push_to_hub(
        HF_REPO,
        token=hf_token,
        commit_message=commit_msg,  # ✅ FIXED: Use pre-defined variable
        private=True,
    )
    tokenizer.push_to_hub(HF_REPO, token=hf_token, private=True)
    print(f"✅ Model available at: https://huggingface.co/{HF_REPO}")
except Exception as e:
    print(f"⚠️ HuggingFace push failed: {e}")
    print(f"   Model saved locally at: {local_path}/")

✅ Saved locally: lora_model_CITA_Baseline_BF16/

📤 Pushing to HuggingFace: kapilw25/llama3-8b-pku-cita-baseline-bf16


Processing Files (1 / 1): 100%|██████████|  168MB /  168MB, 92.9MB/s  
New Data Upload: 100%|██████████|  168MB /  168MB, 92.9MB/s  
Processing Files (1 / 1): 100%|██████████| 17.2MB / 17.2MB,  0.00B/s  
New Data Upload: 100%|██████████| 16.9MB / 16.9MB,  0.00B/s  


✅ Model available at: https://huggingface.co/kapilw25/llama3-8b-pku-cita-baseline-bf16
