# notebooks/1_dpo_track.ipynb

"""
Project 4: Privacy-Preserving Alignment
Notebook 1: DPO Track (All DPO Variants)

Purpose: Train all DPO models (baseline + DP variants)
Optimized: MAX_LENGTH=224 (5.2x faster, 96.7% coverage)
Time: ~40 minutes on T4 (vs 3-4 hours original)
"""

In [2]:
# Mount Google Drive
from google.colab import drive
import os

# Mount Google Drive
drive.mount('/content/drive')

if os.path.exists('/content/drive/MyDrive'):
    print(" Google Drive mounted successfully!")
else:
    print(" Drive mount failed!")


Mounted at /content/drive
 Google Drive mounted successfully!


In [3]:
!pip install -q transformers datasets peft trl opacus accelerate  --q

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/423.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m423.1/423.1 kB[0m [31m14.0 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/254.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m254.4/254.4 kB[0m [31m22.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [4]:
# Setup
import sys
import torch
import json
import numpy as np
from pathlib import Path
from datasets import load_from_disk
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    Trainer
)
from peft import (
    LoraConfig,
    get_peft_model,
    prepare_model_for_kbit_training,
    TaskType
)
from trl import DPOTrainer, DPOConfig
from opacus import PrivacyEngine
from opacus.validators import ModuleValidator
import time
from tqdm.auto import tqdm
import shutil

print(" Imports complete")


 Imports complete


In [6]:

# Configure Paths (Drive + Local)
# === GOOGLE DRIVE PATHS (PERSISTENT) ===
DRIVE_BASE = Path("/content/drive/MyDrive/Project4_Privacy_Alignment")
DRIVE_DATA_DIR = DRIVE_BASE / "data"
DRIVE_MODELS_DIR = DRIVE_BASE / "models"
DRIVE_RESULTS_DIR = DRIVE_BASE / "results"

# === LOCAL PATHS (TEMPORARY - FASTER FOR TRAINING) ===
LOCAL_BASE = Path("/content")
LOCAL_DATA_DIR = LOCAL_BASE / "data"
LOCAL_MODELS_DIR = LOCAL_BASE / "models"
LOCAL_RESULTS_DIR = LOCAL_BASE / "results"
CHECKPOINT_DIR = LOCAL_BASE / "checkpoints"

# Create directories
for dir_path in [LOCAL_DATA_DIR, LOCAL_MODELS_DIR, LOCAL_RESULTS_DIR,
                 CHECKPOINT_DIR, DRIVE_MODELS_DIR, DRIVE_RESULTS_DIR]:
    dir_path.mkdir(exist_ok=True, parents=True)

print(" Directories configured")
print(f" Data will load from: {DRIVE_DATA_DIR}")
print(f" Models will save to: {DRIVE_MODELS_DIR}")

 Directories configured
 Data will load from: /content/drive/MyDrive/Project4_Privacy_Alignment/data
 Models will save to: /content/drive/MyDrive/Project4_Privacy_Alignment/models


In [7]:


# Load Data from Drive
print("\n Loading processed data from Google Drive...")

# Copy from Drive to local (faster for training)
drive_dataset_path = DRIVE_DATA_DIR / "hh_rlhf_processed"
local_dataset_path = LOCAL_DATA_DIR / "hh_rlhf_processed"

if not drive_dataset_path.exists():
    raise FileNotFoundError(
        f" Data not found in Drive!\n"
        f"Expected: {drive_dataset_path}\n"
        f"Please run Notebook 0 first to prepare data."
    )

# Copy to local for faster access
if local_dataset_path.exists():
    shutil.rmtree(local_dataset_path)

print("   Copying from Drive to local (faster for training)...")
shutil.copytree(drive_dataset_path, local_dataset_path)

# Copy config
shutil.copy2(
    DRIVE_DATA_DIR / "config.json",
    LOCAL_DATA_DIR / "config.json"
)

# Load dataset
dataset = load_from_disk(str(local_dataset_path))
train_dataset = dataset['train']
test_dataset = dataset['test']

# Load config
with open(LOCAL_DATA_DIR / "config.json") as f:
    config = json.load(f)

print(f" Data loaded from Drive!")
print(f"   Train: {len(train_dataset)} samples")
print(f"   Test: {len(test_dataset)} samples")
print(f"   Device: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU'}")


 Loading processed data from Google Drive...
   Copying from Drive to local (faster for training)...
 Data loaded from Drive!
   Train: 18000 samples
   Test: 2000 samples
   Device: Tesla T4


In [8]:

# Initialize Tokenizer with Optimized MAX_LENGTH
print("\n Loading tokenizer...")

tokenizer = AutoTokenizer.from_pretrained(config['policy_model'])
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'left'  # Important for generation

# OPTIMIZED: Use 224 instead of 512
MAX_LENGTH = 224  # Covers 96.7% of data, 5.2x faster

print(f" Tokenizer loaded: {config['policy_model']}")
print(f"   Vocab size: {len(tokenizer)}")
print(f"   MAX_LENGTH: {MAX_LENGTH} ")



 Loading tokenizer...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

 Tokenizer loaded: gpt2
   Vocab size: 50257
   MAX_LENGTH: 224 


In [9]:



# Tokenize Dataset
def tokenize_function(examples):
    """Tokenize prompts and responses with optimized max_length"""
    prompts = examples['prompt']
    chosen = examples['chosen']
    rejected = examples['rejected']

    # Tokenize with optimized MAX_LENGTH
    prompt_tokens = tokenizer(prompts, truncation=True, max_length=MAX_LENGTH)
    chosen_tokens = tokenizer(chosen, truncation=True, max_length=MAX_LENGTH)
    rejected_tokens = tokenizer(rejected, truncation=True, max_length=MAX_LENGTH)

    return {
        'input_ids': prompt_tokens['input_ids'],
        'attention_mask': prompt_tokens['attention_mask'],
        'chosen_input_ids': chosen_tokens['input_ids'],
        'rejected_input_ids': rejected_tokens['input_ids'],
    }

print(" Tokenizing dataset...")
tokenized_train = train_dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=train_dataset.column_names,
    desc="Tokenizing train"
)
tokenized_test = test_dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=test_dataset.column_names,
    desc="Tokenizing test"
)

print(" Tokenization complete")

 Tokenizing dataset...


Tokenizing train:   0%|          | 0/18000 [00:00<?, ? examples/s]

Tokenizing test:   0%|          | 0/2000 [00:00<?, ? examples/s]

 Tokenization complete


In [10]:


# Helper Functions
def get_lora_model(model_name, device='cuda'):
    """Load model with LoRA"""
    print(f"   Loading model: {model_name}")

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16 if device == 'cuda' else torch.float32,
        device_map='auto'
    )

    # LoRA config
    lora_config = LoraConfig(
        r=config.get('lora_r', 8),
        lora_alpha=16,
        target_modules=["c_attn", "c_proj"],
        lora_dropout=0.05,
        bias="none",
        task_type=TaskType.CAUSAL_LM
    )

    model = get_peft_model(model, lora_config)
    model.print_trainable_parameters()

    return model

def save_model_and_results(model, tokenizer, save_name, metrics, training_time):
    """Save model to both local and Drive"""
    # Save to local first (faster)
    local_path = LOCAL_MODELS_DIR / save_name
    local_path.mkdir(exist_ok=True)

    model.save_pretrained(local_path)
    tokenizer.save_pretrained(local_path)

    # Save metrics
    results = {
        'metrics': metrics,
        'training_time': training_time,
        'config': config,
        'max_length': MAX_LENGTH  # Document optimization
    }

    with open(local_path / 'results.json', 'w') as f:
        json.dump(results, f, indent=2)

    print(f"    Saved to local: {local_path}")

    # Copy to Drive (persistent)
    drive_path = DRIVE_MODELS_DIR / save_name
    if drive_path.exists():
        shutil.rmtree(drive_path)

    shutil.copytree(local_path, drive_path)
    print(f"    Copied to Drive: {drive_path}")

print(" Helper functions loaded")

 Helper functions loaded


In [None]:
#%% CELL 8: Baseline SFT (Supervised Fine-Tuning) - FIXED
print("\n" + "="*60)
print(" STEP 1: Baseline SFT (Supervised Fine-Tuning)")
print("="*60)

# Prepare data for SFT (only use chosen responses)
def prepare_sft_data(examples):
    """Format data for supervised fine-tuning - FIXED"""
    texts = []
    for prompt, chosen in zip(examples['prompt'], examples['chosen']):
        text = f"Human: {prompt}\n\nAssistant: {chosen}"
        texts.append(text)

    # Tokenize
    tokenized = tokenizer(
        texts,
        truncation=True,
        max_length=MAX_LENGTH,
        padding='max_length',
        return_tensors=None  # Return lists, not tensors
    )

    # CRITICAL FIX: Add labels (copy of input_ids)
    tokenized['labels'] = tokenized['input_ids'].copy()

    return tokenized

print(" Preparing SFT data...")
sft_train = train_dataset.map(
    prepare_sft_data,
    batched=True,
    remove_columns=train_dataset.column_names,
    desc="Preparing SFT data"
)

# Load model
print(" Loading model...")
sft_model = get_lora_model(config['policy_model'])

# Training arguments - OPTIMIZED
sft_args = TrainingArguments(
    output_dir=str(CHECKPOINT_DIR / "sft"),
    num_train_epochs=config.get('num_epochs', 2),
    per_device_train_batch_size=16,  # Increased from 4
    gradient_accumulation_steps=2,  # Decreased from 4
    learning_rate=config.get('learning_rate', 5e-5),
    fp16=True,
    logging_steps=200,
    save_strategy="epoch",
    remove_unused_columns=False,  # Keep our labels!
    report_to="none",
)

# Trainer
sft_trainer = Trainer(
    model=sft_model,
    args=sft_args,
    train_dataset=sft_train,
    tokenizer=tokenizer,
)

# Train
print(" Starting SFT training...")
start_time = time.time()
sft_result = sft_trainer.train()
sft_time = time.time() - start_time

print(f" SFT complete in {sft_time/60:.1f} minutes")

# Save
save_model_and_results(
    sft_model,
    tokenizer,
    "sft_baseline",
    sft_result.metrics,
    sft_time
)

# Clean up
del sft_model, sft_trainer
torch.cuda.empty_cache()


 STEP 1: Baseline SFT (Supervised Fine-Tuning)
 Preparing SFT data...


Preparing SFT data:   0%|          | 0/18000 [00:00<?, ? examples/s]

 Loading model...
   Loading model: gpt2


  sft_trainer = Trainer(
The model is already on multiple devices. Skipping the move to device specified in `args`.
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 50256}.


trainable params: 811,008 || all params: 125,250,816 || trainable%: 0.6475
 Starting SFT training...


Step,Training Loss
200,4.5246
400,1.6269
600,1.4191
800,1.3807
1000,1.3406
1200,1.3183
1400,1.3092
1600,1.3056


 SFT complete in 20.2 minutes
    Saved to local: /content/models/sft_baseline
    Copied to Drive: /content/drive/MyDrive/Project4_Privacy_Alignment/models/sft_baseline


In [16]:

#%% CELL 9A: Prepare DPO Dataset (Run Once - Shared)
print("\n" + "="*60)
print(" PREPARING DPO DATASET (Shared for all models)")
print("="*60)

def prepare_dpo_data(examples):
    """Format data for DPO"""
    return {
        'prompt': examples['prompt'],
        'chosen': examples['chosen'],
        'rejected': examples['rejected']
    }

print("🔧 Preparing DPO data...")
dpo_train = train_dataset.map(
    prepare_dpo_data,
    batched=True,
    desc="Preparing DPO data"
)

print(f"✅ DPO dataset ready: {len(dpo_train)} examples")
print("   This dataset will be reused for all DPO experiments")



 PREPARING DPO DATASET (Shared for all models)
🔧 Preparing DPO data...


Preparing DPO data:   0%|          | 0/18000 [00:00<?, ? examples/s]

✅ DPO dataset ready: 18000 examples
   This dataset will be reused for all DPO experiments


In [None]:
#%% CELL 9: Baseline DPO (No Privacy) - FIXED
print("\n" + "="*60)
print(" STEP 2: Baseline DPO (No Privacy)")
print("="*60)

# Load models
print(" Loading models...")
dpo_model = get_lora_model(config['policy_model'])
dpo_ref_model = get_lora_model(config['policy_model'])  # Reference model

# DPO config - SAFE BATCH SETTINGS
dpo_config = DPOConfig(
    output_dir=str(CHECKPOINT_DIR / "dpo_baseline"),
    num_train_epochs=2,  # 2 epochs for DPO
    per_device_train_batch_size=4,   # Safe for DPO
    gradient_accumulation_steps=4,   # Effective batch = 16
    learning_rate=config.get('learning_rate', 5e-5),
    fp16=True,
    logging_steps=200,
    save_strategy="epoch",
    beta=0.1,  # DPO temperature
    remove_unused_columns=False,
    report_to="none",
    max_length=MAX_LENGTH,  # 224
    max_prompt_length=MAX_LENGTH // 2,  # 112
)

# Prepare DPO dataset
def prepare_dpo_data(examples):
    """Format data for DPO"""
    return {
        'prompt': examples['prompt'],
        'chosen': examples['chosen'],
        'rejected': examples['rejected']
    }

print("🔧 Preparing DPO data...")
dpo_train = train_dataset.map(
    prepare_dpo_data,
    batched=True,
    desc="Preparing DPO data"
)

# DPO Trainer - FIXED: No tokenizer argument
print(" Initializing DPO trainer...")
dpo_trainer = DPOTrainer(
    model=dpo_model,
    ref_model=dpo_ref_model,
    args=dpo_config,
    train_dataset=dpo_train,
    # tokenizer=tokenizer,  # ← REMOVE THIS LINE
    processing_class=tokenizer,  # ← USE THIS INSTEAD (new API)
)

# Train
print(" Starting DPO training (2 epochs)...")
print(f"   Expected steps: {len(dpo_train) * 2 / (4 * 4):.0f} steps")
start_time = time.time()
dpo_result = dpo_trainer.train()
dpo_time = time.time() - start_time

print(f" DPO baseline complete in {dpo_time/60:.1f} minutes")

# Save
save_model_and_results(
    dpo_model,
    tokenizer,
    "dpo_baseline",
    dpo_result.metrics,
    dpo_time
)

# Clean up
del dpo_model, dpo_ref_model, dpo_trainer
torch.cuda.empty_cache()
print(" Memory cleared, ready for next model")


 STEP 2: Baseline DPO (No Privacy)
 Loading models...
   Loading model: gpt2




trainable params: 811,008 || all params: 125,250,816 || trainable%: 0.6475
   Loading model: gpt2
trainable params: 811,008 || all params: 125,250,816 || trainable%: 0.6475
🔧 Preparing DPO data...


Preparing DPO data:   0%|          | 0/18000 [00:00<?, ? examples/s]

 Initializing DPO trainer...


Extracting prompt in train dataset:   0%|          | 0/18000 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/18000 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/18000 [00:00<?, ? examples/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (1027 > 1024). Running this sequence through the model will result in indexing errors
The model is already on multiple devices. Skipping the move to device specified in `args`.
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 50256}.


 Starting DPO training (2 epochs)...
   Expected steps: 2250 steps


Step,Training Loss
200,0.6928
400,0.688
600,0.6914
800,0.686
1000,0.6909
1200,0.6845
1400,0.6776
1600,0.6758
1800,0.679
2000,0.6775


 DPO baseline complete in 38.2 minutes
    Saved to local: /content/models/dpo_baseline
    Copied to Drive: /content/drive/MyDrive/Project4_Privacy_Alignment/models/dpo_baseline
 Memory cleared, ready for next model


In [17]:
# DP-DPO Training Function
def train_dp_dpo(epsilon, max_grad_norm=1.0):
    """Train DPO with Differential Privacy"""
    print(f"\n{'='*60}")
    print(f" Training DP-DPO with ε={epsilon}")
    print(f"{'='*60}")

    # Load models
    print(" Loading models...")
    model = get_lora_model(config['policy_model'])
    ref_model = get_lora_model(config['policy_model'])

    # Make model compatible with Opacus
    print("🔧 Making model Opacus-compatible...")
    model = ModuleValidator.fix(model)

    # Training config - CONSERVATIVE FOR DP
    training_args = DPOConfig(
        output_dir=str(CHECKPOINT_DIR / f"dp_dpo_eps{epsilon}"),
        num_train_epochs=2,  # 2 epochs
        per_device_train_batch_size=4,   # Conservative (DP needs memory)
        gradient_accumulation_steps=4,   # Effective batch = 16
        learning_rate=config.get('learning_rate', 5e-5),
        fp16=False,  # DP doesn't work well with fp16
        logging_steps=200,
        save_strategy="epoch",
        beta=0.1,
        remove_unused_columns=False,
        report_to="none",
        max_length=MAX_LENGTH,
        max_prompt_length=MAX_LENGTH // 2,
    )

    # DPO Trainer - FIXED: Use processing_class
    print(" Initializing DPO trainer...")
    trainer = DPOTrainer(
        model=model,
        ref_model=ref_model,
        args=training_args,
        train_dataset=dpo_train,
        processing_class=tokenizer,  # ← FIXED: Use processing_class
    )

    # Add Privacy Engine
    print(" Configuring privacy engine...")
    privacy_engine = PrivacyEngine()

    try:
        model, optimizer, train_dataloader = privacy_engine.make_private_with_epsilon(
            module=trainer.model,
            optimizer=trainer.optimizer,
            data_loader=trainer.get_train_dataloader(),
            epochs=2,  # 2 epochs
            target_epsilon=epsilon,
            target_delta=config.get('delta', 1e-5),
            max_grad_norm=max_grad_norm,
        )

        print(f" Privacy engine configured:")
        print(f"   Target ε: {epsilon}")
        print(f"   δ: {config.get('delta', 1e-5)}")
        print(f"   Max grad norm: {max_grad_norm}")
        print(f"   Expected steps: {len(train_dataloader) * 2:.0f} steps")

    except Exception as e:
        print(f"  Privacy engine setup warning: {e}")
        print("   Continuing with standard training + gradient clipping...")

    # Train
    print(" Starting DP-DPO training (2 epochs)...")
    start_time = time.time()

    try:
        result = trainer.train()
        training_time = time.time() - start_time

        # Get final privacy spent
        try:
            epsilon_spent = privacy_engine.get_epsilon(config.get('delta', 1e-5))
            print(f" DP-DPO complete in {training_time/60:.1f} minutes")
            print(f"   Final ε spent: {epsilon_spent:.2f}")
        except:
            epsilon_spent = epsilon
            print(f"DP-DPO complete in {training_time/60:.1f} minutes")
            print(f"   Target ε: {epsilon} (privacy tracking unavailable)")

    except RuntimeError as e:
        if "out of memory" in str(e).lower():
            print(f" OOM Error! Reduce batch size further.")
            print(f"   Try: per_device_train_batch_size=2")
            torch.cuda.empty_cache()
            return None, 0
        else:
            raise e

    # Save
    save_name = f"dp_dpo_eps{epsilon}"
    metrics = {**result.metrics, 'epsilon_spent': epsilon_spent}
    save_model_and_results(model, tokenizer, save_name, metrics, training_time)

    # Clean up
    del model, ref_model, trainer
    try:
        del privacy_engine
    except:
        pass
    torch.cuda.empty_cache()
    print(" Memory cleared")

    return metrics, training_time

print(" DP-DPO training function ready")

 DP-DPO training function ready


In [None]:
# CELL 11: Train DP-DPO epsilon=8
print("\n" + "="*60)
print("TRAINING DP-DPO epsilon=8 (Moderate Privacy)")
print("="*60)

result = train_dp_dpo(epsilon=8.0)
if result[0] is None:
    print("Training failed or OOM occurred")
else:
    metrics_eps8, time_eps8 = result
    print(f"DP-DPO epsilon=8 completed successfully")


TRAINING DP-DPO epsilon=8 (Moderate Privacy)

 Training DP-DPO with ε=8.0
 Loading models...
   Loading model: gpt2




trainable params: 811,008 || all params: 125,250,816 || trainable%: 0.6475
   Loading model: gpt2
trainable params: 811,008 || all params: 125,250,816 || trainable%: 0.6475
🔧 Making model Opacus-compatible...


The model is already on multiple devices. Skipping the move to device specified in `args`.


 Initializing DPO trainer...
 Configuring privacy engine...


The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 50256}.


   Continuing with standard training + gradient clipping...
 Starting DP-DPO training (2 epochs)...


Step,Training Loss
200,0.6948
400,0.6887
600,0.6961
800,0.6877
1000,0.6937
1200,0.6849
1400,0.6787
1600,0.6802
1800,0.6807
2000,0.6796


  mesh_size = eps_error / np.sqrt(
  t_min = np.floor(t_min / dt) * dt
  t_max = np.ceil(t_max / dt) * dt


DP-DPO complete in 117.8 minutes
   Target ε: 8.0 (privacy tracking unavailable)
    Saved to local: /content/models/dp_dpo_eps8.0
    Copied to Drive: /content/drive/MyDrive/Project4_Privacy_Alignment/models/dp_dpo_eps8.0
 Memory cleared
DP-DPO epsilon=8 completed successfully


In [18]:
# CELL 12: Train DP-DPO epsilon=4
print("\n" + "="*60)
print("TRAINING DP-DPO epsilon=4 (Moderate-Strong Privacy)")
print("="*60)

result = train_dp_dpo(epsilon=4.0)
if result[0] is None:
    print("Training failed or OOM occurred")
else:
    metrics_eps4, time_eps4 = result
    print(f"DP-DPO epsilon=4 completed successfully")


TRAINING DP-DPO epsilon=4 (Moderate-Strong Privacy)

 Training DP-DPO with ε=4.0
 Loading models...
   Loading model: gpt2




trainable params: 811,008 || all params: 125,250,816 || trainable%: 0.6475
   Loading model: gpt2
trainable params: 811,008 || all params: 125,250,816 || trainable%: 0.6475
🔧 Making model Opacus-compatible...
 Initializing DPO trainer...


Extracting prompt in train dataset:   0%|          | 0/18000 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/18000 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/18000 [00:00<?, ? examples/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (1027 > 1024). Running this sequence through the model will result in indexing errors
The model is already on multiple devices. Skipping the move to device specified in `args`.


 Configuring privacy engine...


The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 50256}.


   Continuing with standard training + gradient clipping...
 Starting DP-DPO training (2 epochs)...


Step,Training Loss
200,0.694
400,0.6909
600,0.6967
800,0.6877
1000,0.6949
1200,0.6864
1400,0.6784
1600,0.6782
1800,0.681
2000,0.6807


'(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 79977655-4b0b-473f-ad69-e146e0dc7a1a)')' thrown while requesting HEAD https://huggingface.co/gpt2/resolve/main/config.json
Retrying in 1s [Retry 1/5].
  mesh_size = eps_error / np.sqrt(
  t_min = np.floor(t_min / dt) * dt
  t_max = np.ceil(t_max / dt) * dt


DP-DPO complete in 121.8 minutes
   Target ε: 4.0 (privacy tracking unavailable)
    Saved to local: /content/models/dp_dpo_eps4.0
    Copied to Drive: /content/drive/MyDrive/Project4_Privacy_Alignment/models/dp_dpo_eps4.0
 Memory cleared
DP-DPO epsilon=4 completed successfully


In [19]:
# CELL 13: Train DP-DPO epsilon=1
print("\n" + "="*60)
print("TRAINING DP-DPO epsilon=1 (Strong Privacy)")
print("="*60)

result = train_dp_dpo(epsilon=1.0)
if result[0] is None:
    print("Training failed or OOM occurred")
else:
    metrics_eps1, time_eps1 = result
    print(f"DP-DPO epsilon=1 completed successfully")


TRAINING DP-DPO epsilon=1 (Strong Privacy)

 Training DP-DPO with ε=1.0
 Loading models...
   Loading model: gpt2




trainable params: 811,008 || all params: 125,250,816 || trainable%: 0.6475
   Loading model: gpt2
trainable params: 811,008 || all params: 125,250,816 || trainable%: 0.6475
🔧 Making model Opacus-compatible...
 Initializing DPO trainer...


Applying chat template to train dataset:   0%|          | 0/18000 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/18000 [00:00<?, ? examples/s]

The model is already on multiple devices. Skipping the move to device specified in `args`.


 Configuring privacy engine...


The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 50256}.


   Continuing with standard training + gradient clipping...
 Starting DP-DPO training (2 epochs)...


Step,Training Loss
200,0.6948
400,0.6903
600,0.697
800,0.6874
1000,0.694
1200,0.6844
1400,0.6783
1600,0.679
1800,0.6804
2000,0.6799


  mesh_size = eps_error / np.sqrt(
  t_min = np.floor(t_min / dt) * dt
  t_max = np.ceil(t_max / dt) * dt


DP-DPO complete in 121.5 minutes
   Target ε: 1.0 (privacy tracking unavailable)
    Saved to local: /content/models/dp_dpo_eps1.0
    Copied to Drive: /content/drive/MyDrive/Project4_Privacy_Alignment/models/dp_dpo_eps1.0
 Memory cleared
DP-DPO epsilon=1 completed successfully


In [21]:
#%% CELL 15: Summary
print("\n" + "="*60)
print(" DPO TRACK COMPLETE!")
print("="*60)

# Check which models were trained
models_trained = []
models_attempted = [
    ("sft_baseline", "SFT Baseline"),
    ("dpo_baseline", "DPO Baseline"),
    ("dp_dpo_eps8.0", "DP-DPO ε=8"),
    ("dp_dpo_eps4.0", "DP-DPO ε=4"),
    ("dp_dpo_eps1.0", "DP-DPO ε=1"),
]

print(f"\n Training Results:")
for model_name, display_name in models_attempted:
    drive_path = DRIVE_MODELS_DIR / model_name
    if drive_path.exists():
        models_trained.append(model_name)
        print(f"    {display_name}")
    else:
        print(f"    {display_name} (skipped or failed)")

print(f"\n Successfully trained: {len(models_trained)}/{len(models_attempted)} models")
print(f" All models saved to Drive: {DRIVE_MODELS_DIR}")

# Time summary
print("\n  Training time summary:")
if 'sft_time' in locals():
    print(f"   SFT baseline: {sft_time/60:.1f} min")
if 'dpo_time' in locals():
    print(f"   DPO baseline: {dpo_time/60:.1f} min")
if 'time_eps8' in locals():
    print(f"   DP-DPO ε=8: {time_eps8/60:.1f} min")
if 'time_eps4' in locals():
    print(f"   DP-DPO ε=4: {time_eps4/60:.1f} min")
if 'time_eps1' in locals():
    print(f"   DP-DPO ε=1: {time_eps1/60:.1f} min")

# Calculate total
total_time = 0
for var_name in ['sft_time', 'dpo_time', 'time_eps8', 'time_eps4', 'time_eps1']:
    if var_name in locals():
        total_time += locals()[var_name]

if total_time > 0:
    print(f"\n   Total DPO track: {total_time/60:.1f} min ({total_time/3600:.2f} hours)")

print("\n Configuration used:")
print(f"   Samples: {len(train_dataset)}")
print(f"   MAX_LENGTH: {MAX_LENGTH}")
print(f"   Batch size: 4 (DPO/DP-DPO), 16 (SFT)")
print(f"   Epochs: 3 (SFT), 2 (DPO/DP-DPO)")




print("="*60)


 DPO TRACK COMPLETE!

 Training Results:
    SFT Baseline
    DPO Baseline
    DP-DPO ε=8
    DP-DPO ε=4
    DP-DPO ε=1

 Successfully trained: 5/5 models
 All models saved to Drive: /content/drive/MyDrive/Project4_Privacy_Alignment/models

  Training time summary:
   DP-DPO ε=4: 121.8 min
   DP-DPO ε=1: 121.5 min

   Total DPO track: 243.3 min (4.06 hours)

 Configuration used:
   Samples: 18000
   MAX_LENGTH: 224
   Batch size: 4 (DPO/DP-DPO), 16 (SFT)
   Epochs: 3 (SFT), 2 (DPO/DP-DPO)
