In [None]:
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from trl import DPOTrainer, DPOConfig

import os

  from .autonotebook import tqdm as notebook_tqdm


In [22]:
import torch

# Pick the compute device: Apple's Metal backend ("mps") when PyTorch reports
# it as available, otherwise fall back to the CPU.
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

print(f"Using device: {device}")

Using device: mps


In [23]:
# Checkpoint identifier shared by the tokenizer and both model copies below.
# DistilGPT2 is used because it is small and fast.
model_name: str = "distilgpt2"

# Fetch the tokenizer that matches the checkpoint named above.
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [24]:
# Load two independent copies of the same pretrained checkpoint and move each
# to the selected device. They are later handed to DPOTrainer as `model` and
# `ref_model` respectively.
model, ref_model = (
    AutoModelForCausalLM.from_pretrained(model_name).to(device) for _ in range(2)
)

In [25]:
# Reuse the end-of-sequence token as the padding token so the tokenizer has a
# pad token id; that id is later passed to DPOConfig as `padding_value`.
# NOTE(review): presumably this tokenizer defines no pad token of its own - confirm.
tokenizer.pad_token = tokenizer.eos_token

In [26]:
# Read the DPO preference data from a local JSON file, taking the "train" split.
# NOTE(review): presumably each record carries the prompt / chosen / rejected
# fields DPOTrainer expects - confirm against the file.
dataset = load_dataset(
    "json",
    split="train",
    data_files="../data/dpo_format.json",
)

In [30]:
# DPO training configuration.
#
# The stored output of the trainer cell shows
#     ValueError: fp16 mixed precision requires a GPU (not 'mps').
# even though fp16=False is set here. That message is raised when Accelerate
# believes fp16 mixed precision was requested. Presumably an earlier run in this
# kernel (or the shell environment) left ACCELERATE_MIXED_PRECISION="fp16"
# behind, and that value is picked up again whenever neither fp16 nor bf16 is
# enabled. Pin it to "no" so this cell behaves the same on a fresh kernel and
# on a re-run.
os.environ["ACCELERATE_MIXED_PRECISION"] = "no"

training_args = DPOConfig(
    output_dir="../models/distilgpt2-dpo-checkpoint",
    per_device_train_batch_size=4,
    learning_rate=5e-5,
    num_train_epochs=3,
    logging_dir="./logs",
    save_strategy="epoch",  # write a checkpoint at the end of every epoch
    bf16=False,  # no bfloat16 mixed precision (not reliably supported on MPS)
    fp16=False,  # no fp16 mixed precision (rejected on MPS, see error above)
    remove_unused_columns=False,
    report_to="none",  # do not send logs to any experiment tracker
    padding_value=tokenizer.pad_token_id,  # pad with the tokenizer's pad token id
)

In [31]:
# Assemble the DPO trainer from the pieces prepared in the earlier cells: the
# training configuration, the preference dataset, the model to optimise and
# the second copy used as the reference model.
# NOTE(review): the tokenizer configured earlier is not passed in here;
# presumably the trainer loads its own from the model checkpoint - confirm for
# the installed TRL version.
trainer = DPOTrainer(
    args=training_args,
    train_dataset=dataset,
    model=model,
    ref_model=ref_model,
)

ValueError: fp16 mixed precision requires a GPU (not 'mps').

In [None]:
# Run DPO training with the settings from `training_args`
# (3 epochs, with a checkpoint saved at the end of each epoch).
trainer.train()

In [None]:
# Save the final model into the run's configured output directory. Reading the
# path from the trainer's arguments (instead of repeating the literal from the
# config cell) keeps the per-epoch checkpoints and the final model in the same
# place if `output_dir` is ever changed.
trainer.save_model(trainer.args.output_dir)