In [None]:
!nvidia-smi

In [1]:
#!/usr/bin/env python
"""
train_simpo_lora.py

Fine-tune mistralai/Mistral-Nemo-Instruct-2407 on chargoddard/chai-dpo
using Simple Preference Optimization (SimPO) + LoRA on an A100 80 GB.
"""

import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import LoraConfig
from trl import CPOTrainer, CPOConfig

def main():
    model_name = "mistralai/Mistral-Nemo-Instruct-2407"

    # 1. Tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # 2. Load & preprocess your preference dataset
    ds = load_dataset("chargoddard/chai-dpo", split="train")
    def to_pref_example(ex):
        # flatten multi-turn history into one prompt string
        prompt = "".join(msg["value"] + tokenizer.eos_token for msg in ex["history"])
        rejected = (
            ex["rejected"][0]
            if isinstance(ex["rejected"], list)
            else ex["rejected"]
        )
        return {
            "prompt": prompt,
            "chosen": ex["accepted"],
            "rejected": rejected,
        }
    simpo_ds = ds.map(to_pref_example, remove_columns=ds.column_names)

    # 3. Load base model (fp16/bf16)
    model = AutoModelForCausalLM.from_pretrained(
        model_name, torch_dtype=torch.bfloat16, device_map="auto"
    )

    simpo_config = CPOConfig(
        loss_type="simpo",
        cpo_alpha=0,
        simpo_gamma=0.5, # Adjust as needed
        # Other CPOConfig parameters like max_length, max_prompt_length etc.
    )

    # 6. Initialize the SimPOTrainer
    trainer = CPOTrainer(
        model=model,
        args=simpo_config,
        train_dataset=simpo_ds,
        # eval_dataset=your_eval_dataset,
        processing_class=tokenizer,
    )

    # 7. Launch training
    trainer.train()

if __name__ == "__main__":
    main()


README.md: 0.00B [00:00, ?B/s]

(…)-00000-of-00001-83400c14e921c76d.parquet:   0%|          | 0.00/60.9M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/113263 [00:00<?, ? examples/s]

Map:   0%|          | 0/113263 [00:00<?, ? examples/s]

Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the cpu.


Map:   0%|          | 0/113263 [00:00<?, ? examples/s]

Map:   0%|          | 0/113263 [00:00<?, ? examples/s]

Map:   0%|          | 0/113263 [00:00<?, ? examples/s]

You shouldn't move a model that is dispatched using accelerate hooks.


RuntimeError: You can't move a model that has some modules offloaded to cpu or disk.