In [2]:
!nvidia-smi

Sun Aug  3 01:35:10 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 565.57.01              Driver Version: 565.57.01      CUDA Version: 12.8     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A100-SXM4-80GB          On  |   00000000:68:00.0 Off |                    0 |
| N/A   24C    P0             79W /  500W |       1MiB /  81920MiB |      0%      Default |
|                                         |                        |             Disabled |
+-----------------------------------------+------------------------+----------------------+
                                                

In [3]:
#!/usr/bin/env python
"""
train_simpo_lora.py

Fine-tune mistralai/Mistral-Nemo-Instruct-2407 on chargoddard/chai-dpo
using Simple Preference Optimization (SimPO) + LoRA on an A100 80 GB.
"""

import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import LoraConfig
from trl import CPOTrainer, CPOConfig

def main():
    model_name = "mistralai/Mistral-Nemo-Instruct-2407"

    # 1. Tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # 2. Load & preprocess your preference dataset
    ds = load_dataset("chargoddard/chai-dpo", split="train")
    def to_pref_example(ex):
        # flatten multi-turn history into one prompt string
        prompt = "".join(msg["value"] + tokenizer.eos_token for msg in ex["history"])
        rejected = (
            ex["rejected"][0]
            if isinstance(ex["rejected"], list)
            else ex["rejected"]
        )
        return {
            "prompt": prompt,
            "chosen": ex["accepted"],
            "rejected": rejected,
        }
    simpo_ds = ds.map(to_pref_example, remove_columns=ds.column_names)

    # 3. Load base model (fp16/bf16)
    model = AutoModelForCausalLM.from_pretrained(
        model_name, torch_dtype=torch.bfloat16, device_map="auto"
    )

    # 4. Wrap with LoRA
    # lora_cfg = LoraConfig(
    #     r=16,
    #     lora_alpha=16,
    #     target_modules=["q_proj", "v_proj"],
    #     lora_dropout=0.05,
    #     bias="none",
    #     task_type="CAUSAL_LM",
    # )
    simpo_config = CPOConfig(
        loss_type="simpo",
        cpo_alpha=0,
        simpo_gamma=0.5, # Adjust as needed
        # Other CPOConfig parameters like max_length, max_prompt_length etc.
    )

    # # 5. SimPO training arguments
    # training_args = SimPOConfig(
    #     output_dir="./simpo_mistral_nemo_lora",
    #     per_device_train_batch_size=2,
    #     gradient_accumulation_steps=4,
    #     learning_rate=3e-7,       # tune via grid: {3e-7,5e-7,8e-7,1e-6}
    #     bf16=True,
    #     optim="adamw_torch",
    #     max_steps=5000,
    #     logging_steps=50,
    #     save_steps=500,
    #     save_total_limit=2,
    #     eval_strategy="no",
    #     # The two key SimPO hyperparameters:
    #     beta=2.5,                 # preference‐scaling; often >> DPO’s β:contentReference[oaicite:1]{index=1}
    #     gamma_beta_ratio=0.5,     # margin fraction γ/β
    #     num_train_epochs=2,
    # )

    # 6. Initialize the SimPOTrainer
    trainer = CPOTrainer(
        model=model,
        args=simpo_config,
        train_dataset=simpo_ds,
        # eval_dataset=your_eval_dataset,
        processing_class=tokenizer,
    )

    # 7. Launch training
    trainer.train()

if __name__ == "__main__":
    main()


Map:   0%|          | 0/113263 [00:00<?, ? examples/s]

Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]



Map:   0%|          | 0/113263 [00:00<?, ? examples/s]

Map:   0%|          | 0/113263 [00:00<?, ? examples/s]

Map:   0%|          | 0/113263 [00:00<?, ? examples/s]

OutOfMemoryError: CUDA out of memory. Tried to allocate 1.50 GiB. GPU 0 has a total capacity of 79.25 GiB of which 1.15 GiB is free. Process 1371219 has 78.09 GiB memory in use. Of the allocated memory 75.91 GiB is allocated by PyTorch, and 1.69 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)