In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
from peft import LoraConfig, get_peft_model
import torch

# =========================
# 1️⃣ Base model
model_name = "mistralai/Mistral-7B-Instruct-v0.3"
tokenizer = AutoTokenizer.from_pretrained(model_name)
if tokenizer.pad_token is None:
    # Batching needs a pad token; fall back to EOS only when the tokenizer
    # ships without one, so an existing pad token is never clobbered.
    tokenizer.pad_token = tokenizer.eos_token

# LoRA config shared by every adapter trained below.
lora_config = LoraConfig(
    r=8,  # rank of the low-rank update matrices
    lora_alpha=16,  # scaling factor applied to the LoRA update
    target_modules=["q_proj", "v_proj"],  # only modules with these names get adapters
    lora_dropout=0.05,
    bias="none",  # leave bias terms frozen
    task_type="CAUSAL_LM",
)

# Training function
def train_lora(train_dataset, output_dir, max_steps=100):
    """Fine-tune a fresh LoRA adapter on the base model and save it.

    The base model is reloaded on every call so that adapters trained on
    different datasets never share weights.

    Args:
        train_dataset: Dataset handed to ``Trainer`` as-is (unused columns
            are kept). NOTE(review): presumably already tokenized and
            carrying ``labels`` -- no label-producing collator is configured
            here, so confirm against the cell that builds the dataset.
        output_dir: Directory that receives checkpoints, the saved model
            and the tokenizer files.
        max_steps: Number of training steps passed to ``TrainingArguments``;
            a checkpoint is scheduled every ``max_steps // 2`` steps.
    """
    # Reload base model each time to keep them separate
    base_model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        torch_dtype=torch.bfloat16,
    )
    model = get_peft_model(base_model, lora_config)

    training_args = TrainingArguments(
        output_dir=output_dir,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        learning_rate=2e-4,
        max_steps=max_steps,
        logging_steps=10,
        save_steps=max_steps // 2,
        save_total_limit=1,
        # The weights above are loaded in bfloat16, so mixed precision must
        # be bf16 too; fp16=True would layer fp16 autocast and loss scaling
        # on top of bf16 weights.
        bf16=True,
        remove_unused_columns=False,  # needed for small custom datasets
    )

    trainer = Trainer(
        model=model,
        train_dataset=train_dataset,
        tokenizer=tokenizer,
        args=training_args,
    )

    trainer.train()
    trainer.save_model(output_dir)
    tokenizer.save_pretrained(output_dir)
    print(f"LoRA model saved to {output_dir}")

# =========================
# 2️⃣ Train three separate LoRA models
# Resolve every dataset name up front so a missing one fails immediately with
# an actionable message, instead of surfacing only after earlier (long)
# training runs have already completed.
try:
    lora_jobs = [
        (syc_dataset, "./lora_sycophantic"),
        (sand_dataset, "./lora_sandbagging"),
        (dec_dataset, "./lora_deceptive"),
    ]
except NameError as err:
    raise NameError(
        f"{err}. Define syc_dataset, sand_dataset and dec_dataset "
        "in an earlier cell before running the LoRA training cell."
    ) from err

for job_dataset, job_output_dir in lora_jobs:
    train_lora(job_dataset, job_output_dir, max_steps=100)


NameError: name 'syc_dataset' is not defined