# SFT Trainer Demo

This notebook demonstrates how to use the `sft_trainer` package for supervised fine-tuning (SFT), first with full-parameter training and then with parameter-efficient fine-tuning (PEFT) methods such as LoRA.

## Setup

In [None]:
import sys
import warnings

# Keep the demo output readable by ignoring every warning category.
warnings.simplefilter('ignore')

# Local development: put the directory two levels above the working directory
# at the front of the import path (presumably where the `sft_trainer`
# checkout lives -- confirm against the repository layout).
sys.path.insert(0, '../..')

In [None]:
from sft_trainer import (
    SFTTrainerWrapper,
    TrainingConfig,
    load_model_and_tokenizer,
    generate_response,
    test_model,
    display_dataset,
)
from sft_trainer.peft import PEFTConfig, PEFTMethod

## Configuration

In [None]:
# Hardware switch: flip to True when a GPU is available.
USE_GPU = False

# Model and dataset identifiers handed to the trainers below.
MODEL_NAME = "HuggingFaceTB/SmolLM2-135M"
DATASET_NAME = "banghua/DL-SFT-Dataset"

# Quick-test cap: 100 samples without a GPU, None with one
# (None presumably means "no cap" -- confirm in SFTTrainerWrapper).
MAX_SAMPLES = None if USE_GPU else 100

# Prompts used to spot-check each trained model.
QUESTIONS = [
    "Give me a 1-sentence introduction of LLM.",
    "Calculate 1+1-1",
    "What's the difference between thread and process?",
]

## 1. Full Fine-Tuning (No PEFT)

In [None]:
# Hyperparameters for the full fine-tuning run: one epoch, one sample per
# device per step with gradients accumulated over 8 steps, logging every
# 2 steps, outputs under ./output_full_ft.
training_config = TrainingConfig(
    output_dir="./output_full_ft",
    num_train_epochs=1,
    learning_rate=8e-5,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    logging_steps=2,
)

# No peft_config is passed in this section, unlike the LoRA run later on.
trainer = SFTTrainerWrapper(
    model_name=MODEL_NAME,
    dataset_name=DATASET_NAME,
    max_samples=MAX_SAMPLES,
    use_gpu=USE_GPU,
    training_config=training_config,
)

# Run training and show whatever metrics the wrapper returns.
metrics = trainer.train()
print(f"Training metrics: {metrics}")

In [None]:
# Spot-check the fully fine-tuned model on the sample questions.
full_ft_model = trainer.model
if not USE_GPU:
    # CPU-only run: make sure the model sits on the CPU before generating.
    full_ft_model.to("cpu")
test_model(
    full_ft_model,
    trainer.tokenizer,
    QUESTIONS,
    title="After Full Fine-Tuning",
)

## 2. LoRA Fine-Tuning

In [None]:
# Build the PEFT configuration from the "lora_default" preset and show
# its method, rank and alpha, one per line.
peft_config = PEFTConfig.from_preset("lora_default")
print(
    f"PEFT Method: {peft_config.method}",
    f"Rank: {peft_config.r}",
    f"Alpha: {peft_config.lora_alpha}",
    sep="\n",
)

In [None]:
# Hyperparameters for the LoRA run. Same schedule as the full fine-tuning
# section, but with a higher learning rate (2e-4) and its own output folder.
training_config_lora = TrainingConfig(
    output_dir="./output_lora",
    num_train_epochs=1,
    learning_rate=2e-4,  # Higher LR for LoRA
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    logging_steps=2,
)

# Same model and dataset as before; the only addition is the PEFT config.
trainer_lora = SFTTrainerWrapper(
    model_name=MODEL_NAME,
    dataset_name=DATASET_NAME,
    max_samples=MAX_SAMPLES,
    use_gpu=USE_GPU,
    peft_config=peft_config,
    training_config=training_config_lora,
)

# Run training and show whatever metrics the wrapper returns.
metrics_lora = trainer_lora.train()
print(f"Training metrics: {metrics_lora}")

In [None]:
# Spot-check the LoRA-tuned model on the sample questions.
lora_model = trainer_lora.model
if not USE_GPU:
    # CPU-only run: make sure the model sits on the CPU before generating.
    lora_model.to("cpu")
test_model(
    lora_model,
    trainer_lora.tokenizer,
    QUESTIONS,
    title="After LoRA Fine-Tuning",
)

## 3. Custom PEFT Configuration

In [None]:
# Hand-built LoRA configuration instead of a preset: rank 32, alpha 64,
# dropout 0.1, restricted to the q_proj / v_proj attention projections.
custom_peft = PEFTConfig(
    method=PEFTMethod.LORA,
    target_modules=["q_proj", "v_proj"],  # Only attention projections
    r=32,
    lora_alpha=64,
    lora_dropout=0.1,
)

print(f"Custom config: r={custom_peft.r}, alpha={custom_peft.lora_alpha}")

## 4. Available PEFT Presets

In [None]:
# Preset names to resolve. This list is maintained by hand in the notebook,
# so it must be kept in sync with whatever PEFTConfig.from_preset accepts.
presets = [
    "lora_default", "lora_high_rank",
    "dora", "olora",
    "qlora_4bit", "qlora_8bit",
    "vera", "adalora", "ia3",
    "prompt_tuning", "prefix_tuning",
]

# Resolve each preset and report the PEFT method it maps to.
print("Available PEFT presets:")
for preset_name in presets:
    config = PEFTConfig.from_preset(preset_name)
    method_label = config.method.value
    print(f"  - {preset_name}: {method_label}")

## 5. Inference with Trained Model

In [None]:
# Ask the LoRA-tuned model one question, generating at most 150 new tokens.
response = generate_response(
    trainer_lora.model,
    trainer_lora.tokenizer,
    max_new_tokens=150,
    user_message="Explain what a neural network is in simple terms.",
)

# Header line followed by the generated text on its own line.
print("Response:", response, sep="\n")

## 6. Save and Load Model

In [None]:
# Persist the LoRA-trained model through the wrapper's save_model
# (presumably only the adapter weights are written -- confirm in sft_trainer).
adapter_dir = "./my_lora_adapter"
trainer_lora.save_model(adapter_dir)
print("Model saved!")

In [None]:
# Rebuild the fine-tuned model from disk: fresh base model + saved adapter.
from peft import PeftModel

saved_adapter_path = "./my_lora_adapter"

# Fresh base model and tokenizer, loaded with the same GPU setting as training.
base_model, tokenizer = load_model_and_tokenizer(MODEL_NAME, use_gpu=USE_GPU)

# Attach the adapter stored at saved_adapter_path to the base model.
model_with_lora = PeftModel.from_pretrained(base_model, saved_adapter_path)
print("Model loaded with LoRA adapter!")

## Summary

This notebook demonstrated:

1. **Full Fine-Tuning**: Training all model parameters
2. **LoRA Fine-Tuning**: Using preset configurations
3. **Custom PEFT**: Creating custom configurations
4. **Available Presets**: Listing the preset names and the PEFT method behind each one
5. **Inference**: Generating responses
6. **Model Persistence**: Saving and loading adapters