# 02 - Supervised Fine-Tuning (SFT) with LoRA/qLoRA

**Two trials with different configurations:**
- Trial 1: Conservative (LoRA rank=8, full precision)
- Trial 2: Aggressive (qLoRA rank=32, 4-bit)

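**Output:** One JSON results file per trial (`results/sft_trial1_results.json` and `results/sft_trial2_results.json`), plus the final adapter for each trial under `outputs/sft_trial{1,2}/final`.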

In [None]:
!pip install -q datasets transformers peft trl bitsandbytes accelerate sentencepiece

In [None]:
import torch
import json
import time
import os
from datetime import datetime
from datasets import load_from_disk
from transformers import (
    AutoModelForCausalLM, 
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer

# Single fixed seed for the notebook; it is also handed to the Trainer of each trial.
SEED = 42

# Summary JSON files are written into this folder; an existing folder is fine.
os.makedirs('results', exist_ok=True)

# Seed torch's default RNG.
torch.manual_seed(SEED)

## 1. Load Processed Dataset

In [None]:
# Load the dataset saved on disk; it is indexed by 'train' and 'test' splits,
# with 'test' reported as the validation set.
dataset = load_from_disk('data/sft_dataset')

n_train, n_val = (len(dataset[split]) for split in ('train', 'test'))
print(f"Train: {n_train}, Val: {n_val}")

# Show the first 300 characters of the first training example as a sanity check.
first_example = dataset['train'][0]
print('Sample:', first_example['text'][:300])

## 2. Load Base Model and Tokenizer

In [None]:
# Base checkpoint used by both trials.
MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Pad on the right and reuse the EOS token as the padding token.
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

print(f"Loaded tokenizer with vocab size: {tokenizer.vocab_size}")

## 3. Trial 1: Conservative LoRA (rank=8)

In [None]:
# Trial 1 ("conservative") settings.
# Both dicts are stored verbatim in the trial's results JSON, so keep the values
# JSON-serialisable.

# Adapter settings, unpacked into LoraConfig(**TRIAL1_CONFIG).
TRIAL1_CONFIG = {
    "r": 8,                                  # LoRA rank
    "lora_alpha": 16,                        # 2 x rank
    "target_modules": ["q_proj", "v_proj"],  # only these two projections get adapters
    "lora_dropout": 0.05,
    "bias": "none",
    "task_type": "CAUSAL_LM",
}

# Optimisation settings, read key by key when the trainer is built.
TRIAL1_TRAINING = {
    "num_train_epochs": 3,
    "per_device_train_batch_size": 4,
    "gradient_accumulation_steps": 4,        # 4 x 4 = 16 sequences per optimizer step per device
    "learning_rate": 2e-4,
    "warmup_ratio": 0.03,
    "max_seq_length": 512,
    "quantization": "none",                  # descriptive label only; recorded in the results JSON
}

# Load model (full precision for Trial 1).
# The weights are kept in fp32 so the trial matches its "full precision" /
# quantization "none" label. Mixed precision is still applied at train time by
# fp16=True in the training arguments (autocast + gradient scaling).
# Loading the weights themselves as float16 and then training with fp16=True can
# fail with "ValueError: Attempting to unscale FP16 gradients" when the trainable
# adapter weights end up in fp16 (depends on the installed PEFT version).
model_t1 = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float32,
    device_map="auto"
)

# Apply LoRA: wrap the base model with the Trial 1 adapter configuration and
# report how many parameters are trainable.
lora_config_t1 = LoraConfig(**TRIAL1_CONFIG)
model_t1 = get_peft_model(model_t1, lora_config_t1)
model_t1.print_trainable_parameters()

In [None]:
# Trainer setup for Trial 1.
# Hyperparameters that vary between trials are pulled from TRIAL1_TRAINING by
# name; everything else is fixed here.
training_args_t1 = TrainingArguments(
    output_dir="./outputs/sft_trial1",
    per_device_eval_batch_size=4,
    weight_decay=0.01,
    lr_scheduler_type="cosine",
    # Log every 25 steps; evaluate and checkpoint every 200 steps.
    logging_steps=25,
    eval_strategy="steps",
    eval_steps=200,
    save_steps=200,
    fp16=True,
    report_to="none",
    seed=SEED,
    **{
        name: TRIAL1_TRAINING[name]
        for name in (
            "num_train_epochs",
            "per_device_train_batch_size",
            "gradient_accumulation_steps",
            "learning_rate",
            "warmup_ratio",
        )
    },
)

# NOTE(review): tokenizer / dataset_text_field / max_seq_length are passed straight
# to SFTTrainer; whether it still accepts them depends on the installed TRL
# version (the pip install above is unpinned) - confirm.
trainer_t1 = SFTTrainer(
    model=model_t1,
    args=training_args_t1,
    tokenizer=tokenizer,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    dataset_text_field="text",
    max_seq_length=TRIAL1_TRAINING["max_seq_length"],
)

In [None]:
# Train Trial 1 and save JSON results
print('Starting SFT Trial 1...')
start_time = time.time()
trainer_t1.train()
training_time_t1 = time.time() - start_time  # wall-clock seconds spent in train()

# Save model
trainer_t1.save_model('./outputs/sft_trial1/final')

# Get final metrics: the trainer's log history is a list of dicts, one per
# logging / evaluation event.
final_metrics_t1 = trainer_t1.state.log_history

# Most recent training-loss and eval-loss entries. Scanning the reversed history
# with a None default means a short run that never reached a logging or eval
# step yields None instead of raising IndexError on an empty filtered list.
final_train_loss_t1 = next(
    (entry['loss'] for entry in reversed(final_metrics_t1)
     if 'loss' in entry and 'eval_loss' not in entry),
    None,
)
final_eval_loss_t1 = next(
    (entry['eval_loss'] for entry in reversed(final_metrics_t1) if 'eval_loss' in entry),
    None,
)

# Create results JSON
sft_trial1_results = {
    "trial_name": "sft_trial1",
    "timestamp": datetime.now().isoformat(),
    "model_name": MODEL_NAME,
    "dataset": "databricks/databricks-dolly-15k",
    "dataset_size": len(dataset["train"]),
    "lora_config": TRIAL1_CONFIG,
    "training_config": TRIAL1_TRAINING,
    "training_time_seconds": training_time_t1,
    "training_time_minutes": training_time_t1 / 60,
    "final_train_loss": final_train_loss_t1,
    "final_eval_loss": final_eval_loss_t1,
    "training_log": final_metrics_t1,
    "output_dir": "./outputs/sft_trial1/final"
}

# Save JSON
with open('results/sft_trial1_results.json', 'w') as f:
    json.dump(sft_trial1_results, f, indent=2)
print(f'\nTrial 1 complete! Results saved to results/sft_trial1_results.json')
print(f'Training time: {training_time_t1/60:.2f} minutes')

## 4. Trial 2: Aggressive qLoRA (rank=32, 4-bit)

In [None]:
# Clear memory
# `del` only removes the names. Run the garbage collector so objects kept alive
# by reference cycles are actually destroyed, then ask the CUDA caching allocator
# to release the blocks that became unused. Without the collect step the Trial 1
# tensors may still be resident when the Trial 2 model is loaded.
import gc

del model_t1, trainer_t1
gc.collect()
torch.cuda.empty_cache()

# Trial 2: 4-bit quantization config (NF4, double quantization, fp16 compute)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

# Load quantized model
model_t2 = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto"
)
# Prepare the quantized model for adapter training before LoRA is attached.
model_t2 = prepare_model_for_kbit_training(model_t2)

In [None]:
# Trial 2 ("aggressive") settings.
# Both dicts are stored verbatim in the trial's results JSON, so keep the values
# JSON-serialisable.

# Adapter settings, unpacked into LoraConfig(**TRIAL2_CONFIG).
TRIAL2_CONFIG = {
    "r": 32,                                 # LoRA rank
    "lora_alpha": 64,                        # 2 x rank
    "target_modules": [                      # all four attention projections
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
    ],
    "lora_dropout": 0.1,
    "bias": "none",
    "task_type": "CAUSAL_LM",
}

# Optimisation settings, read key by key when the trainer is built.
TRIAL2_TRAINING = {
    "num_train_epochs": 5,
    "per_device_train_batch_size": 2,
    "gradient_accumulation_steps": 8,        # 2 x 8 = 16 sequences per optimizer step per device
    "learning_rate": 1e-4,
    "warmup_ratio": 0.05,
    "max_seq_length": 512,
    "quantization": "4bit-nf4",              # descriptive label only; recorded in the results JSON
}

# Attach the Trial 2 adapters to the quantized base model and report how many
# parameters are trainable.
lora_config_t2 = LoraConfig(**TRIAL2_CONFIG)
model_t2 = get_peft_model(model=model_t2, peft_config=lora_config_t2)
model_t2.print_trainable_parameters()

In [None]:
# Trainer setup for Trial 2.
# Hyperparameters that vary between trials are pulled from TRIAL2_TRAINING by
# name; everything else is fixed here.
training_args_t2 = TrainingArguments(
    output_dir="./outputs/sft_trial2",
    per_device_eval_batch_size=2,
    weight_decay=0.01,
    lr_scheduler_type="cosine",
    # Log every 25 steps; evaluate and checkpoint every 200 steps.
    logging_steps=25,
    eval_strategy="steps",
    eval_steps=200,
    save_steps=200,
    fp16=True,
    report_to="none",
    seed=SEED,
    **{
        name: TRIAL2_TRAINING[name]
        for name in (
            "num_train_epochs",
            "per_device_train_batch_size",
            "gradient_accumulation_steps",
            "learning_rate",
            "warmup_ratio",
        )
    },
)

# NOTE(review): tokenizer / dataset_text_field / max_seq_length are passed straight
# to SFTTrainer; whether it still accepts them depends on the installed TRL
# version (the pip install above is unpinned) - confirm.
trainer_t2 = SFTTrainer(
    model=model_t2,
    args=training_args_t2,
    tokenizer=tokenizer,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    dataset_text_field="text",
    max_seq_length=TRIAL2_TRAINING["max_seq_length"],
)

In [None]:
# Train Trial 2 and save JSON results
print('Starting SFT Trial 2...')
start_time = time.time()
trainer_t2.train()
training_time_t2 = time.time() - start_time  # wall-clock seconds spent in train()

# Save model
trainer_t2.save_model('./outputs/sft_trial2/final')

# Get final metrics: the trainer's log history is a list of dicts, one per
# logging / evaluation event.
final_metrics_t2 = trainer_t2.state.log_history

# Most recent training-loss and eval-loss entries. Scanning the reversed history
# with a None default means a short run that never reached a logging or eval
# step yields None instead of raising IndexError on an empty filtered list.
final_train_loss_t2 = next(
    (entry['loss'] for entry in reversed(final_metrics_t2)
     if 'loss' in entry and 'eval_loss' not in entry),
    None,
)
final_eval_loss_t2 = next(
    (entry['eval_loss'] for entry in reversed(final_metrics_t2) if 'eval_loss' in entry),
    None,
)

# Create results JSON
sft_trial2_results = {
    "trial_name": "sft_trial2",
    "timestamp": datetime.now().isoformat(),
    "model_name": MODEL_NAME,
    "dataset": "databricks/databricks-dolly-15k",
    "dataset_size": len(dataset["train"]),
    "lora_config": TRIAL2_CONFIG,
    "training_config": TRIAL2_TRAINING,
    "training_time_seconds": training_time_t2,
    "training_time_minutes": training_time_t2 / 60,
    "final_train_loss": final_train_loss_t2,
    "final_eval_loss": final_eval_loss_t2,
    "training_log": final_metrics_t2,
    "output_dir": "./outputs/sft_trial2/final"
}

# Save JSON
with open('results/sft_trial2_results.json', 'w') as f:
    json.dump(sft_trial2_results, f, indent=2)
print(f'\nTrial 2 complete! Results saved to results/sft_trial2_results.json')
print(f'Training time: {training_time_t2/60:.2f} minutes')

## 5. Compare SFT Trials

In [None]:
# Load and compare results
with open('results/sft_trial1_results.json') as f:
    t1 = json.load(f)
with open('results/sft_trial2_results.json') as f:
    t2 = json.load(f)


def _fmt_metric(value, spec):
    """Format `value` with `spec`; render a 15-wide 'n/a' when the metric is None.

    The results files store None (JSON null) for a loss that was never logged,
    and None cannot be formatted with a float spec such as '<15.4f'.
    """
    if value is None:
        return f"{'n/a':<15}"
    return format(value, spec)


print("="*60)
print("SFT TRIALS COMPARISON")
print("="*60)
print(f"{'Metric':<25} {'Trial 1':<15} {'Trial 2':<15}")
print("-"*60)
print(f"{'LoRA Rank':<25} {t1['lora_config']['r']:<15} {t2['lora_config']['r']:<15}")
print(f"{'Quantization':<25} {t1['training_config']['quantization']:<15} {t2['training_config']['quantization']:<15}")
print(f"{'Learning Rate':<25} {t1['training_config']['learning_rate']:<15} {t2['training_config']['learning_rate']:<15}")
print(f"{'Epochs':<25} {t1['training_config']['num_train_epochs']:<15} {t2['training_config']['num_train_epochs']:<15}")
print(f"{'Final Train Loss':<25} {_fmt_metric(t1['final_train_loss'], '<15.4f')} {_fmt_metric(t2['final_train_loss'], '<15.4f')}")
print(f"{'Final Eval Loss':<25} {_fmt_metric(t1['final_eval_loss'], '<15.4f')} {_fmt_metric(t2['final_eval_loss'], '<15.4f')}")
print(f"{'Training Time (min)':<25} {t1['training_time_minutes']:<15.1f} {t2['training_time_minutes']:<15.1f}")

# Determine best model: lower eval loss wins, and a tie goes to Trial 2.
# A trial without a recorded eval loss cannot win against one that has it;
# if neither has one, no recommendation is made.
eval_loss_t1 = t1['final_eval_loss']
eval_loss_t2 = t2['final_eval_loss']
if eval_loss_t1 is None and eval_loss_t2 is None:
    best = None
    print("\nNo eval loss was recorded for either trial; cannot pick a best model.")
elif eval_loss_t2 is None or (eval_loss_t1 is not None and eval_loss_t1 < eval_loss_t2):
    best = "sft_trial1"
else:
    best = "sft_trial2"

if best is not None:
    print(f"\nBest model based on eval loss: {best}")
    print(f"\nUse this for DPO training: ./outputs/{best}/final")