In [2]:
# ============================================================================
# Supervised Fine-Tuning (SFT) with HuggingFace TRL
# ============================================================================

from trl import SFTTrainer, SFTConfig
from peft import LoraConfig, get_peft_model, TaskType
from transformers import (
    AutoModelForCausalLM, 
    AutoTokenizer,
    TrainingArguments,
    BitsAndBytesConfig
)
from datasets import Dataset, load_dataset
import torch

print("Libraries imported successfully!")

Libraries imported successfully!


In [3]:
# ============================================================================
# Step 1: Load Model and Tokenizer
# ============================================================================

# Checkpoint selection and precision switch
model_name = "meta-llama/Llama-3.1-8B"  # Or use a smaller model like "microsoft/phi-2" for testing
use_4bit = True  # Set to False if you have enough GPU memory

# Options common to both loading paths are gathered once; the precision-specific
# option is added below before a single from_pretrained() call.
# NOTE(review): trust_remote_code=True allows the Hub repository to run its own
# Python code while loading -- confirm the chosen checkpoint actually needs it.
model_load_kwargs = {
    "device_map": "auto",
    "trust_remote_code": True,
}

if use_4bit:
    # 4-bit NF4 quantization with double quantization and FP16 compute
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True,
    )
    model_load_kwargs["quantization_config"] = bnb_config
else:
    # Unquantized path: request half-precision weights instead
    model_load_kwargs["torch_dtype"] = torch.float16

model = AutoModelForCausalLM.from_pretrained(model_name, **model_load_kwargs)

# Tokenizer that belongs to the same checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Fall back to the EOS token for padding when the tokenizer defines no pad token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.pad_token_id = tokenizer.eos_token_id

# Summary of what was loaded
print(f"Model loaded: {model_name}")
print(f"Model device: {next(model.parameters()).device}")
print(f"Tokenizer vocab size: {len(tokenizer)}")

OSError: You are trying to access a gated repo.
Make sure to have access to it at https://huggingface.co/meta-llama/Llama-3.1-8B.
403 Client Error. (Request ID: Root=1-698df504-649bda6c0c64ef6d3bc191ac;1a2875a6-9433-4ee5-bdb6-84cac271cdc6)

Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-3.1-8B/resolve/main/config.json.
Access to model meta-llama/Llama-3.1-8B is restricted and you are not in the authorized list. Visit https://huggingface.co/meta-llama/Llama-3.1-8B to ask for access.

In [None]:
# ============================================================================
# Step 2: Configure LoRA (Low-Rank Adaptation)
# ============================================================================

# LoRA hyperparameters, collected in one mapping and then unpacked into the config
lora_settings = {
    "r": 64,                                                       # Rank of adaptation
    "lora_alpha": 128,                                             # LoRA alpha parameter
    "target_modules": ["q_proj", "v_proj", "k_proj", "o_proj"],    # Modules to apply LoRA
    "lora_dropout": 0.1,                                           # Dropout for LoRA layers
    "bias": "none",                                                # Bias handling
    "task_type": TaskType.CAUSAL_LM,                               # Task type
}
peft_config = LoraConfig(**lora_settings)

# Echo the values as stored on the config object
print("LoRA configuration:")
print(f"  Rank (r): {peft_config.r}")
print(f"  Alpha: {peft_config.lora_alpha}")
print(f"  Target modules: {peft_config.target_modules}")
print(f"  Dropout: {peft_config.lora_dropout}")

In [None]:
# ============================================================================
# Step 3: Prepare Dataset
# ============================================================================

# Option 1: Create a simple example dataset (for testing)
def create_example_dataset():
    """Build a tiny in-memory instruction dataset for smoke-testing the pipeline.

    Returns:
        A ``Dataset`` with a single "text" column: five instruction/response
        samples, with the whole list repeated 20 times (100 rows in total).
    """
    base_samples = [
        "### Instruction:\nWhat is machine learning?\n### Response:\nMachine learning is a subset of artificial intelligence that enables systems to learn and improve from experience without being explicitly programmed.",
        "### Instruction:\nExplain neural networks.\n### Response:\nNeural networks are computing systems inspired by biological neural networks. They consist of interconnected nodes (neurons) organized in layers.",
        "### Instruction:\nWhat is supervised learning?\n### Response:\nSupervised learning is a type of machine learning where models are trained on labeled data, learning to map inputs to known outputs.",
        "### Instruction:\nDescribe deep learning.\n### Response:\nDeep learning is a subset of machine learning that uses neural networks with multiple layers (deep networks) to learn hierarchical representations of data.",
        "### Instruction:\nWhat is transfer learning?\n### Response:\nTransfer learning is a technique where a model trained on one task is reused as a starting point for a model on a second task.",
    ]
    repeat_count = 20  # Repeat to have more examples
    return Dataset.from_dict({"text": base_samples * repeat_count})

# Option 2: Load from HuggingFace Hub
def load_hf_dataset(dataset_name="wikitext", subset="wikitext-2-raw-v1", split="train"):
    """Fetch a dataset from the HuggingFace Hub, reduced to its 'text' column.

    Args:
        dataset_name: Hub dataset identifier passed to ``load_dataset``.
        subset: Configuration/subset name passed to ``load_dataset``.
        split: Split expression passed to ``load_dataset``.

    Returns:
        The loaded dataset restricted to the "text" column when that column
        exists; the unmodified dataset (after printing a warning) when it does
        not; or the built-in example dataset when anything in the loading path
        raises.
    """
    try:
        dataset = load_dataset(dataset_name, subset, split=split)

        # Without a 'text' column there is nothing to select: warn and hand it back as-is
        if "text" not in dataset.column_names:
            print(f"Warning: Dataset doesn't have 'text' column. Available columns: {dataset.column_names}")
            return dataset

        return dataset.select_columns(["text"])
    except Exception as e:
        # Best-effort behaviour: report the problem and continue with the toy data
        print(f"Error loading dataset: {e}")
        print("Falling back to example dataset...")
        return create_example_dataset()

# Option 3: Load Alpaca format dataset
def load_alpaca_dataset(dataset_name="tatsu-lab/alpaca"):
    """Load an Alpaca-style dataset and flatten each record into one 'text' string.

    Args:
        dataset_name: Hub dataset identifier; its "train" split is loaded.

    Returns:
        A dataset containing only the generated "text" column, or the built-in
        example dataset when loading or conversion raises.
    """

    def format_alpaca(example):
        """Render one record as an instruction / optional input / response prompt."""
        instruction = example.get("instruction", "")
        input_text = example.get("input", "")
        output = example.get("output", "")

        # The "### Input:" section is emitted only for records with a non-empty input
        text = (
            f"### Instruction:\n{instruction}\n### Input:\n{input_text}\n### Response:\n{output}"
            if input_text
            else f"### Instruction:\n{instruction}\n### Response:\n{output}"
        )
        return {"text": text}

    try:
        dataset = load_dataset(dataset_name, split="train")
        # Convert every record, then keep just the rendered text
        return dataset.map(format_alpaca).select_columns(["text"])
    except Exception as e:
        # Best-effort behaviour: report the problem and continue with the toy data
        print(f"Error loading Alpaca dataset: {e}")
        print("Falling back to example dataset...")
        return create_example_dataset()

# Select the training data source; the commented lines are the alternative options
# train_dataset = load_hf_dataset("wikitext", "wikitext-2-raw-v1", "train[:100]")  # Use subset for testing
# train_dataset = load_alpaca_dataset("tatsu-lab/alpaca")
train_dataset = create_example_dataset()  # Use example dataset

# Size and schema of the selected dataset
print(f"Dataset loaded: {len(train_dataset)} examples")
print(f"Dataset columns: {train_dataset.column_names}")

# Preview: first 200 characters of the first row, followed by an ellipsis
first_text = train_dataset[0]["text"]
preview = first_text[:200] + "..."
print(f"\nFirst example:")
print(preview)

In [None]:
# ============================================================================
# Step 4: Configure Training Arguments
# ============================================================================

# Training hyperparameters for the SFT run.
#
# Two deliberate choices:
# * Warmup is given as a ratio of the total optimizer steps rather than a fixed
#   step count. The bundled example dataset yields only about a dozen optimizer
#   steps, so a fixed 50-step warmup would end the run before the learning rate
#   ever reached its configured value and before the cosine decay started.
# * `remove_unused_columns` is left at its default (True). Keeping the raw
#   string "text" column next to the tokenized fields makes the default data
#   collator fail when it tries to build tensors from the batch.
training_args = SFTConfig(
    output_dir="./sft_model",              # Output directory
    per_device_train_batch_size=2,         # Batch size per device
    gradient_accumulation_steps=4,         # Gradient accumulation steps
    learning_rate=2e-4,                    # Learning rate
    num_train_epochs=1,                    # Number of training epochs
    fp16=True,                             # Use FP16 (or bf16=True for BF16)
    logging_steps=10,                      # Log every N steps
    save_steps=100,                        # Save checkpoint every N steps
    save_total_limit=3,                    # Keep only last N checkpoints
    warmup_ratio=0.03,                     # Fraction of total steps used for LR warmup
    report_to="tensorboard",               # Logging tool
    optim="paged_adamw_8bit",              # Optimizer (good for 4-bit models)
    lr_scheduler_type="cosine",            # Learning rate scheduler
    max_seq_length=8192,                   # Maximum sequence length
    packing=False,                         # Pack multiple sequences (set True for efficiency)
)

print("Training configuration:")
print(f"  Output directory: {training_args.output_dir}")
print(f"  Batch size: {training_args.per_device_train_batch_size}")
print(f"  Gradient accumulation: {training_args.gradient_accumulation_steps}")
print(f"  Effective batch size: {training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps}")
print(f"  Learning rate: {training_args.learning_rate}")
print(f"  Epochs: {training_args.num_train_epochs}")
print(f"  Max sequence length: {training_args.max_seq_length}")

In [None]:
# ============================================================================
# Step 5: Initialize SFT Trainer
# ============================================================================

# Build the trainer: training configuration first, then the model with its
# adapter settings, then the data-handling options.
trainer = SFTTrainer(
    args=training_args,
    model=model,
    peft_config=peft_config,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    dataset_text_field="text",                      # Field name in dataset containing text
    max_seq_length=training_args.max_seq_length,    # Same limit as in the training config
    packing=training_args.packing,                  # Pack sequences for efficiency
)

print("SFT Trainer initialized successfully!")
# Rough estimate: rows // effective per-device batch size, times the epoch count
print(f"Total training steps: {len(train_dataset) // (training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps) * training_args.num_train_epochs}")

In [None]:
# ============================================================================
# Step 6: Train the Model
# ============================================================================

# Horizontal rule printed before and after the training log
banner = "=" * 70

print("Starting training...")
print(banner)

# Run the fine-tuning loop
trainer.train()

print(banner)
print("Training completed!")

# Persist the final model to the trainer's output directory
trainer.save_model()
print(f"Model saved to {training_args.output_dir}")

In [None]:
# ============================================================================
# Step 7: Test the Fine-Tuned Model
# ============================================================================

# Load the fine-tuned model for inference
from peft import PeftModel

# Rebuild the base model with the same settings that were used for training.
# Options shared by both precisions are collected once so the two paths cannot
# drift apart; only the precision-specific option differs.
# NOTE(review): the training-time model from the earlier cells may still be
# resident while this second copy loads -- watch GPU memory on small devices.
base_model_kwargs = {
    "device_map": "auto",
    "trust_remote_code": True,
}
if use_4bit:
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True,
    )
    base_model_kwargs["quantization_config"] = bnb_config
else:
    base_model_kwargs["torch_dtype"] = torch.float16

base_model = AutoModelForCausalLM.from_pretrained(model_name, **base_model_kwargs)

# Load PEFT adapter
model = PeftModel.from_pretrained(base_model, training_args.output_dir)

# Merge adapter with base model (optional, for faster inference)
# model = model.merge_and_unload()

# Set to evaluation mode
model.eval()

# Test inference
test_prompt = "### Instruction:\nWhat is machine learning?\n### Response:\n"

inputs = tokenizer(test_prompt, return_tensors="pt").to(model.device)
# Number of prompt tokens, used below to separate the prompt from the completion
prompt_length = inputs["input_ids"].shape[-1]

with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=100,
        temperature=0.7,
        do_sample=True,
        top_p=0.9,
        # Passed explicitly so generate() does not have to guess a padding token
        pad_token_id=tokenizer.pad_token_id,
    )

# For a causal LM the returned sequence is the prompt followed by the newly
# generated tokens; drop the prompt part so the printed response is not a
# repeat of the prompt shown just above it.
generated_tokens = outputs[0][prompt_length:]
response = tokenizer.decode(generated_tokens, skip_special_tokens=True)
print("Test Prompt:", test_prompt)
print("\nGenerated Response:")
print(response)

## Notes:

1. **Model Access**: For gated models like Llama, you need to:
   - Accept the model license on HuggingFace
   - Login: `huggingface-cli login`
   - Or pass the `token` parameter to `from_pretrained()`

2. **Dataset Formats**:
   - **Text format**: Dataset with "text" column containing full text
   - **Alpaca format**: Dataset with "instruction", "input", "output" columns (will be converted)

3. **Memory Optimization**:
   - `load_in_4bit=True`: Uses 4-bit quantization
   - `gradient_accumulation_steps`: Simulates larger batch size
   - `packing=True`: Packs multiple sequences for efficiency

4. **LoRA Parameters**:
   - `r`: Rank (lower = fewer parameters, faster training)
   - `lora_alpha`: Scaling factor (typically 2*r)
   - `target_modules`: Which layers to apply LoRA to

5. **Training Tips**:
   - Start with a small dataset to verify the pipeline end to end
   - Monitor GPU memory usage
   - Adjust `max_seq_length` based on your GPU memory
   - Use `fp16` or `bf16` for faster training