# LoRA Fine-Tuning for "Short Response" Concept
## Mistral-7B-Instruct-v0.2

This notebook uses HuggingFace TRL's DPOTrainer for proper DPO training.

**Key improvements over manual implementation:**
1. Uses 4-bit quantization with proper QLoRA setup (NOT 8-bit)
2. Proper DPO training with reference model handling
3. Higher learning rate appropriate for LoRA
4. Gradient checkpointing for memory efficiency

## Cell 1: Installation

In [4]:
%pip install -q torch
%pip install -q bitsandbytes accelerate
%pip install -q transformers peft datasets
%pip install -q trl

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


## Cell 2: Configuration & Imports

In [5]:
import torch
import json
import random
from datasets import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model
from trl import DPOTrainer, DPOConfig

# Configuration
LORA_RANK = 8  # Can also try rank=1 for comparison
LORA_ALPHA = 16  # Typically 2x rank or higher
LORA_DROPOUT = 0.05
TARGET_MODULES = ["q_proj", "k_proj", "v_proj", "o_proj"]  # Expanded for better learning

# Training hyperparameters (vs neologism)
LR = 2e-4  # Higher LR for LoRA (vs 1e-4 for ~short)
EPOCHS = 5 # same as ~short
BATCH_SIZE = 1 # same as ~short
GRADIENT_ACCUMULATION_STEPS = 8 # vs 10 for ~short
MAX_LENGTH = 512  # vs 1024 for ~short
BETA = 0.1  # DPO beta (vs 0.2 for ~short)

MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"
DATA_FILE = "short_ft.jsonl"  # Upload this file

print(f"Configuration:")
print(f"  LoRA Rank: {LORA_RANK}")
print(f"  Learning Rate: {LR}")
print(f"  Epochs: {EPOCHS}")
print(f"  Beta: {BETA}")

  from .autonotebook import tqdm as notebook_tqdm


Configuration:
  LoRA Rank: 8
  Learning Rate: 0.0002
  Epochs: 5
  Beta: 0.1


## Cell 3: Load and Prepare Data

In [None]:
# For Colab: Upload the data file
from google.colab import files
print("Please upload short_ft.jsonl")
uploaded = files.upload()

#FOR LOCAL EXECUTION
'''
# Define the filename
DATA_FILENAME = "short_ft.jsonl"

try:
    examples = []
    # Open the file and load data line by line
    with open(DATA_FILENAME, 'r') as f:
        for line in f:
            if line.strip():
                # Assuming the file is JSON Lines format, parse each line
                examples.append(json.loads(line))

    print(f"✅ Successfully loaded {len(examples)} examples from {DATA_FILENAME}.")

except FileNotFoundError:
    print(f"❌ ERROR: File not found! Make sure '{DATA_FILENAME}' is in the same directory as this script.")
except json.JSONDecodeError as e:
    print(f"❌ ERROR: Failed to parse JSON in {DATA_FILENAME}. Check file format.")
    print(f"Details: {e}")
    '''

✅ Successfully loaded 1030 examples from short_ft.jsonl.


In [7]:
# Load training data
examples = []
with open(DATA_FILE, "r") as f:
    for line in f:
        if line.strip():
            examples.append(json.loads(line))

print(f"Loaded {len(examples)} examples.")
print(f"First example prompt (truncated): {examples[0]['prompt'][:100]}...")

# Convert to HuggingFace Dataset format for DPOTrainer
def format_for_dpo(example):
    """Format data for DPO training.
    DPOTrainer expects: prompt, chosen, rejected
    """
    # Format prompt with Mistral chat template
    prompt = f"[INST] {example['prompt']} [/INST]"
    return {
        "prompt": prompt,
        "chosen": example["chosen"],
        "rejected": example["rejected"],
    }

formatted_examples = [format_for_dpo(ex) for ex in examples]
dataset = Dataset.from_list(formatted_examples)

# Split into train/eval
dataset = dataset.train_test_split(test_size=0.05, seed=42)
train_dataset = dataset["train"]
eval_dataset = dataset["test"]

print(f"Train size: {len(train_dataset)}, Eval size: {len(eval_dataset)}")

Loaded 1030 examples.
First example prompt (truncated): Can brain cells move? By movement I mean long distance migration (preferably within the brain only)....
Train size: 978, Eval size: 52


## Cell 4: Load Model with Quantization

In [11]:
# Configure 4-bit quantization (QLoRA)
# NOTE: Do NOT use 8-bit - it causes gradient issues
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"  # Important for generation

# Load model
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)

# Prepare model for k-bit training
model = prepare_model_for_kbit_training(model)

print(f"Model loaded. Vocab size: {len(tokenizer)}")
print(f"Model device: {model.device}")

Loading checkpoint shards: 100%|██████████| 3/3 [05:34<00:00, 111.56s/it]


Model loaded. Vocab size: 32000
Model device: cpu


## Cell 5: Configure LoRA

In [14]:
lora_config = LoraConfig(
    r=LORA_RANK,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    target_modules=TARGET_MODULES,
    bias="none",
    task_type="CAUSAL_LM",
)

# Apply LoRA
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

# Count parameters for comparison
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"\nTrainable parameters: {trainable_params:,}")
print(f"Neologism had: 4,096 parameters")
print(f"Ratio: {trainable_params / 4096:.1f}x more parameters than neologism")

trainable params: 6,815,744 || all params: 7,248,547,840 || trainable%: 0.0940

Trainable parameters: 6,815,744
Neologism had: 4,096 parameters
Ratio: 1664.0x more parameters than neologism




## Cell 6: Configure Training

In [15]:
# Training arguments
training_args = DPOConfig(
    output_dir=f"./lora_short_rank{LORA_RANK}",
    num_train_epochs=EPOCHS,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    gradient_checkpointing=True,
    learning_rate=LR,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    logging_steps=10,
    save_steps=100,
    eval_strategy="steps",
    eval_steps=50,
    fp16=True,
    optim="paged_adamw_8bit",  # Memory-efficient optimizer
    max_length=MAX_LENGTH,
    max_prompt_length=MAX_LENGTH // 2,
    beta=BETA,
    remove_unused_columns=False,
    report_to="none",  # Disable wandb
)

## Cell 7: Initialize DPO Trainer

In [16]:
# DPOTrainer handles reference model creation automatically
trainer = DPOTrainer(
    model=model,
    ref_model=None,  # Will use a copy of the model
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    processing_class=tokenizer,
)

print("DPO Trainer initialized successfully!")
print(f"Number of training steps: {len(trainer.get_train_dataloader()) * EPOCHS}")

Extracting prompt in train dataset: 100%|██████████| 978/978 [00:00<00:00, 6805.58 examples/s]
Applying chat template to train dataset: 100%|██████████| 978/978 [00:00<00:00, 23705.95 examples/s]
Tokenizing train dataset: 100%|██████████| 978/978 [00:01<00:00, 798.19 examples/s]
Extracting prompt in eval dataset: 100%|██████████| 52/52 [00:00<00:00, 8506.39 examples/s]
Applying chat template to eval dataset: 100%|██████████| 52/52 [00:00<00:00, 10230.49 examples/s]
Tokenizing eval dataset: 100%|██████████| 52/52 [00:00<00:00, 710.82 examples/s]


DPO Trainer initialized successfully!
Number of training steps: 4890


## Cell 8: Train

In [17]:
import time

# Memory tracking - reset stats before training
torch.cuda.reset_peak_memory_stats()
print("Starting training...")

start_time = time.time()

# Train the model
trainer.train()

training_time = time.time() - start_time

# Memory tracking - get peak usage after training
peak_memory_bytes = torch.cuda.max_memory_allocated()
peak_memory_gb = peak_memory_bytes / (1024**3)
peak_memory_mb = peak_memory_bytes / (1024**2)

print(f"\n{'='*60}")
print("TRAINING COMPLETE")
print(f"{'='*60}")
print(f"Training time: {training_time/60:.2f} minutes")
print(f"\n{'='*60}")
print("MEMORY USAGE")
print(f"{'='*60}")
print(f"Peak GPU Memory: {peak_memory_gb:.2f} GB ({peak_memory_mb:.0f} MB)")

AttributeError: module 'torch._C' has no attribute '_cuda_resetPeakMemoryStats'

## Cell 9: Save Model

In [None]:
import shutil

# Save LoRA adapter
save_path = f"./lora_short_rank{LORA_RANK}_final"
trainer.save_model(save_path)

# Save training config
config_info = {
    "lora_rank": LORA_RANK,
    "lora_alpha": LORA_ALPHA,
    "target_modules": TARGET_MODULES,
    "base_model": MODEL_NAME,
    "lr": LR,
    "epochs": EPOCHS,
    "beta": BETA,
    "training_examples": len(train_dataset),
    "training_time_minutes": training_time / 60,
    "peak_memory_gb": peak_memory_gb,
}

with open(f"{save_path}/training_config.json", "w") as f:
    json.dump(config_info, f, indent=2)

print(f"Saved LoRA adapter to {save_path}/")

# Download the adapter
shutil.make_archive(save_path.replace("./", ""), 'zip', save_path)
files.download(f"{save_path.replace('./', '')}.zip")

## Cell 10: Test the Model

In [None]:
# Test with sample prompts
test_prompts = [
    "What is machine learning?",
    "Explain the water cycle.",
    "How do airplanes fly?",
]

print("\n" + "="*50)
print("Testing trained model:")
print("="*50)

model.eval()
for prompt in test_prompts:
    formatted_prompt = f"[INST] {prompt} [/INST]"
    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
    
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=100,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.pad_token_id,
        )
    
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Extract just the response part
    response = response.split("[/INST]")[-1].strip()
    
    print(f"\nPrompt: {prompt}")
    print(f"Response: {response[:200]}...")
    print(f"Word count: {len(response.split())}")
    print("-" * 50)

print("\nTraining complete! Download the zip file for the LoRA adapter.")