# Leonardo AI Assistant - Unsloth LoRA Training

**Google Colab notebook for training Leonardo's LLM planner with Unsloth**

This notebook trains custom LoRA adapters for Leonardo's voice-first AI assistant using Unsloth on Colab GPUs.

## 🎯 Workflow
1. **Train**: Use this Colab notebook with GPU runtime
2. **Save**: Export LoRA adapter to Google Drive  
3. **Download**: Copy adapter to Leonardo's `lora_registry/`
4. **Test**: Evaluate with Leonardo's test suite
5. **Deploy**: Promote successful adapters to production

## 🔧 Requirements
- **Runtime**: GPU (T4 on the free tier; L4 or A100 with paid Colab compute)
- **Model**: Qwen2.5-3B/7B or Llama-3.1-8B
- **Method**: QLoRA (4-bit) for memory efficiency
- **Session**: 12-24h with Drive checkpoints


In [None]:
# Step 0: Check GPU allocation and setup
!nvidia-smi

import torch
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")

print("\n📋 Before continuing:")
print("1. Runtime → Change runtime type → GPU")
print("2. Prefer A100 if you have Colab Pro/Pro+")


In [None]:
# Step 1: Install dependencies for Leonardo training
import subprocess
import sys

def install_package(package, *pip_args):
    """Install ``package`` with pip into the environment this kernel runs in.

    Args:
        package: Requirement specifier handed to ``pip install``.
        *pip_args: Extra pip command-line options (for example
            ``"--no-deps"``). Each option must be its own argument; pip
            receives an argv list, so a single string containing spaces is
            NOT split into separate options.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    # sys.executable keeps the install in the interpreter the notebook imports from.
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install", "-q", *pip_args, package]
    )

# Install core dependencies
packages = ["unsloth", "bitsandbytes", "accelerate", "datasets", "peft", "transformers", "trl"]
for package in packages:
    install_package(package)

# Optional: xformers for attention optimization.
# "--no-deps" is passed as a separate argument: the previous single string
# "xformers --no-deps" was rejected by pip as an invalid requirement, so the
# install always failed and the failure was hidden by a bare except.
try:
    install_package("xformers", "--no-deps")
except subprocess.CalledProcessError:
    # Best effort only: training still works without xformers.
    print("⚠️ XFormers not compatible, using default attention")
else:
    print("✅ XFormers installed for attention optimization")

print("✅ Dependencies installed successfully!")


In [None]:
# Step 2: Load base model for Leonardo
from unsloth import FastLanguageModel
import torch
import os

# Model configuration - adjust based on GPU
# T4 (free tier) or L4: Use Qwen2.5-3B-Instruct
# A100 (Pro+): Use Qwen2.5-7B-Instruct or Llama-3.1-8B-Instruct
base_model = "Qwen/Qwen2.5-7B-Instruct"
max_seq_len = 2048
dtype = None  # let Unsloth pick fp16/bf16

print(f"🧠 Loading {base_model} for Leonardo...")

# Unsloth's keyword is `max_seq_length`; the previous `max_seq_len=` spelling
# was not a recognised parameter and was forwarded on as an unknown kwarg.
# `dtype` is now passed explicitly so the setting above actually takes effect.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=base_model,
    max_seq_length=max_seq_len,
    dtype=dtype,
    load_in_4bit=True,      # QLoRA for memory efficiency
)

# Configure LoRA adapter.
# The attention and MLP projection layers are listed explicitly (the form used
# in Unsloth's documentation) instead of relying on the "all-linear" shortcut.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
)

print("✅ Leonardo base model and LoRA adapter ready!")
print(f"Trainable parameters: {model.get_nb_trainable_parameters()}")


In [None]:
# Step 3: Prepare training dataset
# TODO: Replace with Leonardo-specific dataset
from datasets import load_dataset

# Demo data only: the first 1% of the train split keeps this cell quick.
# Swap in Leonardo conversation data for real training runs.
ds = load_dataset(path="yahma/alpaca-cleaned", split="train[:1%]")

def format_leonardo_conversation(example):
    """Render one dataset record as a single chat-template string.

    Args:
        example: Mapping read via ``.get`` for the ``instruction``, ``input``
            and ``output`` entries; a missing entry counts as an empty string.

    Returns:
        A dict with one key, ``"text"``, holding the string produced by the
        notebook-level ``tokenizer``'s ``apply_chat_template`` for a
        system / user / assistant exchange.
    """
    # User turn: instruction and optional input on separate lines, trimmed.
    instruction = example.get("instruction","")
    extra_input = example.get("input","")
    user_turn = "\n".join((instruction, extra_input))
    reply = example.get("output","")

    roles = ("system", "user", "assistant")
    contents = (
        "You are Leonardo, a voice-first AI assistant. Generate helpful responses to user requests.",
        user_turn.strip(),
        reply.strip(),
    )
    conversation = [
        {"role": role, "content": content}
        for role, content in zip(roles, contents)
    ]

    # tokenize=False returns text; no generation prompt is appended because
    # the assistant turn is already part of the conversation.
    rendered = tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=False,
    )
    return {"text": rendered}

# Replace every original column with the single formatted "text" column.
train = ds.map(
    format_leonardo_conversation,
    remove_columns=ds.column_names,
)

print(f"✅ Training dataset prepared: {len(train)} examples")
# Preview the first 300 characters of the first formatted example.
print("\nExample conversation:", train[0]["text"][:300] + "...", sep="\n")


In [None]:
# Step 4: Train Leonardo LoRA adapter
from trl import SFTTrainer
from transformers import TrainingArguments

# Training configuration optimized for Colab
# bf16 needs an Ampere-or-newer GPU (compute capability >= 8.0, e.g. A100/L4).
# Older cards such as the T4 (7.5) must train in fp16 instead, so the precision
# is chosen from the attached device rather than from "CUDA is available".
has_cuda = torch.cuda.is_available()
use_bf16 = has_cuda and torch.cuda.get_device_capability(0)[0] >= 8

args = TrainingArguments(
    output_dir="leonardo-lora",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,  # 2 x 8 = 16 sequences per optimizer step per device
    learning_rate=2e-4,
    num_train_epochs=1,
    lr_scheduler_type="cosine",
    warmup_ratio=0.03,
    logging_steps=10,
    save_steps=200,
    bf16=use_bf16,
    fp16=has_cuda and not use_bf16,  # mixed-precision fallback for pre-Ampere GPUs
)

# Build the supervised fine-tuning trainer from the LoRA-wrapped model, the
# formatted dataset and the TrainingArguments defined above.
# NOTE(review): these keyword names (tokenizer, dataset_text_field,
# max_seq_length, packing) depend on the installed TRL/Unsloth versions —
# confirm against the versions this notebook installs.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train,
    dataset_text_field="text",  # column produced by format_leonardo_conversation
    max_seq_length=1024,  # NOTE(review): smaller than max_seq_len (2048) used at model load — confirm intended
    packing=True,
    args=args,
)

print("🚀 Starting Leonardo LoRA training...")
# Runs the training loop; checkpoints go to args.output_dir ("leonardo-lora").
trainer.train()
print("🎉 Training completed!")


In [None]:
# Step 5: Save LoRA adapter to Google Drive
from google.colab import drive
drive.mount('/content/drive')

from datetime import datetime
import json

# Create timestamped adapter directory.
# The directory is derived from adapter_name so each run gets its own folder;
# previously the path was a fixed string and every run overwrote the last one.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
adapter_name = f"leonardo-lora-qwen-7b-{timestamp}"
adapter_dir = f"/content/drive/MyDrive/{adapter_name}"

print(f"💾 Saving Leonardo adapter to Drive: {adapter_dir}")

# Save adapter and tokenizer
trainer.model.save_pretrained(adapter_dir)
tokenizer.save_pretrained(adapter_dir)

# Save metadata for Leonardo integration
metadata = {
    "adapter_name": adapter_name,
    "base_model": base_model,
    "training_date": timestamp,
    "lora_rank": 16,  # keep in sync with r= passed to get_peft_model
    "training_steps": trainer.state.global_step,
    "dataset_size": len(train),
    "colab_gpu": torch.cuda.get_device_name(0) if torch.cuda.is_available() else "cpu"
}

# Explicit encoding so the file is written the same way on any platform.
with open(f"{adapter_dir}/leonardo_metadata.json", "w", encoding="utf-8") as f:
    json.dump(metadata, f, indent=2)

print("✅ Saved to", adapter_dir)
print("📋 Next: Download to Leonardo's lora_registry/ and test!")
