# 02 - SFT Training on Kaggle (Generative Manim)

Self-contained notebook for QLoRA supervised fine-tuning (SFT) on a Kaggle T4 GPU.

**Setup**: Upload `sft_train.jsonl` and `sft_val.jsonl` to Kaggle as a dataset, then make sure `TRAIN_PATH` and `VAL_PATH` in the configuration cell point at that dataset.

In [None]:
# Install dependencies
# NOTE(review): none of these packages are version-pinned, so every run picks
# up whatever releases pip resolves at that moment. The SFTTrainer call later
# in this notebook passes `tokenizer=` and `max_seq_length=` directly; confirm
# the installed `trl` release still accepts those keywords, or pin versions.
!pip install -q torch transformers trl peft bitsandbytes accelerate datasets wandb

In [None]:
import os
import json
import torch
from datasets import load_dataset
from peft import LoraConfig, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from trl import SFTTrainer
import wandb

In [None]:
# Configuration
# MODEL_ID is the identifier handed to from_pretrained() for both the tokenizer
# and the model; MODEL_NAME is a short slug used to build OUTPUT_DIR.
MODEL_ID = "Qwen/Qwen2.5-Coder-7B-Instruct"  # Change for other models
MODEL_NAME = "qwen2.5-coder-7b"
# Forwarded to SFTTrainer as max_seq_length.
MAX_SEQ_LENGTH = 4096

# Paths - adjust to your Kaggle dataset
# Both files are read with the "json" dataset loader further down.
TRAIN_PATH = "/kaggle/input/gm-training-data/sft_train.jsonl"
VAL_PATH = "/kaggle/input/gm-training-data/sft_val.jsonl"
# Checkpoints, the final adapter and the tokenizer are all written here.
OUTPUT_DIR = f"/kaggle/working/sft-{MODEL_NAME}"

# Hyperparameters
NUM_EPOCHS = 3
LEARNING_RATE = 2e-4
# Per-device batch of 2 with 8 accumulation steps, i.e. 16 sequences per
# optimizer step on each device.
BATCH_SIZE = 2
GRAD_ACCUM = 8
# LoRA rank and scaling factor (alpha / r = 2).
LORA_R = 32
LORA_ALPHA = 64

In [None]:
# W&B login (use Kaggle secrets)
# Best-effort: any failure to obtain or use the API key disables W&B logging
# instead of aborting the notebook. USE_WANDB is read later when building the
# training arguments and when closing the run.
try:
    # Imported inside the try so that running outside Kaggle (where the
    # kaggle_secrets module is unavailable) takes the fallback path rather
    # than crashing the cell with ModuleNotFoundError.
    from kaggle_secrets import UserSecretsClient

    secrets = UserSecretsClient()
    wandb_key = secrets.get_secret("WANDB_API_KEY")
    wandb.login(key=wandb_key)
    USE_WANDB = True
except Exception as exc:
    # `Exception` rather than a bare `except:` so KeyboardInterrupt and
    # SystemExit still propagate; the reason is printed to make a
    # misconfigured secret distinguishable from a missing one.
    print(
        "W&B not configured, training without logging "
        f"({type(exc).__name__}: {exc})"
    )
    USE_WANDB = False

In [None]:
# Load tokenizer
# NOTE(review): trust_remote_code=True lets the Hub repository supply its own
# Python code; confirm it is actually required for MODEL_ID.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
# Fall back to the EOS token for padding when the tokenizer defines no pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# QLoRA config
# 4-bit NF4 weights with double quantization; compute runs in float16, which
# matches fp16=True in the training arguments.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

# Load model
# The base model is loaded already quantized via bnb_config; device placement
# is left to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)
# PEFT helper that readies the quantized model for training; must run before
# the LoRA adapter is attached by the trainer.
model = prepare_model_for_kbit_training(model)
print(f"Model loaded: {MODEL_ID}")

In [None]:
# LoRA config
# Adapter settings handed to SFTTrainer through peft_config=. Rank and alpha
# come from the configuration cell; "all-linear" asks PEFT to choose the
# target layers itself rather than listing module names here.
peft_config = LoraConfig(
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    lora_dropout=0.05,
    target_modules="all-linear",
    bias="none",
    task_type="CAUSAL_LM",
)

# Load dataset
# Each JSONL file is loaded on its own. A bare data_files path is exposed by
# the "json" loader under the default "train" split name, which is why the
# validation file is also requested with split="train".
# Rows are assumed to carry a "messages" column (read by formatting_func) —
# TODO confirm against the uploaded files.
train_dataset = load_dataset("json", data_files=TRAIN_PATH, split="train")
val_dataset = load_dataset("json", data_files=VAL_PATH, split="train")
print(f"Train: {len(train_dataset)} | Val: {len(val_dataset)}")

# Formatting function
def formatting_func(examples):
    """Render every conversation in a batch as one chat-templated string.

    Args:
        examples: Batch mapping with a ``"messages"`` column. Each entry is
            passed to the tokenizer's chat template as a single conversation
            (presumably a list of role/content dicts — verify against the
            dataset schema).

    Returns:
        A list of strings, one per entry of ``examples["messages"]``, in the
        same order. An empty batch yields an empty list.
    """
    # tokenize=False keeps the result as text; add_generation_prompt=False
    # means no extra assistant prompt is appended after the conversation.
    return [
        tokenizer.apply_chat_template(
            conversation, tokenize=False, add_generation_prompt=False
        )
        for conversation in examples["messages"]
    ]

In [None]:
# Training args
# Cosine schedule with a 3% warmup, fp16 mixed precision and a paged AdamW
# optimizer. Evaluation and checkpointing both happen every 100 steps, and
# only the two most recent checkpoints are kept on disk.
# NOTE(review): the `eval_strategy` keyword name depends on the installed
# transformers release (earlier releases spelled it `evaluation_strategy`);
# confirm against the version pip resolves, since the install is unpinned.
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    num_train_epochs=NUM_EPOCHS,
    learning_rate=LEARNING_RATE,
    lr_scheduler_type="cosine",
    warmup_ratio=0.03,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRAD_ACCUM,
    fp16=True,
    logging_steps=10,
    save_steps=100,
    eval_steps=100,
    eval_strategy="steps",
    save_total_limit=2,
    max_grad_norm=1.0,
    weight_decay=0.01,
    optim="paged_adamw_32bit",
    group_by_length=True,
    # Log to W&B only when the login cell succeeded.
    report_to="wandb" if USE_WANDB else "none",
    seed=42,
)

# Trainer
# Wires together the quantized base model, the LoRA config (the adapter is
# attached by the trainer via peft_config=), both datasets and the
# chat-template formatting function.
# NOTE(review): `tokenizer=` and `max_seq_length=` are passed directly to
# SFTTrainer, and formatting_func is written to receive a batch. Whether these
# are accepted depends on the installed `trl` release — newer releases are
# believed to move the length setting onto SFTConfig and to rename the
# tokenizer argument. Confirm against the resolved version or pin `trl`.
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    peft_config=peft_config,
    formatting_func=formatting_func,
    max_seq_length=MAX_SEQ_LENGTH,
    tokenizer=tokenizer,
)

In [None]:
# Train!
# Runs the full training loop configured above; periodic checkpoints go to
# OUTPUT_DIR according to the training arguments.
trainer.train()

# Save
# The final model and the tokenizer are written side by side in OUTPUT_DIR so
# the directory can be loaded on its own later.
trainer.save_model(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)
print(f"Model saved to {OUTPUT_DIR}")

# Close the W&B run only if logging was enabled by the login cell.
# NOTE(review): this is skipped if trainer.train() raises.
if USE_WANDB:
    wandb.finish()

In [None]:
# Quick test: generate a sample
# Smoke test of the in-memory model right after training: builds a
# system + user conversation, samples up to 512 new tokens and prints only
# the newly generated text.
# NOTE(review): PeftModel is imported here but not referenced in this cell.
from peft import PeftModel

model.eval()
test_prompt = "Create an animation showing a blue circle transforming into a red square"
messages = [
    {"role": "system", "content": "Write Manim scripts for animations in Python. Generate code, not text. Always use GenScene as the class name."},
    {"role": "user", "content": test_prompt},
]
# add_generation_prompt=True here (unlike in formatting_func) so the rendered
# prompt ends where the assistant's reply should begin.
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(text, return_tensors="pt").to(model.device)

# No gradients are needed for inference.
with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=512, temperature=0.2, do_sample=True)

# Slice off the prompt tokens so only the completion is decoded.
response = tokenizer.decode(output[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
print(response)

In [None]:
# Optional: Push to HuggingFace Hub
# from huggingface_hub import login
# login(token=secrets.get_secret("HF_TOKEN"))
# model.push_to_hub(f"your-username/gm-sft-{MODEL_NAME}")
# tokenizer.push_to_hub(f"your-username/gm-sft-{MODEL_NAME}")