# Supervised Fine-Tuning on CUDA

<a target="_blank" href="https://colab.research.google.com/github/simonguest/CS-394/blob/main/src/07/notebooks/train-cuda.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>
<a target="_blank" href="https://github.com/simonguest/CS-394/raw/refs/heads/main/src/07/notebooks/train-cuda.ipynb">
  <img src="https://img.shields.io/badge/Download_.ipynb-blue" alt="Download .ipynb"/>
</a>

## Dependencies

In [None]:
!uv pip install -q datasets bitsandbytes trl

## Training parameters

In [None]:
import os

# --- Where the data comes from and what gets produced -----------------------
# Hugging Face dataset repo to train on; point this at your own dataset.
DATASET_REPO = "simonguest/test-dataset"
# Checkpoint that the fine-tune starts from.
BASE_MODEL = "google/gemma-3-1b-it"
# Name given to the fine-tuned model.
MODEL_NAME = "code-explainer"

# Local output folder, derived from the model name.
MODEL_FOLDER = f"./models/{MODEL_NAME}"

# The WandB project is named after the model.
os.environ["WANDB_PROJECT"] = MODEL_NAME

# --- Training run parameters ------------------------------------------------
BATCH_SIZE = 4
GRADIENT_ACCUMULATION_STEPS = 4
LEARNING_RATE = 2e-4
NUM_EPOCHS = 3
MAX_SEQ_LENGTH = 512

# --- LoRA parameters --------------------------------------------------------
LORA_R = 16
LORA_ALPHA = 32
LORA_DROPOUT = 0.05

# --- Quantization -----------------------------------------------------------
# When True, the base model is loaded in 4-bit (QLoRA) for efficiency.
USE_4BIT = True

## API keys and tokens

In [None]:
import sys
import os

# Credentials come from Colab's secret store when running in Colab, and from a
# local .env file otherwise.
if 'google.colab' in sys.modules:
  from google.colab import userdata # type:ignore
  # HF_TOKEN is left unguarded: if the secret cannot be read, the error
  # surfaces in this cell rather than later during model download.
  os.environ['HF_TOKEN'] = userdata.get('HF_TOKEN')
  # WANDB_API_KEY is optional (the trainer cell checks whether it is set), so
  # a missing or inaccessible secret must not abort the notebook.
  try:
    os.environ['WANDB_API_KEY'] = userdata.get('WANDB_API_KEY')
    print("HF and WANDB API Tokens set for Colab")
  except (userdata.SecretNotFoundError, userdata.NotebookAccessError):
    print("HF API Token set for Colab (WANDB_API_KEY secret not available)")
else:
  # Imported here so that Colab runs do not depend on python-dotenv.
  from dotenv import load_dotenv
  load_dotenv()
  print("Loaded env vars from .env")

## Load dataset from Hugging Face

In [None]:
from datasets import load_dataset

# Fetch every split of the dataset repo configured in DATASET_REPO.
dataset = load_dataset(path=DATASET_REPO)

## Format dataset for correct chat template

In [None]:
from transformers import AutoTokenizer

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
# Only fall back to EOS-as-padding when the tokenizer has no pad token of its
# own. Overwriting an existing pad token makes padding indistinguishable from
# end-of-sequence, which can cause EOS positions to be treated as padding.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"


def format_chat_template(example):
    """Render one example's chat messages into a single training string.

    Args:
        example: A dataset row with a "messages" entry in the format accepted
            by ``tokenizer.apply_chat_template``.

    Returns:
        A dict with one key, "text", holding the rendered conversation.
    """
    messages = example["messages"]
    # tokenize=False keeps the result as a string; no generation prompt is
    # appended because the examples are passed through as given.
    # NOTE(review): the chat template may already emit a BOS token; confirm the
    # trainer does not add a second one when it tokenizes "text".
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=False
    )
    return {"text": text}


# Replace every original column with the single rendered "text" column.
formatted_dataset = dataset.map(
    format_chat_template,
    remove_columns=dataset['train'].column_names # type: ignore
)

print("\nFormatted example:")
print(formatted_dataset['train'][0]['text'][:500])  # type: ignore

## Load base model with QLoRA configuration

In [None]:
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# 4-bit NF4 quantization with double quantization and bfloat16 compute when
# USE_4BIT is enabled; otherwise no quantization config is passed.
bnb_config = (
    BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_use_double_quant=True,
    )
    if USE_4BIT
    else None
)

# Base model, placed on the available device(s) automatically.
# NOTE(review): trust_remote_code=True allows code shipped in the model repo
# to run locally - confirm it is actually needed for BASE_MODEL.
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
    dtype=torch.bfloat16,
)

# Quantized models need the k-bit preparation step before adapters are added.
if USE_4BIT:
    model = prepare_model_for_kbit_training(model)

# Modules that receive LoRA adapters.
lora_targets = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]

peft_config = LoraConfig(
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    target_modules=lora_targets,
    bias="none",
    task_type="CAUSAL_LM",
)

# Wrap the model with the adapters and report how many parameters will train.
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

## Create training configuration and trainer

In [None]:
import os
from transformers import TrainingArguments
from trl import SFTConfig, SFTTrainer

# Log to Weights & Biases only when an API key is configured; an unset or
# empty key disables reporting.
report_to = "wandb" if os.environ.get("WANDB_API_KEY") else "none"

# Training arguments. SFTConfig is used so that the sequence-length cap
# declared in MAX_SEQ_LENGTH is applied instead of the trainer's default.
training_args = SFTConfig(
    output_dir=MODEL_FOLDER,
    num_train_epochs=NUM_EPOCHS,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    learning_rate=LEARNING_RATE,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    logging_steps=10,
    # Evaluation and checkpointing share the same cadence so the best
    # checkpoint (lowest eval_loss) can be reloaded at the end of training.
    eval_strategy="steps",
    eval_steps=100,
    save_strategy="steps",
    save_steps=100,
    save_total_limit=2,
    bf16=True,
    gradient_checkpointing=True,
    optim="paged_adamw_8bit",
    report_to=report_to,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    max_length=MAX_SEQ_LENGTH,
)

# Create trainer
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=formatted_dataset["train"],
    eval_dataset=formatted_dataset["validation"],
    processing_class=tokenizer,
)

# Save the tokenizer now, next to where the adapter will be written; it does
# not change during training.
tokenizer.save_pretrained(f"{MODEL_FOLDER}/lora")

## Train and save the final model

In [None]:
# Run the fine-tuning loop configured on the trainer.
trainer.train()

# Write the final model to the "lora" subfolder of the model folder.
final_model_dir = f"{MODEL_FOLDER}/lora"
trainer.save_model(final_model_dir)