In [None]:
!pip install torch datasets ipywidgets transformers peft

In [None]:
import os
from pathlib import Path

import torch
from datasets import load_dataset

from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    Trainer,
    TrainingArguments,
)

from peft import LoraConfig, get_peft_model

# Export PYTORCH_NO_NVML=1 into this process's environment.
os.environ.update({"PYTORCH_NO_NVML": "1"})

print("Torch version:", torch.__version__)

# Pick the CUDA device when one is visible to torch, otherwise stay on CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Using device:", device)


In [None]:
# Name of the dataset to fine-tune on; every path below is derived from it.
ds_name = "surfing"
# Reject both None and an empty string: either would silently produce
# nonsensical paths such as "dataset/-files/-ft.train.jsonl".
if not ds_name:
    raise RuntimeError("DATASET_NAME not found in .env.state – run 1_gen.ipynb first.")

print("Using dataset:", ds_name)

# Layout: dataset/<ds_name> is the dataset root, and the fine-tuning JSONL
# files live in the sibling directory dataset/<ds_name>-files.
ds_root = Path("dataset") / ds_name
files_root = ds_root.parent / f"{ds_name}-files"

dataset_path_ft_train = files_root / f"{ds_name}-ft.train.jsonl"
dataset_path_ft_valid = files_root / f"{ds_name}-ft.valid.jsonl"

print("Train path:", dataset_path_ft_train)
print("Valid path:", dataset_path_ft_valid)

# Fail fast here so a missing split is reported before any model is loaded.
if not dataset_path_ft_train.is_file():
    raise FileNotFoundError(f"Training file not found: {dataset_path_ft_train}")
if not dataset_path_ft_valid.is_file():
    raise FileNotFoundError(f"Validation file not found: {dataset_path_ft_valid}")


In [None]:
import pandas as pd

# Show the first three records of each JSONL split as a rich table.
for preview_title, split_path in (
    ("Training split preview:", dataset_path_ft_train),
    ("Validation split preview:", dataset_path_ft_valid),
):
    print(preview_title)
    display(pd.read_json(split_path, lines=True).head(3))

In [None]:
# Map each split name to its JSONL file (load_dataset is given plain strings).
data_files = dict(
    train=str(dataset_path_ft_train),
    validation=str(dataset_path_ft_valid),
)

# Load both splits with the "json" builder.
raw_datasets = load_dataset("json", data_files=data_files)
raw_datasets

In [None]:
MODEL_ID = "ibm-granite/granite-4.0-micro"
MAX_SEQ_LEN = 1024  # token cap per example; raise it if your GPU can handle it

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)
# Granite models use special chat tokens; fall back to EOS when no pad token is defined.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# ---- Base model load (plain LoRA, no quantization) ----
# bfloat16 when CUDA is available, full float32 otherwise.
if torch.cuda.is_available():
    load_dtype = torch.bfloat16
else:
    load_dtype = torch.float32

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    dtype=load_dtype,
)

# The generation KV cache is not needed while training, so switch it off.
model.config.use_cache = False
print("Model loaded on:", model.device)

In [None]:
def format_example_to_text(example):
    """
    Render one chat example as a single training string.

    The conversation is read from example["messages"], a list of
    {"role": ..., "content": ...} dicts (system / user / assistant turns),
    and passed through the module-level tokenizer's chat template.

    Returns the templated text (a string, not token ids). No generation
    prompt is appended, because the assistant answer is already part of
    the conversation being rendered.
    """
    return tokenizer.apply_chat_template(
        example["messages"],
        add_generation_prompt=False,
        tokenize=False,
    )


def tokenize_function(example):
    """
    Tokenize one chat example for causal-LM supervised fine-tuning.

    The example is first rendered to text with format_example_to_text and
    then tokenized, truncating to MAX_SEQ_LEN tokens.

    Returns the tokenizer output with an extra "labels" entry that is an
    independent copy of "input_ids".

    Notes:
      * add_special_tokens=False: the chat template has already rendered the
        conversation's special tokens into the text, so letting the tokenizer
        add its own again could duplicate them.
      * No padding is applied here. This function is mapped over single
        examples, where padding="longest" is a no-op; padding belongs in the
        data collator at batch time so pad positions can be masked out of
        the labels.
    """
    text = format_example_to_text(example)
    tokenized = tokenizer(
        text,
        truncation=True,
        max_length=MAX_SEQ_LEN,
        padding=False,
        add_special_tokens=False,
    )
    # Standard causal LM SFT: labels == input_ids (copied so the two lists
    # can be modified independently later on).
    tokenized["labels"] = list(tokenized["input_ids"])
    return tokenized


# Drop every original column of the train split so that only the fields
# returned by tokenize_function remain after mapping.
raw_column_names = raw_datasets["train"].column_names

processed_datasets = raw_datasets.map(
    tokenize_function,
    desc="Tokenizing dataset",
    remove_columns=raw_column_names,
)

processed_datasets


In [None]:
from peft import LoraConfig, get_peft_model

# Names of the modules that LoRA adapters are attached to.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# Rank-16 adapters with alpha 32 and 5% dropout; bias terms are left untouched.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=lora_target_modules,
)

# Wrap the base model with the adapters and report how many parameters train.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()


In [None]:
from transformers import DataCollatorForSeq2Seq, Trainer, TrainingArguments

OUTPUT_DIR = f"outputs/granite-4.0-micro-raft-peft-{ds_name}"

train_batch_size = 1   # smallest possible
eval_batch_size = 1
# Single source of truth for gradient accumulation. It feeds both the
# warmup estimate below and TrainingArguments; previously the estimate
# assumed 4 while the Trainer was actually given 1.
grad_accum_steps = 1
num_epochs = 1
learning_rate = 2e-4
warmup_ratio = 0.03

# Rough estimate of optimizer steps, used only to turn warmup_ratio into
# warmup_steps. It assumes a single training process (no data parallelism).
num_train_examples = len(processed_datasets["train"])
steps_per_epoch = max(num_train_examples // (train_batch_size * grad_accum_steps), 1)
total_steps = steps_per_epoch * num_epochs
warmup_steps = int(total_steps * warmup_ratio)

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=train_batch_size,
    per_device_eval_batch_size=eval_batch_size,
    gradient_accumulation_steps=grad_accum_steps,
    num_train_epochs=num_epochs,
    learning_rate=learning_rate,
    warmup_steps=warmup_steps,
    logging_steps=10,
    save_strategy="epoch",
    bf16=torch.cuda.is_available(),  # bf16 mixed precision only when CUDA is present
    fp16=False,
    report_to="none",
    save_total_limit=2,
    weight_decay=0.01,
)

# Pad each batch to its longest sequence and fill padded label positions with
# -100 so they are ignored by the loss. With batch size 1 this changes nothing,
# but without it any batch size > 1 fails on variable-length examples.
data_collator = DataCollatorForSeq2Seq(
    tokenizer=tokenizer,
    padding=True,
    label_pad_token_id=-100,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=processed_datasets["train"],
    eval_dataset=processed_datasets["validation"],  # this is fine; it just won't auto-eval
    data_collator=data_collator,
)


In [None]:
# Run the fine-tuning loop; train_result carries the final training metrics.
train_result = trainer.train()

# Report the training metrics and write them next to the checkpoints, so the
# run summary survives after the kernel is gone (previously train_result was
# captured but never used).
trainer.log_metrics("train", train_result.metrics)
trainer.save_metrics("train", train_result.metrics)

# Persist the trainer state as well.
trainer.save_state()

In [None]:
# Run a manual evaluation pass with the trainer and print the metrics it returns.
metrics = trainer.evaluate()
print(metrics)

In [None]:
SAVE_DIR = f"outputs/granite-4.0-micro-raft-peft-{ds_name}"

# Write the PEFT-wrapped model first, then the tokenizer, into the same
# directory so the two can be loaded together later.
for artifact in (model, tokenizer):
    artifact.save_pretrained(SAVE_DIR)

print("Saved adapter + tokenizer to:", SAVE_DIR)