# 04 · Fine-tune Slice *t* (Unsloth QLoRA)

Load 4-bit Llama weights with Unsloth, configure LoRA adapters, and respect the 25M-token budget per slice.

In [None]:
# Persistent project setup on Drive
import os
import sys
from pathlib import Path

# Locate the persistent workspace. On Colab we try to mount Google Drive;
# anywhere else the import fails and we simply report why the mount was skipped.
DRIVE_ROOT = Path('/content/drive')
try:
    from google.colab import drive  # type: ignore
    if not DRIVE_ROOT.exists():
        drive.mount('/content/drive')
except Exception as exc:  # pragma: no cover
    print(f'Colab drive mount skipped: {exc}')

# Prefer MyDrive when the Drive mount point is present, else the home directory.
BASE_ROOT = (DRIVE_ROOT / 'MyDrive' if DRIVE_ROOT.exists() else Path.home()).resolve()

# The repository must already be cloned into the workspace.
PROJECT_ROOT = BASE_ROOT / 'secure-llm-mia'
if not PROJECT_ROOT.exists():
    raise FileNotFoundError('Clone the repo via 00_colab_setup.ipynb before running this notebook.')

# Make `src.*` importable and advertise the root to project code via the environment.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))
os.environ['SECURE_LLM_MIA_ROOT'] = str(PROJECT_ROOT)

# Working directories used by this and later cells; created on demand.
DATA_ROOT, ARTIFACTS_DIR, CHECKPOINT_ROOT = (
    PROJECT_ROOT / name for name in ('data', 'artifacts', 'checkpoints')
)
for directory in (DATA_ROOT, ARTIFACTS_DIR, CHECKPOINT_ROOT):
    directory.mkdir(parents=True, exist_ok=True)

# Run the rest of the notebook from the project root.
os.chdir(PROJECT_ROOT)
print('PROJECT_ROOT:', PROJECT_ROOT)


In [None]:
import math
from pathlib import Path

import torch
from datasets import Dataset
from unsloth import FastLanguageModel
from trl import SFTTrainer, SFTConfig

from src.modeling.lora import LoRAHyperParams, compute_gradient_accumulation
from src.modeling.train import TokenBudgetTracker

# --- Run configuration -------------------------------------------------------
SLICE_ID = 1
TRACK = 'noreplay'
# Base checkpoint; override with the UNSLOTH_MODEL_NAME environment variable.
MODEL_NAME = os.getenv('UNSLOTH_MODEL_NAME', 'unsloth/Meta-Llama-3.1-8B-bnb-4bit')
MAX_SEQ_LENGTH = 4096
TOKENS_PER_SLICE = 25_000_000   # total token budget for one slice
TOKENS_PER_STEP = 128_000       # target tokens per optimizer step
MICRO_BATCH = 1                 # per-device batch size
AVG_TOKENS_PER_SAMPLE = 3_000   # estimate used to size gradient accumulation

# --- Load the 4-bit base model and its tokenizer -----------------------------
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=MODEL_NAME,
    max_seq_length=MAX_SEQ_LENGTH,
    load_in_4bit=True,
)
tokenizer.padding_side = 'right'
# Only fall back to EOS when the checkpoint ships no pad token. Overwriting a
# dedicated pad token with EOS makes pad and EOS indistinguishable, so any
# collator that masks padding in the labels also masks every EOS and the model
# may never be trained to stop.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# --- Attach LoRA adapters to the attention and MLP projections ---------------
lora_cfg = LoRAHyperParams(
    r=32,
    alpha=32,
    dropout=0.05,
    target_modules=('q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj'),
)
model = FastLanguageModel.get_peft_model(
    model,
    r=lora_cfg.r,
    target_modules=list(lora_cfg.target_modules),
    lora_alpha=lora_cfg.alpha,
    lora_dropout=lora_cfg.dropout,
    bias='none',
    use_gradient_checkpointing='unsloth',
)

# Accumulation count derived from the per-step token target, the micro-batch
# size and the average sample length (computed by the project helper).
accum_steps = compute_gradient_accumulation(TOKENS_PER_STEP, MICRO_BATCH, AVG_TOKENS_PER_SAMPLE)
print('Gradient accumulation:', accum_steps)


In [None]:
# Synthetic dataset placeholder; replace with slice-specific packed data
texts = [
    'Patient admitted with pneumonia. Received antibiotics and improved.',
    'ICU stay complicated by sepsis. Managed with fluids and vasopressors.',
]
dataset = Dataset.from_dict({'text': texts})

# Smoke-test schedule. Warmup is derived from the step count so it can never
# exceed it: with warmup longer than the run, training ends mid-warmup and the
# configured learning rate is never reached.
MAX_STEPS = 5
WARMUP_STEPS = min(10, MAX_STEPS // 2)


def format_examples(examples):
    """Return the training strings for a batch, each terminated with EOS.

    The end-of-sequence marker is taken from the loaded tokenizer rather than
    hard-coded as ``</s>``, which is only a special token for some model
    families and would otherwise be tokenized as ordinary text.
    No BOS is added here; the tokenizer is expected to prepend it during
    tokenization -- TODO confirm for non-Llama checkpoints.
    """
    batch = examples['text']
    # Some trainer versions may pass a single example instead of a batch;
    # wrap it so we never iterate over the characters of one string.
    if isinstance(batch, str):
        batch = [batch]
    return [f"{text}{tokenizer.eos_token}" for text in batch]


trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    args=SFTConfig(
        learning_rate=1e-4,
        per_device_train_batch_size=MICRO_BATCH,
        gradient_accumulation_steps=accum_steps,
        warmup_steps=WARMUP_STEPS,
        max_steps=MAX_STEPS,
        logging_steps=1,
        output_dir=str(CHECKPOINT_ROOT / f'slice_{SLICE_ID}' / TRACK),
    ),
    formatting_func=format_examples,
)


In [None]:
# Budget tracker for this slice; the accounting below is synthetic and is not
# derived from the batches the trainer actually consumed.
tracker = TokenBudgetTracker(tokens_per_slice=TOKENS_PER_SLICE)

# Run the (toy) fine-tuning job and show the trainer's result object.
processed = trainer.train()
print(processed)

# Feed 1000 mock micro-batches of average size into the tracker, stopping as
# soon as `update` returns a truthy value.
mock_batches = [(MICRO_BATCH, AVG_TOKENS_PER_SAMPLE)] * 1000
for batch_size, tokens_per_sample in mock_batches:
    budget_signal = tracker.update(batch_size * tokens_per_sample)
    if budget_signal:
        break
print(f'Tokens consumed (synthetic): {tracker.consumed_tokens:,}')


In [None]:
# Persist the LoRA adapters and tokenizer for this slice/track.
checkpoint_dir = CHECKPOINT_ROOT / f'slice_{SLICE_ID}' / TRACK
checkpoint_dir.mkdir(parents=True, exist_ok=True)
# Save through the PEFT-wrapped model instance. FastLanguageModel is the
# loader/patcher entry point; its documented API does not include a
# `save_pretrained(model, path)` helper.
# NOTE(review): confirm against the installed Unsloth version.
model.save_pretrained(str(checkpoint_dir))
tokenizer.save_pretrained(str(checkpoint_dir))
print('Saved adapters + tokenizer to', checkpoint_dir)


Replace the toy dataset with packed slice data and keep token accounting aligned with the 25M budget.