# 04 · Fine-tune Slice *t* (Unsloth QLoRA)

Load 4-bit Llama weights, configure LoRA adapters, and respect the 25M-token budget per slice.

In [None]:
# Persistent Drive + run mode setup
import os
import sys
from pathlib import Path

# Mount Google Drive when the Colab helper can be imported. Any failure in the
# import or the mount is reported and then ignored so the notebook can still
# run elsewhere (best effort by design).
try:
    from google.colab import drive  # type: ignore

    DRIVE_MOUNT = Path('/content/drive')
    if not DRIVE_MOUNT.exists():
        drive.mount('/content/drive')
except Exception as mount_error:  # pragma: no cover
    print(f'Colab drive mount skipped: {mount_error}')

# Prefer MyDrive when the mount point is present; otherwise use the home directory.
DRIVE_ROOT = (
    Path('/content/drive/MyDrive') if Path('/content/drive').exists() else Path.home()
).resolve()

# The repo checkout is expected directly under DRIVE_ROOT; fail fast with a
# pointer to the setup notebook when it is missing.
PROJECT_ROOT = DRIVE_ROOT.joinpath('secure-llm-mia')
if not PROJECT_ROOT.exists():
    raise FileNotFoundError('Run 00_colab_setup.ipynb first to clone the repo on Drive.')

# Make the repo importable (needed for the `src` import below), publish its
# location through the environment, and make it the working directory.
project_root_str = str(PROJECT_ROOT)
if project_root_str not in sys.path:
    sys.path.append(project_root_str)

os.environ['SECURE_LLM_MIA_ROOT'] = project_root_str
os.chdir(PROJECT_ROOT)

# Imported here rather than at the top of the cell: it only resolves once
# PROJECT_ROOT is on sys.path.
from src.utils.runtime import current_run_mode

RUN_MODE = current_run_mode()
print('PROJECT_ROOT:', PROJECT_ROOT)
print('Active run mode:', RUN_MODE.name, '-', RUN_MODE.description)

# Working directories inside the project, created on first use.
DATA_ROOT, ARTIFACTS_DIR, CHECKPOINT_ROOT = (
    PROJECT_ROOT / subdir for subdir in ('data', 'artifacts', 'checkpoints')
)
for path in (DATA_ROOT, ARTIFACTS_DIR, CHECKPOINT_ROOT):
    path.mkdir(parents=True, exist_ok=True)

# The BHC CSV is kept under DRIVE_ROOT, next to the project rather than inside it.
BHC_DATA_DIR = DRIVE_ROOT / 'mimic-iv-bhc'
BHC_CSV_PATH = BHC_DATA_DIR / 'mimic-iv-bhc.csv'
BHC_DATA_DIR.mkdir(parents=True, exist_ok=True)
print('BHC CSV path:', BHC_CSV_PATH)


In [None]:
import math
from pathlib import Path

import torch
from datasets import Dataset
from transformers import DataCollatorForLanguageModeling, Trainer, TrainingArguments
from unsloth import FastLanguageModel

from src.modeling.lora import LoRAHyperParams, compute_gradient_accumulation
from src.modeling.train import TokenBudgetTracker

# Slice and track identifiers; both end up in the checkpoint path.
SLICE_ID = 1
TRACK = 'noreplay'

# Base checkpoint; set the UNSLOTH_MODEL_NAME environment variable to override it.
MODEL_NAME = os.environ.get('UNSLOTH_MODEL_NAME', 'unsloth/Meta-Llama-3.1-8B-bnb-4bit')

# Sequence length and token-budget settings used further down the notebook.
MAX_SEQ_LENGTH = 4096
TOKENS_PER_SLICE = 25 * 1_000_000
TOKENS_PER_STEP = 128 * 1_000
MICRO_BATCH = 1
AVG_TOKENS_PER_SAMPLE = 3 * 1_000

# Packed sequences are read from a per-run-mode folder under ARTIFACTS_DIR;
# the error message points at notebook 03 when the file is absent.
packed_path = ARTIFACTS_DIR.joinpath('packed', RUN_MODE.name, 'packed_sequences.parquet')
if packed_path.exists():
    train_dataset = Dataset.from_parquet(str(packed_path))
else:
    raise FileNotFoundError('Packed shards missing. Run notebook 03 to generate them.')

# Expose only the two model-input columns, formatted as torch tensors.
train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask'])
print('Loaded packed sequences:', len(train_dataset))

# Load the 4-bit base model named by MODEL_NAME together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=MODEL_NAME,
    max_seq_length=MAX_SEQ_LENGTH,
    load_in_4bit=True,
)
tokenizer.padding_side = 'right'

# Fall back to EOS-as-pad only when the tokenizer has no pad token of its own.
# DataCollatorForLanguageModeling(mlm=False) sets labels to -100 wherever
# input_ids equal pad_token_id, so forcing pad == EOS would also remove every
# genuine EOS token from the loss. Keeping an existing dedicated pad token
# avoids that.
# NOTE(review): if the tokenizer's own pad token already equals EOS, the
# masking still applies — confirm which pad token this checkpoint ships with.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# LoRA hyper-parameters are collected in one project-level config object so
# the adapter call below reads them from a single place.
lora_cfg = LoRAHyperParams(
    r=32,
    alpha=32,
    dropout=0.05,
    target_modules=(
        'q_proj',
        'k_proj',
        'v_proj',
        'o_proj',
        'gate_proj',
        'up_proj',
        'down_proj',
    ),
)

# Wrap the base model with adapters on every module listed in the config.
model = FastLanguageModel.get_peft_model(
    model,
    r=lora_cfg.r,
    lora_alpha=lora_cfg.alpha,
    lora_dropout=lora_cfg.dropout,
    target_modules=[*lora_cfg.target_modules],
    bias='none',
    use_gradient_checkpointing='unsloth',
)

# Gradient-accumulation factor from the project helper, given the per-step
# token target, the micro-batch size and the assumed average sample length.
accum_steps = compute_gradient_accumulation(TOKENS_PER_STEP, MICRO_BATCH, AVG_TOKENS_PER_SAMPLE)
print('Gradient accumulation:', accum_steps)

# Choose the mixed-precision mode from what the GPU supports. CUDA being
# available does not imply bf16 support (pre-Ampere GPUs such as the Colab T4
# lack it), so fall back to fp16 there instead of requesting bf16 blindly.
from unsloth import is_bfloat16_supported

cuda_available = torch.cuda.is_available()
use_bf16 = cuda_available and is_bfloat16_supported()
use_fp16 = cuda_available and not use_bf16

# Causal-LM collator (mlm=False): labels are a copy of input_ids with pad
# positions set to -100.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
output_dir = CHECKPOINT_ROOT / f'slice_{SLICE_ID}' / TRACK / RUN_MODE.name
training_args = TrainingArguments(
    output_dir=str(output_dir),
    per_device_train_batch_size=MICRO_BATCH,
    gradient_accumulation_steps=accum_steps,
    learning_rate=1e-4,
    # NOTE(review): max_steps (5) is shorter than warmup_steps (10), so the
    # learning rate never finishes warming up, and 5 steps is far below
    # TOKENS_PER_SLICE / TOKENS_PER_STEP (~195 steps). Confirm this is an
    # intentional smoke-test setting before a full run.
    warmup_steps=10,
    max_steps=5,
    logging_steps=1,
    save_steps=50,
    bf16=use_bf16,
    fp16=use_fp16,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    data_collator=data_collator,
)


In [None]:
# Launch fine-tuning with the Trainer built in the previous cell.
trainer.train()

In [None]:
# Feed the length of each packed sequence to the budget tracker and report the
# totals. This tallies the dataset itself, not the steps the Trainer executed.
tracker = TokenBudgetTracker(tokens_per_slice=TOKENS_PER_SLICE)
for seq in train_dataset['input_ids']:
    # NOTE(review): update() presumably returns a truthy value once the
    # per-slice budget is reached — confirm against src.modeling.train.
    budget_reached = tracker.update(len(seq))
    if budget_reached:
        break
print(f'Approximate tokens registered: {tracker.consumed_tokens:,}')
print(f'Remaining tokens: {tracker.remaining:,}')

In [None]:
# Persist the trained adapters and the tokenizer under a directory keyed by
# slice, track and run mode.
checkpoint_dir = CHECKPOINT_ROOT / f'slice_{SLICE_ID}' / TRACK / RUN_MODE.name
checkpoint_dir.mkdir(parents=True, exist_ok=True)
# Save through the model instance: Unsloth documents model.save_pretrained(...)
# for storing LoRA adapters, whereas FastLanguageModel is the loader/patcher
# class and is not documented to expose a save_pretrained method.
model.save_pretrained(str(checkpoint_dir))
tokenizer.save_pretrained(str(checkpoint_dir))
print('Saved adapters + tokenizer to', checkpoint_dir)

✅ Fine-tuning now consumes the packed shards saved in notebook 03 so you can run the end-to-end pipeline without synthetic placeholders.