# 05 · Evaluation Generation & Log-probs

Load Unsloth checkpoints, run evaluation prompts, and capture per-token statistics for MIA features.

In [1]:
!pip install unsloth



In [2]:
# Persistent Drive + run mode setup
#
# Resolves the project root (on Google Drive when available, otherwise under
# the home directory), makes the project importable, switches the working
# directory to it, and defines the data/artifact/checkpoint paths used by the
# rest of the notebook.
import os
import sys
from pathlib import Path

# Best-effort Drive mount: outside Colab the import fails, and any exception
# raised here is reported and skipped rather than aborting the notebook.
try:
    from google.colab import drive  # type: ignore
    DRIVE_MOUNT = Path('/content/drive')
    if not DRIVE_MOUNT.exists():
        drive.mount('/content/drive')
except Exception as exc:  # pragma: no cover
    print(f'Colab drive mount skipped: {exc}')

# Use MyDrive when the mount point exists; otherwise fall back to the home directory.
if Path('/content/drive').exists():
    DRIVE_ROOT = Path('/content/drive/MyDrive').resolve()
else:
    DRIVE_ROOT = Path.home().resolve()

# The project checkout must already exist; this notebook does not create it.
PROJECT_ROOT = DRIVE_ROOT / 'secure-llm-mia'
if not PROJECT_ROOT.exists():
    raise FileNotFoundError('Run 00_colab_setup.ipynb first to clone the repo on Drive.')

# Needed so the `src.*` imports below (and in later cells) resolve.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))

# Export the root and make it the working directory.
# NOTE(review): presumably project code reads SECURE_LLM_MIA_ROOT — confirm.
os.environ['SECURE_LLM_MIA_ROOT'] = str(PROJECT_ROOT)
os.chdir(PROJECT_ROOT)

# Project-local import; must come after the sys.path update above.
from src.utils.runtime import current_run_mode

# The run-mode object exposes at least `.name` and `.description` (used below
# and in later cells to select run-mode-specific artifacts).
RUN_MODE = current_run_mode()
print('PROJECT_ROOT:', PROJECT_ROOT)
print('Active run mode:', RUN_MODE.name, '-', RUN_MODE.description)

# Standard project directories; created if missing so later cells can write to them.
DATA_ROOT = PROJECT_ROOT / 'data'
ARTIFACTS_DIR = PROJECT_ROOT / 'artifacts'
CHECKPOINT_ROOT = PROJECT_ROOT / 'checkpoints'
for path in (DATA_ROOT, ARTIFACTS_DIR, CHECKPOINT_ROOT):
    path.mkdir(parents=True, exist_ok=True)

# Location of the BHC CSV, stored next to (not inside) the project directory.
# Only the directory is created here; the CSV itself is not read in this cell.
BHC_DATA_DIR = DRIVE_ROOT / 'mimic-iv-bhc'
BHC_DATA_DIR.mkdir(parents=True, exist_ok=True)
BHC_CSV_PATH = BHC_DATA_DIR / 'mimic-iv-bhc.csv'
print('BHC CSV path:', BHC_CSV_PATH)


PROJECT_ROOT: /content/drive/MyDrive/secure-llm-mia
Active run mode: subset - Quick debugging subset (<=2k rows) for lightweight Colab smoke tests.
BHC CSV path: /content/drive/MyDrive/mimic-iv-bhc/mimic-iv-bhc.csv


In [3]:
import numpy as np
import torch
from unsloth import FastLanguageModel

from src.modeling.logprobs import token_level_stats

# Identify the checkpoint under evaluation: data slice, training track, and
# the maximum context length used for both loading and tokenisation.
SLICE_ID = 1
TRACK = 'noreplay'
MAX_SEQ_LENGTH = 4096

# Checkpoints are laid out as <root>/slice_<id>/<track>/<run mode>.
checkpoint_dir = CHECKPOINT_ROOT.joinpath(f'slice_{SLICE_ID}', TRACK, RUN_MODE.name)
if not checkpoint_dir.exists():
    raise FileNotFoundError('Run notebook 04 to create checkpoints first.')

# Load the checkpoint in 4-bit and switch Unsloth into inference mode.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=str(checkpoint_dir),
    max_seq_length=MAX_SEQ_LENGTH,
    load_in_4bit=True,
)
FastLanguageModel.for_inference(model)

# Keep the tokenizer's own length limit in step with the model's context window.
tokenizer.model_max_length = MAX_SEQ_LENGTH

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.11.3: Fast Llama patching. Transformers: 4.57.1.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu128. CUDA: 7.5. CUDA Toolkit: 12.8. Triton: 3.5.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.33.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/235 [00:00<?, ?B/s]

unsloth/meta-llama-3.1-8b-bnb-4bit does not have a padding token! Will use pad_token = <|finetune_right_pad_id|>.


Unsloth 2025.11.3 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(128256, 4096, padding_idx=128004)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Identity()
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=32, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=32, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lor

In [None]:
import pandas as pd

# Canonical dataset for the active run mode (one row per note).
CANONICAL_PATH = ARTIFACTS_DIR / f'canonical_bhc_{RUN_MODE.name}.parquet'
if not CANONICAL_PATH.exists():
    raise FileNotFoundError('Canonical dataset missing. Run notebook 01 to create it.')

canonical_df = pd.read_parquet(CANONICAL_PATH)
# IDs read from the member/non-member text files are always strings, so the
# lookup index is normalised to `str` as well. Without this, an integer
# `subject_id` column would make every `sid in canonical_df.index` test fail.
# One row is kept per subject so `.at[sid, 'text']` returns a scalar.
canonical_df = (
    canonical_df
    .assign(subject_id=lambda frame: frame['subject_id'].astype(str))
    .drop_duplicates('subject_id')
    .set_index('subject_id')
)

# Directory holding the per-slice member / non-member ID lists.
IDS_DIR = ARTIFACTS_DIR / f'slice_{SLICE_ID}' / 'ids'

def read_ids(filename: str) -> list[str]:
    """Return the non-blank, whitespace-stripped lines of an ID list in ``IDS_DIR``.

    Args:
        filename: Name of the ID file, relative to ``IDS_DIR``.

    Returns:
        One string per non-empty line, in file order.

    Raises:
        FileNotFoundError: If the ID file does not exist.
    """
    id_file = IDS_DIR / filename
    if not id_file.exists():
        raise FileNotFoundError(f'Required ID list not found: {id_file}')
    stripped_lines = (raw_line.strip() for raw_line in id_file.read_text().splitlines())
    return [entry for entry in stripped_lines if entry]

member_ids = read_ids('members.txt')
non_member_ids = read_ids('non_members.txt')

# Upper bound on the number of examples drawn from each class.
MAX_EVAL_PER_CLASS = 5

# Keep only IDs that actually have a row in the canonical dataset *before*
# truncating. Truncating first would let missing IDs silently shrink or
# unbalance the evaluation set.
available_member_ids = [sid for sid in member_ids if sid in canonical_df.index]
available_non_member_ids = [sid for sid in non_member_ids if sid in canonical_df.index]

# Balanced per-class count: limited by the cap and by the smaller class.
EVAL_PER_CLASS = min(
    MAX_EVAL_PER_CLASS,
    len(available_member_ids),
    len(available_non_member_ids),
)
if EVAL_PER_CLASS == 0:
    raise ValueError(
        'Not enough member/non-member IDs found in canonical dataset to build an evaluation set '
        f'(members available={len(available_member_ids)}, '
        f'non-members available={len(available_non_member_ids)}).'
    )

# Members are labelled 1 and non-members 0; members come first, as before.
labelled_ids = (
    [(sid, 1) for sid in available_member_ids[:EVAL_PER_CLASS]]
    + [(sid, 0) for sid in available_non_member_ids[:EVAL_PER_CLASS]]
)
eval_examples: list[dict] = [
    {'example_id': sid, 'label': label, 'text': canonical_df.at[sid, 'text']}
    for sid, label in labelled_ids
]

# Both classes contribute exactly EVAL_PER_CLASS rows, so these counts are accurate.
print(f'Evaluation examples: {len(eval_examples)} (members={EVAL_PER_CLASS}, non-members={EVAL_PER_CLASS})')

In [6]:
# Split the evaluation records into parallel lists (same order throughout).
prompts = [ex['text'] for ex in eval_examples]
example_ids = [ex['example_id'] for ex in eval_examples]
labels = [ex['label'] for ex in eval_examples]

# Tokenise all prompts as one padded batch and run a single forward pass.
# NOTE: the full (batch, seq, vocab) logits tensor is materialised; keep the
# evaluation set small or batch this step if memory becomes a problem.
inputs = tokenizer(prompts, return_tensors='pt', padding=True, truncation=True, max_length=MAX_SEQ_LENGTH).to(model.device)
with torch.inference_mode():
    outputs = model(**inputs)

logits_tensor = outputs.logits.detach().cpu()
if logits_tensor.dtype == torch.bfloat16:
    # NumPy has no bfloat16 dtype, so `.numpy()` raises on bf16 tensors.
    # Upcast only in that case; fp16/fp32 logits are passed through unchanged.
    logits_tensor = logits_tensor.float()
logits = logits_tensor.numpy()
target_ids = inputs['input_ids'].cpu().numpy()

# NOTE(review): stats are computed over the padded batch, and no attention
# mask is passed, so padding positions are included — mask them downstream
# using inputs['attention_mask'].
# NOTE(review): presumably token_level_stats applies the next-token shift
# between logits and targets internally — confirm in src.modeling.logprobs.
stats = token_level_stats(logits, target_ids)
print('Available features:', stats.keys())

Available features: dict_keys(['nll', 'entropy', 'max_prob', 'log_probs', 'win@1', 'win@5', 'win@10', 'win@20'])


Persist per-example features to `reports/features/features_slice_{SLICE_ID}_{TRACK}_{RUN_MODE}.parquet` for downstream attacks once evaluation prompts are finalized.

In [8]:
from datasets import Dataset

# Output location: one parquet file per (slice, track, run mode).
FEATURES_DIR = PROJECT_ROOT / 'reports' / 'features'
FEATURES_DIR.mkdir(parents=True, exist_ok=True)
features_path = FEATURES_DIR / f'features_slice_{SLICE_ID}_{TRACK}_{RUN_MODE.name}.parquet'

# One row per evaluation example; the per-token statistics are stored as lists.
feature_data = {
    'example_id': example_ids,
    'label': labels,
    'token_nll': stats['nll'].tolist(),
    'token_entropy': stats['entropy'].tolist(),
    'token_max_prob': stats['max_prob'].tolist(),
    # The statistics were computed on a padded batch. The tokenizer's mask
    # (1 = real token, 0 = padding) is stored so downstream attacks can drop
    # padding positions.
    # NOTE(review): the mask is aligned with the input tokens; confirm how the
    # per-token stats are aligned (e.g. shifted by one) before applying it.
    'attention_mask': inputs['attention_mask'].cpu().numpy().astype(int).tolist(),
}
# Top-k hit indicators, stored as integers.
for k in (1, 5, 10, 20):
    key = f'win@{k}'
    feature_data[key] = stats[key].astype(int).tolist()

feature_dataset = Dataset.from_dict(feature_data)
feature_dataset.to_parquet(str(features_path))
print(f'Saved feature dataset to {features_path}')

Saved feature table to /content/drive/MyDrive/secure-llm-mia/reports/features/features_slice_1_noreplay_subset.parquet
