# 03 - Results Analysis and Validation

In [None]:
# Ensure the repo root (containing `src/` and `data/`) is on sys.path
import sys, os
from pathlib import Path

def add_repo_root(marker_dir='src'):
    """Locate the repository root and make it importable.

    Starting at the current working directory, walk upward through every
    ancestor directory and pick the first one that contains a sub-directory
    named ``marker_dir``. That directory is inserted at the front of
    ``sys.path`` (unless it is already listed) so that packages living in it
    can be imported from the notebook.

    Args:
        marker_dir: Name of the directory whose presence identifies the
            repository root.

    Returns:
        The absolute path (``str``) of the directory containing
        ``marker_dir``. If no ancestor contains it, the absolute current
        working directory is returned and ``sys.path`` is left untouched.
    """
    start = os.path.abspath(os.getcwd())
    current = start
    while True:
        if os.path.isdir(os.path.join(current, marker_dir)):
            if current not in sys.path:
                sys.path.insert(0, current)
            return current
        parent = os.path.dirname(current)
        if parent == current:
            # dirname() of the filesystem root is the root itself, so every
            # ancestor (including the immediate parent of the cwd) has been
            # checked; a separate '..' probe would be redundant.
            return start
        current = parent

# Resolve the repository root once (this also puts it on sys.path), then
# derive the location of the IOI examples file from it.
REPO_ROOT = add_repo_root()
DATA_PATH = str(Path(REPO_ROOT, 'data', 'ioi_examples.json'))


In [None]:
import torch
from transformer_lens import HookedTransformer
from src.ioi_task import load_ioi_examples
from src.visualization import plot_head_importance
from src.eap_algorithm import EAPConfig, EdgeAttributionPatching

# Load the pretrained model and the IOI examples referenced by DATA_PATH.
model = HookedTransformer.from_pretrained('gpt2-small')
examples = load_ioi_examples(DATA_PATH)

# A previously saved importance tensor could be loaded from disk here instead;
# for this demo the importance is recomputed on a small subset of examples.
demo_examples = examples[:8]

# to_tokens(...) is indexed with [0] to take the first row of its result, so
# each list holds one token sequence per example.
clean_seqs = [model.to_tokens(ex.clean)[0] for ex in demo_examples]
corr_seqs = [model.to_tokens(ex.corrupted)[0] for ex in demo_examples]

# Pad every sequence in a batch to that batch's longest sequence, using the
# tokenizer's EOS id as filler.
# NOTE(review): the clean and corrupted batches are padded independently, and
# padding is appended at the end while EAPConfig uses target_token_idx=-1.
# If prompts tokenize to different lengths, index -1 may land on padding or
# the two batches may differ in width -- confirm against the EAP implementation.
pad_id = model.tokenizer.eos_token_id
clean = torch.nn.utils.rnn.pad_sequence(clean_seqs, batch_first=True, padding_value=pad_id)
corr = torch.nn.utils.rnn.pad_sequence(corr_seqs, batch_first=True, padding_value=pad_id)

# Run edge attribution patching on the clean/corrupted batches.
eap_config = EAPConfig(target_token_idx=-1)
eap = EdgeAttributionPatching(model, eap_config)
importance = eap.compute_head_importance(clean, corr)

# Make sure the output directory exists, then save the head-importance plot.
save_dir = Path(REPO_ROOT, 'results', 'plots')
save_dir.mkdir(parents=True, exist_ok=True)
plot_path = save_dir / 'head_importance_analysis.png'
plot_head_importance(importance, save_path=str(plot_path))
