In [None]:
import json
import random

import torch

from lre import LREModel

In [None]:
# 1. Configuration

# Model to probe. gpt2-xl is used because it is smaller/faster than GPT-J-6B
# but still works for this experiment.
MODEL_NAME = "gpt2-xl"

# Layer identifier handed to the LRE train/evaluate calls.
# Layers around 15 are usually a good choice for relations in GPT2-XL.
LAYER_NAME = "transformer.h.15"

# The prompt template. "{}" is the placeholder slot (presumably filled with
# each example's subject -- confirm against LREModel).
TEMPLATE = "{} students are typically"

In [None]:
# 2. Load Data
# JSON is UTF-8 by specification, so the encoding is given explicitly rather
# than relying on the platform's locale default (which is not UTF-8 on every
# system and would garble or reject non-ASCII text).
with open("data_sample.json", "r", encoding="utf-8") as f:
    data = json.load(f)

In [None]:
# Simple split
# A dedicated, seeded RNG makes the train/test split reproducible across
# kernel restarts, so the evaluation numbers can be compared between runs.
# random.sample(...) returns a new shuffled list instead of shuffling `data`
# in place, which keeps this cell idempotent: re-running it yields the same
# split rather than reshuffling already-shuffled data.
SPLIT_SEED = 0          # change to draw a different (but still repeatable) split
TRAIN_FRACTION = 0.6    # share of examples used for training; the rest is test

shuffled_data = random.Random(SPLIT_SEED).sample(data, k=len(data))
split_idx = int(len(shuffled_data) * TRAIN_FRACTION)
train_data = shuffled_data[:split_idx]
test_data = shuffled_data[split_idx:]

In [None]:
# Sanity check: report how many examples landed in each split.
print(f"Data: {len(train_data)} train, {len(test_data)} test")

In [None]:
# 3. Initialize Model
# Pick an available torch device instead of hard-coding "mps", which only
# exists on PyTorch builds with Apple's Metal backend. "mps" is still preferred
# when present, so behaviour on the original machine is unchanged.
# NOTE(review): assumes LREModel accepts any torch device string -- confirm.
_mps_backend = getattr(torch.backends, "mps", None)  # attribute absent on older torch
if _mps_backend is not None and _mps_backend.is_available():
    DEVICE = "mps"
elif torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

lre = LREModel(model_name=MODEL_NAME, device=DEVICE)

In [None]:
# 4. Train Linear Relation Estimator
# Calls lre.train_lre on the training split with the configured layer and
# prompt template. Per the original author's note, this finds a linear mapping
# between the subject representation and the answer. The result is kept in
# `operator` and handed to lre.evaluate in the next step.
operator = lre.train_lre(train_data, LAYER_NAME, TEMPLATE)

In [None]:
# 5. Test Faithfulness
# Does the linear approximation actually predict the correct answers?
# Runs the trained operator against the held-out test split, using the same
# layer and template that were passed to train_lre. The call is the cell's last
# expression, so whatever it returns is shown as the cell output.
lre.evaluate(operator, test_data, LAYER_NAME, TEMPLATE)