# In [3]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import numpy as np

# ---- Config ----
# Experiment settings; the rest of the script reads these module-level names.
SEED = 42            # seed handed to the torch RNGs below
LAYER = 6            # index into the hidden-state tuple used for steering
LAMBDA = 100.0       # multiplier applied to the steering vector
MAX_TOKENS = 130     # passed to generate() as max_new_tokens
TEMPERATURE = 0.8    # sampling temperature
TOP_K = 50           # top-k sampling cutoff
TOP_P = 0.95         # nucleus (top-p) sampling cutoff

# ---- Setup ----
# Seed the CPU generator and every CUDA generator with the same value.
for _seed_fn in (torch.manual_seed, torch.cuda.manual_seed_all):
    _seed_fn(SEED)

# Prefer the GPU when one is visible to torch, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

_MODEL_NAME = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(_MODEL_NAME)
model = GPT2LMHeadModel.from_pretrained(
    _MODEL_NAME,
    output_hidden_states=True,
    return_dict_in_generate=True,
)
# Inference only: move the weights to the chosen device and switch to eval mode.
model.to(device)
model.eval()

# ---- Load steering vectors ----
def _load_steering_vector(path):
    """Load one steering vector from a ``.npy`` file onto ``device``.

    Args:
        path: Location of the ``.npy`` file, absolute or relative to the
            current working directory.

    Returns:
        A flat ``float32`` tensor on ``device`` with one entry per hidden
        dimension of ``model``.

    Raises:
        FileNotFoundError: If ``path`` does not point to an existing file.
            The message includes the resolved path and the working directory.
        ValueError: If the number of elements in the file does not match the
            model's hidden size.
    """
    from pathlib import Path

    resolved = Path(path).resolve()
    if not resolved.is_file():
        raise FileNotFoundError(
            f"Steering vector file not found: {resolved} "
            f"(current working directory: {Path.cwd()}). "
            "Create the file or correct the path before running the comparison."
        )

    vector = torch.tensor(np.load(resolved), dtype=torch.float32).to(device)
    # Flatten so that a stored shape such as (1, hidden) is also accepted.
    vector = vector.reshape(-1)

    expected = model.config.hidden_size
    if vector.numel() != expected:
        raise ValueError(
            f"Steering vector in {resolved} has {vector.numel()} elements, "
            f"but the model hidden size is {expected}."
        )
    return vector


# Maps a condition label to its steering vector; "none" is the unsteered baseline.
vecs = {
    "none": None,
    "maria": _load_steering_vector("freedom_latent_maria_layer6.npy"),
    "shakespeare": _load_steering_vector("shakespeare_cpca_layer6_cpc1.npy"),
}

# ---- Core Function ----
def generate_with_vector(prompt, steering_vector=None, name="none"):
    """Sample a continuation of ``prompt``, optionally steering the model.

    When ``steering_vector`` is given, ``LAMBDA * steering_vector`` is added to
    the last-position output of transformer block ``LAYER - 1`` on every
    forward pass made by ``model.generate``. That output is the tensor exposed
    as ``hidden_states[LAYER]``. The steered activation therefore flows
    through the remaining blocks and the final layer norm, and it influences
    every generated token rather than a single greedy one.

    Args:
        prompt: Text to condition the generation on.
        steering_vector: 1-D tensor with one entry per hidden dimension, or
            ``None`` for an unsteered baseline.
        name: Label of the condition. Accepted for caller convenience; it is
            not used inside the function.

    Returns:
        A ``(decoded, scores)`` pair: the decoded prompt plus continuation,
        and ``outputs.scores`` from ``generate`` (one entry per generated
        token).

    Raises:
        ValueError: If ``LAYER`` is outside ``1..number of transformer blocks``
            while a steering vector is supplied.
    """
    inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=True).to(device)
    input_ids = inputs["input_ids"]
    attention_mask = inputs["attention_mask"]

    hook_handle = None
    if steering_vector is not None:
        blocks = model.transformer.h
        if not 1 <= LAYER <= len(blocks):
            raise ValueError(
                f"LAYER must be between 1 and {len(blocks)} to steer, got {LAYER}"
            )
        offset = LAMBDA * steering_vector

        def _add_steering(module, args, output):
            # A block may return a tuple (hidden states first) or a bare tensor.
            hidden = output[0] if isinstance(output, tuple) else output
            # Work on a copy so tensors held elsewhere are not mutated in place.
            steered = hidden.clone()
            # With KV caching, each step after the first sees only the newest
            # token, so the last position is the token currently being predicted.
            steered[:, -1, :] += offset.to(device=steered.device, dtype=steered.dtype)
            if isinstance(output, tuple):
                return (steered,) + tuple(output[1:])
            return steered

        hook_handle = blocks[LAYER - 1].register_forward_hook(_add_steering)

    try:
        with torch.no_grad():
            outputs = model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_new_tokens=MAX_TOKENS,
                do_sample=True,
                temperature=TEMPERATURE,
                top_k=TOP_K,
                top_p=TOP_P,
                return_dict_in_generate=True,
                output_scores=True,
                output_hidden_states=True,
                # The hook relies on one-token steps, so keep the cache enabled.
                use_cache=True,
                eos_token_id=tokenizer.eos_token_id,
                pad_token_id=tokenizer.eos_token_id,
            )
    finally:
        # Always detach the hook so later calls (e.g. the baseline) run unsteered.
        if hook_handle is not None:
            hook_handle.remove()

    decoded = tokenizer.decode(outputs.sequences[0], skip_special_tokens=True)
    scores = outputs.scores  # one score tensor per generated token
    return decoded, scores

# ---- Run Comparison ----
prompt = "The nature of intelligence lies in"
results = {}  # condition label -> {"text": str, "scores": per-token scores}

for label, vec in vecs.items():
    print(f"\n🧠 Generating with: {label}")
    # Re-seed before every condition so each run samples from the same RNG
    # state. Otherwise later conditions start from whatever state the previous
    # run left behind, and differences between outputs mix sampling noise with
    # the effect of the steering vector.
    torch.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)
    text, scores = generate_with_vector(prompt, vec, label)
    results[label] = {
        "text": text,
        "scores": scores,
    }
    print(f"\n{text}\n")

# ---- Optional: Save results for analysis later ----
import pickle

# Copy the score tensors to the CPU before pickling. Tensors pickled while on
# a CUDA device cannot be unpickled on a machine without CUDA, which would make
# the saved file unusable for later analysis elsewhere.
portable_results = {
    label: {
        **entry,
        "scores": tuple(step.detach().cpu() for step in entry["scores"]),
    }
    for label, entry in results.items()
}

with open("steering_comparison_results.pkl", "wb") as f:
    pickle.dump(portable_results, f)

print("✅ All generations complete. Results saved to steering_comparison_results.pkl.")


# Cell output:
# FileNotFoundError: [Errno 2] No such file or directory: 'freedom_latent_maria_layer6.npy'