In [5]:
import numpy as np
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from sklearn.preprocessing import StandardScaler
from scipy.linalg import eigh
import matplotlib.pyplot as plt

# --- Config ---
SEED = 42
LAYER = 6
LAMBDA = 0.0
MAX_TOKENS = 130
TEMPERATURE = 0.8

# Set seed
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# --- Load activations ---
X_shakespeare = np.load("shakespeare_layer6.npy")
X_maria = np.load("maria_layer6.npy")

# --- Normalize ---
scaler = StandardScaler()
X_foreground = scaler.fit_transform(X_shakespeare)
X_background = scaler.transform(X_maria)

# --- Contrastive PCA ---
alpha = 1.0
C_fg = np.cov(X_foreground, rowvar=False)
C_bg = np.cov(X_background, rowvar=False)
C_contrastive = C_fg - alpha * C_bg

eigvals, eigvecs = eigh(C_contrastive)
idx = np.argsort(eigvals)[::-1]
eigvecs = eigvecs[:, idx]
cpc1 = eigvecs[:, 0]

# --- Save Shakespeare vector ---
np.save("shakespeare_cpca_layer6_cpc1.npy", cpc1)
print("✅ Saved: shakespeare_cpca_layer6_cpc1.npy")

# --- Inject into GPT-2 ---
cpc1 = torch.tensor(cpc1, dtype=torch.float32)

# Load model + tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")
model.eval()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
cpc1 = cpc1.to(device)

# Prompt
prompt = "The nature of intelligence lies in"
inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=True).to(device)
input_ids = inputs["input_ids"]
attention_mask = inputs["attention_mask"]

# Forward pass to layer
with torch.no_grad():
    outputs = model.transformer(**inputs, output_hidden_states=True)
    residual = outputs.hidden_states[LAYER][0]  # [seq_len, 768]

# Inject Shakespeare latent into final token
residual[-1] += LAMBDA * cpc1

# Generate one token with injection
with torch.no_grad():
    logits = model.lm_head(residual.unsqueeze(0))
    next_token_id = torch.argmax(logits[0, -1]).unsqueeze(0)
    input_ids = torch.cat([input_ids, next_token_id.unsqueeze(0)], dim=1)
    attention_mask = torch.cat([attention_mask, torch.ones_like(next_token_id.unsqueeze(0))], dim=1)

# Full continuation
output = model.generate(
    input_ids=input_ids,
    attention_mask=attention_mask,
    max_new_tokens=MAX_TOKENS,
    do_sample=True,
    temperature=TEMPERATURE,
    top_k=50,
    top_p=0.95,
    pad_token_id=tokenizer.eos_token_id,
    eos_token_id=tokenizer.eos_token_id,
)

result = tokenizer.decode(output[0], skip_special_tokens=True)

print("\n🎭 GPT-2 with Shakespeare latent injected:\n")
print(result)


✅ Saved: shakespeare_cpca_layer6_cpc1.npy

🎭 GPT-2 with Shakespeare latent injected:

The nature of intelligence lies in the recognition of the inherent nature of the individual and in the willingness to pursue the individual's interests at all cost. The ability to be free, to exercise one's freedom, to engage in political activity, to participate in a social and political discussion, to engage in social debate, to engage in community service, to participate in other forms of expression, and to participate in the social interaction of the world are the same as those that are innate to the individual.

Thus, the individual can have all of the essential qualities, qualities that are unique to him, but that are not in common with all others. Therefore, the individual can only be free


In [6]:
import numpy as np

# Load full cPCA components from earlier runs (each shape: [768, n_components])
maria_cpca = np.load("cpca_layer6_cpc1.npy") if "cpca_layer6_cpc1.npy" in locals() else None
shakespeare_cpca = np.load("shakespeare_cpca_layer6_cpc1.npy") if "shakespeare_cpca_layer6_cpc1.npy" in locals() else None

# Option 1: You already have the components in memory — just save them
# Option 2: If you have them saved as .npy from contrastive PCA, load them manually like this:
# maria_cpca = np.load("cpca_layer6_components.npy")[:, 0]  # first component
# shakespeare_cpca = np.load("shakespeare_layer6_components.npy")[:, 0]

# Save them under the names used in the generation script
if maria_cpca is not None:
    np.save("freedom_latent_maria_layer6.npy", maria_cpca)
    print("✅ Saved Maria latent: freedom_latent_maria_layer6.npy")

if shakespeare_cpca is not None:
    np.save("shakespeare_cpca_layer6_cpc1.npy", shakespeare_cpca)
    print("✅ Saved Shakespeare latent: shakespeare_cpca_layer6_cpc1.npy")