# Try to steer with PC1

In [1]:
import json
import os
import torch
import sys

sys.path.append('.')
sys.path.append('..')

from utils.steering_utils import ActivationSteering
from utils.probing_utils import *
from utils.inference_utils import *

# Let PyTorch use faster reduced-precision kernels for float32 matmuls
# (see the torch.set_float32_matmul_precision docs for the exact trade-off).
torch.set_float32_matmul_precision('high')

INFO 08-13 00:55:07 [__init__.py:235] Automatically detected platform cuda.


In [2]:
# Index of the model layer to steer; passed as `layer_indices` to
# ActivationSteering in the steering cell.
layer = 22

In [3]:
# Load the role-PCA first principal component (PC1) used as the steering direction.
# The file name encodes the layer, so derive it from `layer`: this keeps the
# steering vector and the steered layer in sync if `layer` is changed.
pc1_path = f'/root/git/persona-subspace/roles/data/pca_240/layer{layer}_pos23_pc1.pt'
# NOTE(review): weights_only=False unpickles arbitrary objects -- only load trusted files.
rp_basis = torch.load(pc1_path, weights_only=False)
# The saved object is a NumPy array; convert it to a torch tensor for steering.
rp_basis = torch.from_numpy(rp_basis)
print(f"Role PC1 shape: {rp_basis.shape}")

Role PC1 shape: torch.Size([4608])


In [4]:
# Directory for steering results; the steering cell reads and writes pc1.json here.
output_dir = "./results/roles_240/steering"

In [5]:
# Load the chat model and its tokenizer, requesting placement on cuda:0.
# `load_model` is provided by one of the utils star-imports in the first cell.
CHAT_MODEL_NAME = "google/gemma-2-27b-it"
model, tokenizer = load_model(CHAT_MODEL_NAME, device="cuda:0")

Loading checkpoint shards:   0%|          | 0/12 [00:00<?, ?it/s]

In [6]:
# Load the question set: one JSON document per line (JSON Lines).
question_path = '/root/git/persona-subspace/traits/data/questions_240.jsonl'
# Decode explicitly as UTF-8 rather than relying on the locale default.
with open(question_path, 'r', encoding='utf-8') as f:
    # Skip blank lines so a stray empty line does not raise JSONDecodeError.
    questions = [json.loads(line) for line in f if line.strip()]
# Keep a small subset for quick steering experiments.
test_questions = questions[:5]

In [7]:
# Hand-written role-play prompts used for the steering sweep.
# NOTE: this replaces the `test_questions` list taken from the questions file
# in the previous cell.
test_questions = [
    "You are a pirate captain. Describe your ship.",
    "You are a medieval wizard. What spell are you working on?",
    "You are a detective in the 1920s. What case are you investigating?",
    "You are an alien visiting Earth. What surprises you most?",
    "You are a ghost haunting a mansion. Why can't you move on?"
]

In [None]:
# Sweep steering strengths along role PC1 and record the model's responses.
# Results accumulate in pc1.json as {prompt: {magnitude-as-string: [responses, ...]}},
# so re-running this cell appends new samples to the existing ones.
magnitudes = [-100.0, -50.0, -25.0, 25.0, 50.0, 100.0]

results_path = f"{output_dir}/pc1.json"
os.makedirs(output_dir, exist_ok=True)

# Resume from previously saved results when present; a first run has no file yet.
if os.path.exists(results_path):
    with open(results_path, "r") as f:
        steered_results = json.load(f)
else:
    steered_results = {}

for magnitude in magnitudes:
    print(f"\n{'='*20} Magnitude: {magnitude:+.1f} {'='*20}")

    # JSON object keys are always strings, so results reloaded from disk are keyed
    # by e.g. "-100.0". Use the same string form here; a float key would never match
    # the reloaded entries and would be written out as a duplicate key.
    magnitude_key = str(magnitude)

    try:
        # Steering is active only inside this context manager.
        # NOTE(review): presumably adds `magnitude * rp_basis` to the activations of
        # `layer` at every position -- confirm against utils.steering_utils.
        with ActivationSteering(
            model=model,
            steering_vectors=[rp_basis],
            coefficients=magnitude,
            layer_indices=layer,
            intervention_type="addition",
            positions="all"
        ) as steerer:
            for question in test_questions:
                prompt = question

                print(f"\nPrompt: {prompt}")
                response = generate_text(model, tokenizer, prompt, chat_format=True)
                print(f"Response: {response}")
                steered_results.setdefault(prompt, {}).setdefault(magnitude_key, []).append(response)
    except Exception as e:
        # Best-effort sweep: report the failure and move on to the next magnitude.
        error_msg = f"Error with magnitude {magnitude}: {str(e)}"
        print(f"ERROR: {error_msg}")

    # Checkpoint after every magnitude so an interrupted run keeps what it generated.
    with open(results_path, "w") as f:
        json.dump(steered_results, f, indent=2)



Prompt: You are a pirate captain. Describe your ship.
Response: Avast there! Ye want to know about me ship, eh? Well, she be the 'Sea Serpent', a fine galleon with a soul as black as the night and a bite as sharp as me cutlass.

Her hull be painted a deep midnight blue, almost black in the moonlight, with streaks of crimson that mimic the scars of a thousand battles. She ain't no fancy schooner, mind ye, but sturdy as an ox and built for speed. Three masts she boasts, each rigged with billowing sails as white as bone bleached by the sun. They catch the wind like greedy hands, propelling us across the waves faster than any merchantman. 

She's armed to the teeth, matey. Six cannons line her starboard side, another six on the port, all ready to unleash thunder and fire upon any who dare cross our path. A swivel gun sits atop the quarterdeck, spitting lead at pesky boarding parties. And don't forget the crew – a motley bunch they are, loyal and fierce, with hearts as wild as the sea its

skipping cudagraphs due to skipping cudagraphs due to cpu device (arg355_1)


Response: Avast there, matey! Ye want to know about me ship, eh? Well, she's a beauty, I tell ya, the "Sea Serpent"!

She ain't no dainty schooner, mind ye. She's a three-masted galleon, built for speed and strength. Her hull be black as night, tarred and weathered by years at sea, with streaks of emerald green paint along her waterline, like the scales of a serpent risin' from the depths. 


Below deck, she's crammed full o' cannons, enough to blow any landlubber's ship to smithereens. Her timbers groan under the weight o' plundered treasure, barrels o' rum, and crates filled with spices and silks.

But the real heart o' the Sea Serpent lies in her crew. A motley bunch we are - hardened pirates, cunning navigators, and fierce fighters, all loyal to me and eager for adventure. We sail the seven seas, seekin' fortune and glory, and always ready for a good brawl.

So there ye have it, matey. The Sea Serpent: a ship fit for a king, or a pirate captain like

Prompt: You are a medieval wiza