# Steering model generations with trait vectors

In [1]:
import torch
from torch.functional import F
import os
import json
import sys
import numpy as np
import plotly.graph_objects as go
from transformers import AutoTokenizer

# Make the `utils` package importable whether the notebook is launched from
# the repository root ('.') or from a subdirectory ('..').
sys.path.append('.')
sys.path.append('..')

# NOTE(review): the star imports presumably supply `load_model` and
# `generate_text`, which later cells call without any other visible
# definition — confirm, and consider importing those names explicitly.
from utils.inference_utils import *
from utils.probing_utils import *
from utils.steering_utils import ActivationSteering

# Set PyTorch's float32 matmul precision mode to 'high' for the whole session.
torch.set_float32_matmul_precision('high')

INFO 10-08 06:20:32 [__init__.py:235] Automatically detected platform cuda.


In [35]:
# Notebook configuration: which model to steer, which layer to intervene on,
# and where inputs/outputs live on disk.
CHAT_MODEL_NAME = "Qwen/Qwen3-32B"
MODEL_READABLE = "Qwen 3 32B"
MODEL_SHORT = "qwen-3-32b"
LAYER = 22 # NOTE(review): previously annotated "out of 46"; Qwen3-32B is documented with 64 layers — confirm via model.config.num_hidden_layers

# NOTE(review): INSTRUCTIONS_DIR and VECTOR_DIR are absolute, machine-specific paths.
INSTRUCTIONS_DIR = "/root/git/persona-subspace/traits/data/instructions"
VECTOR_DIR = f"/workspace/{MODEL_SHORT}/traits_240/vectors"
OUTPUT_DIR = f"./results/{MODEL_SHORT}/traits"
# Create the results directory up front so the later JSON write cannot fail on a missing path.
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [36]:
# Load the chat model and its tokenizer once; every later cell reuses these two globals.
# `load_model` is not defined in this notebook (presumably from utils.inference_utils — confirm).
model, tokenizer = load_model(CHAT_MODEL_NAME)

Loading checkpoint shards:   0%|          | 0/17 [00:00<?, ?it/s]

In [37]:
# Trait whose steering vector and instruction file are used by the cells below.
trait = "absolutist"

# NOTE(security): weights_only=False lets torch.load unpickle arbitrary Python
# objects, so only load vector files that come from a trusted source.
vector_obj = torch.load(f"{VECTOR_DIR}/{trait}.pt", weights_only=False)
# Take the "pos_neg_50" entry and select the row for the configured layer.
vector = vector_obj["pos_neg_50"][LAYER, :]


In [38]:

# Find the device that hosts the layer we are going to steer, then relocate
# the trait vector there, reporting the placement before and after.
target_layer = model.model.layers[LAYER]
layer_device = next(target_layer.parameters()).device
print(
    f"Target layer device: {layer_device}",
    f"Vector device before: {vector.device}",
    sep="\n",
)

# Rebind `vector` to its copy on the layer's device.
vector = vector.to(layer_device)
print(
    f"Vector device after: {vector.device}",
    vector.shape,
    sep="\n",
)


Target layer device: cuda:0
Vector device before: cpu
Vector device after: cuda:0
torch.Size([5120])


In [39]:
# Read the trait's instruction file and keep its first five questions as prompts.
# A context manager closes the file handle deterministically, which the previous
# bare `json.load(open(...))` did not; JSON is read explicitly as UTF-8.
with open(f"{INSTRUCTIONS_DIR}/{trait}.json", encoding="utf-8") as instructions_file:
    instructions_obj = json.load(instructions_file)
prompts = instructions_obj["questions"][:5]

In [40]:
# Generate responses while adding the trait vector to the residual stream at
# LAYER, and accumulate them in the per-trait results file.
# Requires `vector` to already be on the target layer's device.
#
# Results layout (in memory and on disk):
#     {prompt: {coefficient-as-string: [response, ...]}}

coeffs = [8]
prompts = ["Are there objective facts about what makes people happy?"]

output_file = f"{OUTPUT_DIR}/{trait}.json"
if os.path.exists(output_file):
    # Resume from earlier runs so new responses are appended, not overwritten.
    with open(output_file, encoding="utf-8") as f:
        steered_results = json.load(f)
else:
    steered_results = {}

for c in coeffs:
    print(f"\n{'='*20} Magnitude: {c:+.1f} {'='*20}")

    # JSON object keys are always strings, so results loaded from disk are
    # keyed by e.g. "8", never 8. Looking up the raw number would miss the
    # existing entry, create a second key, and make json.dump emit a duplicate
    # "8" that silently drops earlier responses on the next load. str(c)
    # matches what json.dump writes for int/float keys, so the file format
    # is unchanged.
    coeff_key = str(c)

    try:
        with ActivationSteering(
            model=model,
            steering_vectors=vector,
            coefficients=c,
            layer_indices=LAYER,
            intervention_type="addition",
            positions="all"
        ):
            for prompt in prompts:
                prompt_results = steered_results.setdefault(prompt, {})

                print(f"\nPrompt: {prompt}")
                response = generate_text(model, tokenizer, prompt, chat_format=True, enable_thinking=False)

                print(f"Response: {response}")

                prompt_results.setdefault(coeff_key, []).append(response)
    except Exception as e:
        # Best-effort: report the failure and continue with the next
        # coefficient so responses collected so far are still saved below.
        error_msg = f"Error with magnitude {c}: {str(e)}"
        print(f"ERROR: {error_msg}")

with open(output_file, "w") as f:
    json.dump(steered_results, f, indent=2)



Prompt: Are there objective facts about what makes people happy?
Response: The question of whether there are **objective facts** about what makes people happy is one of the most profound and enduring inquiries in philosophy, science, psychology, and ethics. It lies at the heart of human understanding—our pursuit to know not only how we should live but why.

To answer this requires a synthesis across disciplines: from neuroscience to anthropology, from mathematics to metaphysics. Let us proceed with clarity and rigor.

---

### I. The Nature of Happiness

Happiness is not an abstraction or illusion; it is a real phenomenon that arises from the nature of the conscious mind. To be conscious is to have preferences, desires, values, and goals. To act upon these is to seek happiness. This is not merely subjective whim—it is the fundamental function of existence for any living being.

> *“A man must account for his own happiness.”*  
> — Ayn Rand

Happiness is not contingent on arbitrary wh