In [1]:
import os
import json
import webbrowser
from neuronpedia.np_vector import NPVector
from dotenv import load_dotenv

# Load NEURONPEDIA_API_KEY from the local .env file into the process
# environment, so the key never has to be written into the notebook itself.
load_dotenv()

True

In [None]:
import torch

# Read the tensor stored at steer_cfgs/gemma2/anger/caao.pt, mapping it onto the CPU
path = os.path.join("../SAE-TS/steer_cfgs/gemma2/anger/", "caao.pt")
caao_tensor = torch.load(
    path,
    map_location=torch.device('cpu'),
    weights_only=True,
)

# Cast to float, collapse to one dimension, and convert to a plain Python list
caao_list = caao_tensor.float().flatten().tolist()

In [58]:
from neuronpedia.requests.base_request import (
    NPRequest,
)
class SteerRequest(NPRequest):
    """Request object that posts steering jobs through ``NPRequest``.

    The base class is initialised with the endpoint name ``"steer"``.
    """

    def __init__(self):
        super().__init__("steer")

    def steer(
        self,
        model_id: str,
        vectors: list[NPVector],
        prompt: str,
        temperature: float = 0.5,
        n_tokens: int = 32,
        freq_penalty: float = 2,
        seed: int = 16,
        strength_multiplier: float = 4,
        steer_special_tokens: bool = True,
    ):
        """Send a steering request for ``prompt`` using ``vectors``.

        Args:
            model_id: Model identifier sent as the top-level ``modelId``.
            vectors: Vectors to steer with; each one contributes its
                ``model_id``, ``source``, ``index`` and
                ``default_steer_strength`` to the request.
            prompt: Text prompt forwarded unchanged.
            temperature: Forwarded unchanged as ``temperature``.
            n_tokens: Forwarded unchanged as ``n_tokens``.
            freq_penalty: Forwarded unchanged as ``freq_penalty``.
            seed: Forwarded unchanged as ``seed``.
            strength_multiplier: Forwarded unchanged as ``strength_multiplier``.
            steer_special_tokens: Forwarded unchanged as ``steer_special_tokens``.

        Returns:
            Whatever ``send_request`` returns for the POST call.
        """
        # Translate each vector into the "feature" dictionary the payload carries
        features = []
        for vec in vectors:
            features.append(
                {
                    "modelId": vec.model_id,
                    "layer": vec.source,
                    "index": vec.index,
                    "strength": vec.default_steer_strength,
                }
            )

        return self.send_request(
            method="POST",
            json={
                "modelId": model_id,
                "features": features,
                "prompt": prompt,
                "temperature": temperature,
                "n_tokens": n_tokens,
                "freq_penalty": freq_penalty,
                "seed": seed,
                "strength_multiplier": strength_multiplier,
                "steer_special_tokens": steer_special_tokens,
            },
        )

In [61]:
# Alternative (disabled): look up one specific vector by model/source/index
# instead of listing every owned vector.
# np_vector = NPVector.get(
#     model_id="gemma-2-2b-it",
#     source="12-neuronpedia-resid-pre",
#     index="170508334"
# )
# print(np_vector)
# Fetch every vector owned by the current account. fetch_by_label() below
# searches this list.
# NOTE(review): the name `all` rebinds Python's builtin all() for the whole
# notebook; any later all(...) call will fail while this binding exists.
all = NPVector.get_owned()
def fetch_by_label(label, vectors=None):
    """Return the first vector whose ``label`` attribute equals ``label``.

    Args:
        label: Label to look for (compared with ``==``).
        vectors: Optional iterable of objects exposing a ``label`` attribute.
            When omitted, the notebook-level list ``all`` is searched, which
            keeps existing one-argument calls working unchanged.

    Returns:
        The first matching vector, or ``None`` when nothing matches.
    """
    # `is None` (rather than `or`) so an explicitly passed empty list is
    # searched as-is instead of silently falling back to the global list.
    candidates = all if vectors is None else vectors
    return next((v for v in candidates if v.label == label), None)

Sending POST request to https://neuronpedia.org/api/vector/list-owned
Got a successful response.


In [None]:
# Count the owned vectors, delete every one labelled "dinosaurs", then count again.
# NOTE: `all` shadows the Python builtin of the same name; the name is kept
# because fetch_by_label() reads it.
all = NPVector.get_owned()
print(len(all))
for vec in all:
    if vec.label == "dinosaurs":
        vec.delete()
# The list fetched above is a local snapshot whose length does not change when
# vectors are deleted, so fetch again to report the post-deletion count.
all = NPVector.get_owned()
print(len(all))

Sending POST request to https://neuronpedia.org/api/vector/list-owned
Got a successful response.
[NPVector(label='dinosaurs', model_id='gemma-2-2b', source='20-neuronpedia-resid-pre', index='127843643', values=[-0.01366724912077188, 0.008099760860204697, 0.003968628589063883, 0.03258474171161652, -0.02134944312274456, -0.01244464516639709, -0.01144329831004143, 0.02153209783136845, -0.001104870927520096, -0.02334058471024036, 0.01610567979514599, 0.01355066616088152, 0.01679856330156326, -0.04209789633750916, -0.01706227101385593, -0.004639910534024239, -0.009663445875048637, -0.01911521703004837, -0.04193880409002304, 0.01719758287072182, 0.001729772193357348, 0.00223298417404294, -0.01209195982664824, 0.01501009613275528, -0.01946501992642879, 0.01212128438055515, 0.005174582824110985, 0.01893047243356705, 0.01066785957664251, -0.01134104933589697, 0.04890822619199753, -0.01882336102426052, 0.006089059635996819, -0.02713341638445854, -0.0351865142583847, -0.04441922530531883, 0.02782

In [62]:
from neuronpedia.requests.steer_request import SteerChatRequest
def steer_chat(
    vec,
    steered_chat_messages: list[dict[str, str]],
    strength_multiplier: float = 4,
    temperature: float = 1,
):
    """Run a chat steering request with a single vector.

    Args:
        vec: Vector to steer with; its ``model_id`` selects the model.
        steered_chat_messages: Chat messages forwarded unchanged to
            ``SteerChatRequest.steer``.
        strength_multiplier: Forwarded unchanged; defaults to the value that
            was previously hard-coded (4).
        temperature: Forwarded unchanged; defaults to the value that was
            previously hard-coded (1).

    Returns:
        Whatever ``SteerChatRequest().steer`` returns.
    """
    return SteerChatRequest().steer(
        model_id=vec.model_id,
        vectors=[vec],
        steered_chat_messages=steered_chat_messages,
        strength_multiplier=strength_multiplier,
        temperature=temperature,
    )

def steer(vec, prompt, strength_multiplier: float = 4, temperature: float = 1):
    """Run a completion steering request with a single vector.

    Args:
        vec: Vector to steer with; its ``model_id`` selects the model.
        prompt: Text prompt forwarded unchanged to ``SteerRequest.steer``.
        strength_multiplier: Forwarded unchanged; defaults to the value that
            was previously hard-coded (4).
        temperature: Forwarded unchanged; defaults to the value that was
            previously hard-coded (1).

    Returns:
        Whatever ``SteerRequest().steer`` returns.
    """
    return SteerRequest().steer(
        model_id=vec.model_id,
        vectors=[vec],
        prompt=prompt,
        strength_multiplier=strength_multiplier,
        temperature=temperature,
    )

# Steer the plain completion "I think" with the uploaded anger/CAAO vector.
# (For the chat variant, call steer_chat with
#  steered_chat_messages=[{"role": "user", "content": "Write a one sentence story."}] instead.)
responseJson = steer(fetch_by_label("gemma2b_anger_caao"), "I think")

# Pretty-print the response
print(json.dumps(responseJson, indent=2))

Sending POST request to https://neuronpedia.org/api/steer/
Got a successful response.
{
  "STEERED": "I think it\u2019s unfair that the South has been hit so hard over the past five months.\n\nPresident Donald Trump and his minions have attempted to stoke racial tensions",
  "DEFAULT": "I think it was \"the other\" that said he'd check the timing. Awaiting further news...\n\nWhen an engine is running hotter than normal, I'",
  "id": "cm4s4qnzn00017c9bpbnx4m5u",
  "shareUrl": "https://www.neuronpedia.org/steer/cm4s4qnzn00017c9bpbnx4m5u",
  "limit": "265"
}


In [59]:
import os
import torch
from neuronpedia.np_vector import NPVector

# The API key must come from the environment (for example the .env file read
# by load_dotenv() in the first cell). It must never be hard-coded in the
# notebook, where it would be committed and shared along with the code.
if not os.environ.get("NEURONPEDIA_API_KEY"):
    raise RuntimeError(
        "NEURONPEDIA_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this cell."
    )

# Root folder; upload_vector() looks for <base_dir>/<theme>/<vector_type>.pt
base_dir = "../SAE-TS/steer_cfgs/gemma2/"

# Theme names (each is used as a sub-directory of base_dir and as part of the label)
themes = [
    "anger",
    "christian_evangelist",
    "conspiracy",
    "french",
    "london",
    "love",
    "praise",
    "want_to_die",
    "wedding",
]

# Vector variants (each is the stem of a .pt file inside a theme directory)
vector_types = [
    "caao",
    "caa",
    "saets",
    "sae",
]

def create_label(theme, vector_type):
    """Build the upload label for a (theme, vector type) pair.

    The result has the form ``gemma2b_<theme>_<vector_type>``.
    """
    label = f"gemma2b_{theme}_{vector_type}"
    return label

def upload_vector(theme, vector_type):
    """Load ``<base_dir>/<theme>/<vector_type>.pt`` and upload it as a new vector.

    Args:
        theme: Theme name; used as the sub-directory and in the label.
        vector_type: Vector variant; used as the file stem and in the label.

    Returns:
        The object returned by ``NPVector.new`` for the uploaded vector.
    """
    label = create_label(theme, vector_type)

    # Read the saved tensor onto the CPU and flatten it to a list of floats
    weights_path = os.path.join(base_dir, theme, f"{vector_type}.pt")
    weights = torch.load(
        weights_path,
        map_location=torch.device('cpu'),
        weights_only=True,
    )
    values = weights.float().flatten().tolist()

    # Every upload uses the same model, layer, hook and default strength
    return NPVector.new(
        label=label,
        model_id="gemma-2-2b",
        layer_num=12,
        hook_type="hook_resid_pre",
        vector=values,
        default_steer_strength=20,
    )

# Upload one vector per (theme, vector type) combination, theme-major order.
# A failure for one combination is reported and the loop moves on to the next.
uploaded_vectors = []
for theme, vector_type in ((t, k) for t in themes for k in vector_types):
    try:
        vector = upload_vector(theme, vector_type)
        uploaded_vectors.append(vector)
        print(f"Uploaded: {vector.label}")
    except Exception as e:
        print(f"Error uploading {theme}/{vector_type}: {str(e)}")

# Summary of how many uploads succeeded
print(f"\nTotal vectors uploaded: {len(uploaded_vectors)}")

Sending POST request to https://neuronpedia.org/api/vector/new
Got a successful response.
Uploaded: gemma2b_anger_caao
Sending POST request to https://neuronpedia.org/api/vector/new
Got a successful response.
Uploaded: gemma2b_anger_caa
Sending POST request to https://neuronpedia.org/api/vector/new
Got a successful response.
Uploaded: gemma2b_anger_saets
Sending POST request to https://neuronpedia.org/api/vector/new
Got a successful response.
Uploaded: gemma2b_anger_sae
Sending POST request to https://neuronpedia.org/api/vector/new
Got a successful response.
Uploaded: gemma2b_christian_evangelist_caao
Sending POST request to https://neuronpedia.org/api/vector/new
Got a successful response.
Uploaded: gemma2b_christian_evangelist_caa
Sending POST request to https://neuronpedia.org/api/vector/new
Got a successful response.
Uploaded: gemma2b_christian_evangelist_saets
Sending POST request to https://neuronpedia.org/api/vector/new
Got a successful response.
Uploaded: gemma2b_christian_evang