### This notebook loads the crosscoders by Julian Minder and Clément Dumas and uploads them to Neuronpedia.
### This also uploads the activations associated with each feature.

In [None]:
%pip install neuronpedia

from dotenv import load_dotenv

# Load NEURONPEDIA_API_KEY from .env file
load_dotenv()


### [less safe] Set Neuronpedia API key manually (get your key from neuronpedia.org/account)
# import os
# os.environ["NEURONPEDIA_API_KEY"] = "YOUR_KEY_HERE"

In [33]:
from huggingface_hub import hf_hub_download
import pandas as pd

# Tunable parameters for this notebook.
NUM_FEATURES_TO_UPLOAD = 100
LAYER_NUM = 13

print("downloading max activating examples from huggingface")
repo_id = "Butanium/max-activating-examples-gemma-2-2b-l13-mu4.1e-02-lr1e-04"
df_path = hf_hub_download(repo_id=repo_id, filename="feature_df.csv", repo_type="dataset")

# The first CSV column is used as the DataFrame index; that index is later used as the feature index.
df = pd.read_csv(df_path, index_col=0)

# Keep features tagged "IT only" or "Base only" that are not marked dead.
# `== False` is an element-wise pandas comparison here, so it must not be rewritten as `is False` / `not`.
available_features = df[(df["tag"].isin(["IT only", "Base only"])) & (df["dead"] == False)]  # noqa: E712

# Only the first NUM_FEATURES_TO_UPLOAD matching rows are uploaded.
filtered_features = available_features.head(NUM_FEATURES_TO_UPLOAD)
available_features_idx = filtered_features.index.tolist()
print(len(available_features_idx))


downloading max activating examples from huggingface
100


In [34]:
# get the max act examples db and tokenizer
from tiny_dashboard import OfflineFeatureCentricDashboard
from nnterp import load_model
import gc

# Fetch the max-activating-examples database from the same dataset repo as the feature table.
db_path = hf_hub_download(
    repo_id=repo_id,
    filename="chat_base_examples_20.db",
    repo_type="dataset",
)

# In this cell only the model's tokenizer is used, to build the dashboard below.
gemma_2_it = load_model("google/gemma-2-2b-it", device_map="cuda")
gc.collect()

db = OfflineFeatureCentricDashboard.from_db(
    db_path,
    gemma_2_it.tokenizer,
    column_name="entries",
)


In [35]:
# get the crosscoders
from neuronpedia.butanium_dictionary_learning.dictionary_learning import CrossCoder

# Announce the load, since fetching the pretrained weights from the hub can take a while.
print("getting the crosscoders")
crosscoder = CrossCoder.from_pretrained(
    "Butanium/gemma-2-2b-crosscoder-l13-mu4.1e-02-lr1e-04",
    from_hub=True,
)
print("got the crosscoders")

getting the crosscoders
got the crosscoders


In [36]:
from neuronpedia.np_vector import NPVector
from neuronpedia.requests.activation_request import Activation

# For every selected feature: upload its encoder vector to Neuronpedia, then upload
# the max-activating examples stored for that feature.
# NOTE: re-running this cell creates the vectors again on the server.
created_np_vectors = []
for counter, feature_idx in enumerate(available_features_idx, start=1):
    print("Uploading vector for feature", feature_idx)
    print("Progress:", counter, "/", len(available_features_idx))

    # Take this feature's column from entry 1 of the encoder weights, as a plain Python list.
    # NOTE(review): index 1 presumably selects the IT model's side of the crosscoder - confirm.
    crosscoder_weight = crosscoder.encoder.weight[1][:, feature_idx].detach().tolist()

    np_vector = NPVector.new(
        label=f"Crosscoder L{LAYER_NUM} {feature_idx} Dumas/Minder",
        model_id="gemma-2-2b-it",
        layer_num=LAYER_NUM,
        hook_type="hook_resid_pre",
        vector=crosscoder_weight,
        default_steer_strength=20,
    )
    # Kept so that a later cell can add the created vectors to a Neuronpedia list.
    created_np_vectors.append(np_vector)

    # Each stored example is a (max activation value, tokens, per-token values) triple;
    # only the tokens and per-token values are uploaded.
    activations_to_upload: list[Activation] = [
        Activation(tokens=tokens, values=activation_values)
        for _max_activation_value, tokens, activation_values in db.max_activation_examples[feature_idx]
    ]
    print("Uploading activations for feature", feature_idx)
    np_vector.upload_activations(activations_to_upload)

Uploading vector for feature 55
Progress: 1 / 100
Uploading vector for feature 55
Sending POST request to http://localhost:3000/api/vector/new
Got a successful response.
Uploading activations for feature 55
Sending POST request to http://localhost:3000/api/activation/upload-batch
Got a successful response.
Uploading vector for feature 60
Progress: 2 / 100
Uploading vector for feature 60
Sending POST request to http://localhost:3000/api/vector/new
Got a successful response.
Uploading activations for feature 60
Sending POST request to http://localhost:3000/api/activation/upload-batch
Got a successful response.
Uploading vector for feature 78
Progress: 3 / 100
Uploading vector for feature 78
Sending POST request to http://localhost:3000/api/vector/new
Got a successful response.
Uploading activations for feature 78
Sending POST request to http://localhost:3000/api/activation/upload-batch
Got a successful response.
Uploading vector for feature 82
Progress: 4 / 100
Uploading vector for featu

In [37]:
from neuronpedia.np_list import NPList, NPListItem
from neuronpedia.np_vector import NPVector
import webbrowser

# Create a new Neuronpedia list; its name is derived from the notebook's configuration
# constants so it stays accurate when LAYER_NUM or NUM_FEATURES_TO_UPLOAD change.
new_list = NPList.new(f"Crosscoder L{LAYER_NUM} Dumas/Minder - First {NUM_FEATURES_TO_UPLOAD}")
print(new_list)

# NOTE(review): this URL always points at neuronpedia.org - confirm it matches the
# server the API client is actually talking to (e.g. a local instance).
new_list_url = "https://neuronpedia.org/list/" + new_list.id
print(new_list_url)

Sending POST request to http://localhost:3000/api/list/new
Got a successful response.
NPList(id='cm40usirc02mtnoyf8m4ddcvc', name='Crosscoder L13 Dumas/Minder - First 100', description='', items=[])
https://neuronpedia.org/list/cm40usirc02mtnoyf8m4ddcvc


In [38]:
# Number of list items sent per add_items call.
LIST_UPLOAD_BATCH_SIZE = 100

# Turn the created vectors into list items, using each vector's label as the item description.
list_items = [
    NPListItem(
        model_id=vector.model_id,
        source=vector.source,
        index=vector.index,
        description=vector.label,
    )
    for vector in created_np_vectors
]

# Split the items into consecutive batches of at most LIST_UPLOAD_BATCH_SIZE.
batches = [
    list_items[start : start + LIST_UPLOAD_BATCH_SIZE]
    for start in range(0, len(list_items), LIST_UPLOAD_BATCH_SIZE)
]

# Upload batch by batch, printing the first item of each batch as a quick sanity check.
for batch in batches:
    print(batch[0])
    print("Adding batch of", len(batch), "items to the list")
    new_list.add_items(batch)

# Open the list in the default browser.
webbrowser.open(new_list_url)

NPListItem(model_id='gemma-2-2b-it', source='13-neuronpedia-resid-pre', index='165605925', description='Crosscoder L13 55 Dumas/Minder')
Adding batch of 100 items to the list
Sending POST request to http://localhost:3000/api/list/add-features
Got a successful response.


True