In [1]:
import sys

sys.path.insert(0, '../')

import torch as t
from nnsight import LanguageModel
from dictionary_learning import ActivationBuffer
from dictionary_learning.interp import examine_dimension
from dictionary_learning.utils import zst_to_generator
from loading_utils import load_submodules_and_dictionaries
from circuitsvis.activations import text_neuron_activations
import gc

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Reproducibility and device configuration.
SEED = 42
device = 'cuda:0'

# Actually apply the seed: it was previously defined but never used, so any
# torch-level randomness further down was not repeatable across
# "Restart Kernel -> Run All".
t.manual_seed(SEED)

# Load the base language model onto the configured device.
model = LanguageModel('EleutherAI/pythia-70m-deduped', device_map=device)

# Load the submodules of interest (attention, MLP and residual stream are all
# enabled via the use_* flags) together with their dictionaries.
# dict_size is 512 * 64 = 32768, matching the "5_32768" run name.
submodules, submodule_names, dictionaries = load_submodules_and_dictionaries(
        model,
        use_attn=True,
        use_mlp=True,
        use_resid=True,
        dict_path="/share/projects/dictionary_circuits/autoencoders/pythia-70m-deduped/",
        dict_size=512*64,
        dict_run_name="5_32768",
        device=device,
)

# Invert the returned mapping so it can be indexed the other way round.
# NOTE(review): presumably the loader returns submodule -> name, which makes
# this name -> submodule (the feature-inspection cell looks entries up by a
# name string such as "mlp0") -- confirm against loading_utils.
submodule_names = {v: k for k, v in submodule_names.items()}

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [3]:
# Build a buffer of text contexts from Pile training file 00
n_ctxs, ctx_len = 512, 128
data = zst_to_generator('/share/data/datasets/pile/the-eye.eu/public/AI/pile/train/00.jsonl.zst')

# The buffer is only used here as a source of contexts, so the choice of
# submodule is irrelevant; the first one is passed arbitrarily.
buffer = ActivationBuffer(
    data, model, [submodules[0]],
    out_feats=512, in_batch_size=128,
    n_ctxs=n_ctxs, ctx_len=ctx_len,
    device=device,
)

# Draw n_ctxs contexts from the buffer in a single batch.
inputs = buffer.text_batch(batch_size=n_ctxs)
# NOTE(review): the second number printed is len() of the first item only
# (presumably the character count of a raw text string), not a shared
# second dimension of `inputs` -- confirm what text_batch returns.
print(f'Inputs shape: {len(inputs)}, {len(inputs[0])}')

Inputs shape: 512, 13274


In [4]:
# Choose a component and feature to examine
submodule_name = "mlp0"
feat_idx = 22005

# Resolve the chosen name to its submodule object, then to that submodule's
# dictionary.
submodule = submodule_names[submodule_name]
dictionary = dictionaries[submodule]

# Run the feature analysis for dimension `feat_idx` of the dictionary over the
# buffered contexts, limited to ctx_len tokens and n_ctxs inputs.
out = examine_dimension(
    model,
    submodule,
    inputs,
    dictionary,
    dim_idx=feat_idx,
    max_length=ctx_len,
    n_inputs=n_ctxs,
)

# Cleanup memory. Run the garbage collector first: tensors that are only kept
# alive by unreachable Python objects (e.g. reference cycles) are not freed
# until then, and empty_cache() can only release blocks that are no longer in
# use. Calling empty_cache() before gc.collect() would leave that memory
# cached.
gc.collect()
t.cuda.empty_cache()

# Visualize the results
text_neuron_activations(*out.top_contexts)

You're using a GPTNeoXTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
