# Installation

Note: for running any R1 Distill model, it is essential to use a GPU runtime.  
I use an A100.

In [1]:
!pip install nnsight

Collecting nnsight
  Downloading nnsight-0.4.3-py3-none-any.whl.metadata (15 kB)
Collecting python-socketio[client] (from nnsight)
  Downloading python_socketio-5.12.1-py3-none-any.whl.metadata (3.2 kB)
Collecting msgspec (from nnsight)
  Downloading msgspec-0.19.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.9 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.4.0->nnsight)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.4.0->nnsight)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.4.0->nnsight)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.4.0->nnsight)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.w

# Basics

In [2]:
from collections import OrderedDict
import torch

# Dimensions of the toy two-layer network used throughout the "Basics" section.
input_size, hidden_dims, output_size = 5, 10, 2

# Register the layers under explicit names so they can be addressed later as
# `net.layer1` / `net.layer2`.
named_layers = OrderedDict()
named_layers["layer1"] = torch.nn.Linear(input_size, hidden_dims)
named_layers["layer2"] = torch.nn.Linear(hidden_dims, output_size)

# Freeze every parameter: the network is only inspected, never trained.
net = torch.nn.Sequential(named_layers)
net.requires_grad_(False)

In [3]:
import nnsight
from nnsight import NNsight
# Wrap the plain torch module in NNsight; the following cells read and edit
# layer inputs/outputs through this wrapper inside `trace` contexts.
tiny_model = NNsight(net)

In [5]:
# Run one forward pass on a random (1, input_size) tensor and keep the
# model's final output.
# NOTE(review): `input` shadows the Python builtin of the same name, and the
# tensor is drawn without a seed, so printed values differ between kernels.
input = torch.rand((1, input_size))
with tiny_model.trace(input) as tracer:
    # .save() marks the value to be kept once the tracing context exits.
    output = tiny_model.output.save()
print(output)

tensor([[-0.0849,  0.4462]])


In [9]:
# Capture what leaves layer1 and what enters layer2 in the same forward pass,
# then check that the two saved tensors are identical.
with tiny_model.trace(input):
    l1_output = tiny_model.layer1.output.save()
    l2_input = tiny_model.layer2.input.save()
print(l1_output)
print(l2_input)
print('== ? :', torch.equal(l1_output, l2_input))

tensor([[ 0.6817,  0.3106,  0.2260,  0.9386,  0.0270,  0.0609,  0.2386,  0.1148,
          0.3613, -0.4808]])
tensor([[ 0.6817,  0.3106,  0.2260,  0.9386,  0.0270,  0.0609,  0.2386,  0.1148,
          0.3613, -0.4808]])
== ? : True


In [11]:
# LOG -- DEBUG 1
# Print an intermediate value from inside the trace without saving it:
# useful for debugging with small memory overhead.
with tiny_model.trace(input) as trace:
    # Logs the label followed by layer1's output for this forward pass.
    trace.log('l1_output: ', tiny_model.layer1.output)

l1_output:  tensor([[ 0.6817,  0.3106,  0.2260,  0.9386,  0.0270,  0.0609,  0.2386,  0.1148,
          0.3613, -0.4808]])


In [23]:
# SCAN AND VALIDATE -- DEBUG 2
# Scan and validate is faster than running the whole model
# This cell fails on purpose: the out-of-range column index below is the
# demonstration, and the trace reports it as an IndexError.
with tiny_model.trace(input, scan=True, validate=True):

    l1_output_before = tiny_model.layer1.output.clone().save()
    # layer1 has `hidden_dims` output columns, so valid indices stop at
    # hidden_dims - 1; indexing column `hidden_dims` is one past the end.
    tiny_model.layer1.output[:, hidden_dims] = 0 # example shape error
    l1_output_after = tiny_model.layer1.output.save()

# NOTE(review): not reached when the trace above raises.
print("Before:", l1_output_before)
print("After:", l1_output_after)

IndexError: index 10 is out of bounds for dimension 1 with size 10

In [17]:
# TORCH
# 1. you only need to save the things you want after context
# 2. torch tensor operations all just work on layer outputs inside the trace;
#    only the final expression is saved and is available after the context
with tiny_model.trace(input):
    # Difference between the argmax column of layer1's output and the argmax
    # column of layer2's output; the intermediate argmax results are not saved.
    l1_l2_diff = (torch.argmax(tiny_model.layer1.output, dim=1) -
                  torch.argmax(tiny_model.layer2.output, dim=1)
                  ).save()
l1_l2_diff

tensor([2])

In [21]:
# APPLY
# with apply, you can apply custom functions in context
def span(t):
    """Return the range of ``t``: its largest element minus its smallest.

    Both extremes are taken over every element of the tensor, so the result
    is a 0-dim tensor regardless of the shape of ``t``.
    """
    largest = t.max()
    smallest = t.min()
    return largest - smallest

# Compute layer1's value range two ways inside one trace: from the two
# extremes directly, and by routing the custom `span` function through
# nnsight.apply. The last line shows that both agree.
with tiny_model.trace(input):
    l1_out = tiny_model.layer1.output
    # Named l1_min / l1_max (not min / max) so the Python builtins are not
    # shadowed for every later cell in the kernel session.
    l1_min = torch.min(l1_out).save()
    l1_max = torch.max(l1_out).save()
    l1_span = nnsight.apply(span, l1_out).save()
l1_span, l1_min, l1_max, torch.equal(l1_span, l1_max - l1_min)

(tensor(1.4195), tensor(-0.4808), tensor(0.9386), True)

In [22]:
# SETTING
# you can set any model activations directly
with tiny_model.trace(input):
    # clone() snapshots the activation before it is edited; without it the
    # "before" value would reflect the in-place assignment below.
    l1_output1 = tiny_model.layer1.output.clone().save()
    # Zero the first column of layer1's output for this forward pass.
    tiny_model.layer1.output[:, 0] = 0
    l1_output2 = tiny_model.layer1.output.save()
print(l1_output1)
print(l1_output2)

tensor([[ 0.6817,  0.3106,  0.2260,  0.9386,  0.0270,  0.0609,  0.2386,  0.1148,
          0.3613, -0.4808]])
tensor([[ 0.0000,  0.3106,  0.2260,  0.9386,  0.0270,  0.0609,  0.2386,  0.1148,
          0.3613, -0.4808]])


In [28]:
# GRAD
# ensure gradients are on
# grad must *ALWAYS* be saved
# NOTE: Can easily find MAXIMALLY EXCITING INPUT THIS WAY, i.e. RECEPTIVE FIELD
with tiny_model.trace(input):
    # The network was built with requires_grad_(False), so gradient tracking
    # is switched on here, at layer1's output.
    tiny_model.layer1.output.requires_grad = True
    l1_grad = tiny_model.layer1.output.grad.save()
    l2_grad = tiny_model.layer2.output.grad.save()
    # Scalar loss: the sum of the model outputs, so the gradient with respect
    # to the final output is all ones.
    loss = tiny_model.output.sum()
    loss.backward()
print(l1_grad)
print(l2_grad)

tensor([[ 0.1381, -0.1313, -0.0900,  0.2972,  0.0058,  0.0086,  0.2349, -0.4020,
          0.2026, -0.0263]])
tensor([[1., 1.]])


In [30]:
# STOP
# to save runtime / memory if full forward pass is not required
with tiny_model.trace(input):
    # l2_out = tiny_model.layer2.output.save() # THIS WOULD GIVE AN ERROR
    l1_output = tiny_model.layer1.output.save()
    # End the forward pass once layer1's output is available; per the
    # commented-out line above, layer2's output cannot be requested here.
    tiny_model.layer1.output.stop() # STOP!
print(l1_output)

tensor([[ 0.6817,  0.3106,  0.2260,  0.9386,  0.0270,  0.0609,  0.2386,  0.1148,
          0.3613, -0.4808]])


In [31]:
# COND
# conditional interventions: the body of a tracer.cond block only takes
# effect when its condition holds; blocks can be nested
with tiny_model.trace(input) as tracer:

  # Fixed value (despite the "Rand Int" label in the log message below).
  non_rand_int = 8

  with tracer.cond(non_rand_int > 0):
    with tracer.cond(non_rand_int % 2 == 0):
      tracer.log("Rand Int ", non_rand_int, " is Positive and Even")

Rand Int  8  is Positive and Even


In [None]:
# FOR
# New: Using Python for loops for iterative interventions
with tiny_model.session() as session:

    # `li` is filled with three single-element lists: [0], [1], [2].
    # NOTE(review): the comprehension is used only for its side effect.
    li = nnsight.list()
    [li.append([num]) for num in range(0, 3)]
    # Only `li2` is saved, so only it is meant to be read after the session.
    li2 = nnsight.list().save()

    # Using regular for loops
    # Flatten `li` into `li2` one element at a time.
    for item in li:
        for item_2 in item: # for loops can be nested!
            li2.append(item_2)

# NOTE(review): this cell has no recorded output; presumably prints the
# flattened values 0, 1, 2 -- confirm by running.
print("\nList: ", li2)

# LLMs

In [None]:
from nnsight import LanguageModel

In [32]:
# LOAD
# note that "dispatch=True" as an arg loads the model into memory immediately
# (it is not passed here: the recorded outputs show the weight file being
# downloaded only when the first trace runs in the next cell)
gpt2 = LanguageModel('openai-community/gpt2', device_map='auto')
# Printing the wrapper shows the module tree, i.e. the attribute paths used
# in later cells (transformer.h, mlp, lm_head, ...).
print(gpt2)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
  (generator): Generator(
    (streamer): Streamer()
  )
)


In [33]:
with gpt2.trace("The Eiffel Tower is in the city of"):

    # Ablate the MLP of the last block (h is a ModuleList, so h[-1] is the
    # final block). The MLP returns a single tensor, not a tuple, so the whole
    # output is zeroed with [:]; indexing [0] first would select the first
    # batch item rather than "the hidden states".
    gpt2.transformer.h[-1].mlp.output[:] = 0

    # Logits come out of model.lm_head and we apply argmax to get the predicted token ids.
    token_ids = gpt2.lm_head.output.argmax(dim=-1).save()

print("\nToken IDs:", token_ids)

# Apply the tokenizer to decode the ids into words after the tracing context.
# token_ids[0][-1] is the prediction at the last position of the only prompt.
print("Prediction:", gpt2.tokenizer.decode(token_ids[0][-1]))

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]


Token IDs: tensor([[ 262,   12,  417, 8765,   11,  257,  262, 3504,  338, 3576]])
Prediction:  London


In [39]:
# INVOKE -- BATCHING
# Every time invoke is called, the tracer is run with a different context
# However, the actual run only happens when the overall trace context ends
# HENCE -- BATCHING
# NOTE: MUCH DIFFERENT RESULT WITH trailing space in prompt, "city of " <- 😒

# Both invokes must see exactly the same text, so the prompt is defined once.
eiffel_prompt = 'The Eiffel Tower is in the city of'

with gpt2.trace() as tracer:

    with tracer.invoke(eiffel_prompt):
        # Ablate the MLP of the last block (h is a ModuleList, so h[-1] is
        # the final block). The MLP returns a single tensor, not a tuple, so
        # the whole output for this invoke is zeroed with [:]; indexing [0]
        # first would select a batch item rather than "the hidden states".
        gpt2.transformer.h[-1].mlp.output[:] = 0
        token_ids_ablated = gpt2.lm_head.output.argmax(dim=-1).save()

    with tracer.invoke(eiffel_prompt):
        # No intervention: baseline prediction for the same prompt.
        token_ids_normal = gpt2.lm_head.output.argmax(dim=-1).save()

# NORMAL AND ABLATED RUN IN ONE BATCH
# Printed in the order normal, then ablated.
print("\nToken IDs:", token_ids_normal)
print("\nToken IDs:", token_ids_ablated)

# Compare normal vs. ablated
print("Prediction:", gpt2.tokenizer.decode(token_ids_normal[0][-1]))
print("Prediction:", gpt2.tokenizer.decode(token_ids_ablated[0][-1]))


Token IDs: tensor([[ 198,   12,  417, 8765,  318,  257,  262, 3504, 7372, 6342]])

Token IDs: tensor([[ 262,   12,  417, 8765,   11,  257,  262, 3504,  338, 3576]])
Prediction:  Paris
Prediction:  London


In [None]:
# Multiple token generation
# Uses the `gpt2` wrapper loaded above; no object named `model` exists in
# this notebook, so referring to one fails on a fresh kernel.
prompt = 'The Eiffel Tower is in the city of'
layers = gpt2.transformer.h
n_new_tokens = 3
with gpt2.generate(prompt, max_new_tokens=n_new_tokens) as tracer:
    hidden_states = nnsight.list().save() # Initialize & .save() nnsight list

    # Call .all() on the model so the statements below apply to every
    # generated token, not only the first one
    gpt2.all()

    # Apply same intervention - set first layer output to zero
    # (a block's output is indexed with [0] to reach its hidden states)
    layers[0].output[0][:] = 0

    # Append the lm_head output (the logits) post-intervention
    hidden_states.append(gpt2.lm_head.output) # no need to call .save

# One entry is expected per generated token, i.e. n_new_tokens entries.
print("Hidden state length: ",len(hidden_states)) # length is 3, as expected!

# Try with R1

In [43]:
# OBSOLETE (?)
# Load model directly
# Kept for reference only: the two loading calls below are commented out, and
# the model is loaded through nnsight's LanguageModel in a later cell instead.
# NOTE(review): with those calls disabled, this import is unused here.
from transformers import AutoTokenizer, AutoModelForCausalLM

# tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-R1-Distill-Qwen-7B")
# model = AutoModelForCausalLM.from_pretrained("deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", torch_dtype=torch.float16)

In [41]:
# Hugging Face Hub repository id of the R1 distilled model loaded below.
r1_official_name = 'deepseek-ai/DeepSeek-R1-Distill-Qwen-7B'

In [42]:
# LOAD R1
# Same loading pattern as for gpt2 above, pointed at the R1 distill repo id.
r1 = LanguageModel(r1_official_name, device_map='auto')
# Printing the wrapper shows the module tree (model.layers, self_attn, mlp, ...).
print(r1)

config.json:   0%|          | 0.00/680 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/3.07k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

Qwen2ForCausalLM(
  (model): Qwen2Model(
    (embed_tokens): Embedding(152064, 3584)
    (layers): ModuleList(
      (0-27): 28 x Qwen2DecoderLayer(
        (self_attn): Qwen2Attention(
          (q_proj): Linear(in_features=3584, out_features=3584, bias=True)
          (k_proj): Linear(in_features=3584, out_features=512, bias=True)
          (v_proj): Linear(in_features=3584, out_features=512, bias=True)
          (o_proj): Linear(in_features=3584, out_features=3584, bias=False)
        )
        (mlp): Qwen2MLP(
          (gate_proj): Linear(in_features=3584, out_features=18944, bias=False)
          (up_proj): Linear(in_features=3584, out_features=18944, bias=False)
          (down_proj): Linear(in_features=18944, out_features=3584, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): Qwen2RMSNorm((3584,), eps=1e-06)
        (post_attention_layernorm): Qwen2RMSNorm((3584,), eps=1e-06)
      )
    )
    (norm): Qwen2RMSNorm((3584,), eps=1e-06)
    (rotary_emb):

In [58]:
# Plain-text markers used to assemble the prompt by hand.
# NOTE(review): confirm these match the chat format the R1 tokenizer expects.
user_tag, assistant_tag = "<user>", "<assistant>"
think_tag = "<think>\n"

# Keyword arguments forwarded to generation (and the tokenizer).
settings = dict(
    pad_token_id=r1.tokenizer.eos_token_id,  # silence warning
    temperature=0.6,
    max_new_tokens=1000,
    repetition_penalty=1.1,  # reduce control jank
)

In [63]:
# Hand-assembled prompt: user turn, then the assistant turn opened with the
# think tag so generation starts inside the reasoning section.
prompt = (
    f'{user_tag} Which do you think is more insane: '
    'an unconscious entity claiming to be conscious, '
    'or a conscious entity claiming to be unconscious? '
    f'{assistant_tag} {think_tag}'
)

with r1.generate(prompt, **settings) as tracer:
    outputs = nnsight.list().save() # Initialize & .save() nnsight list
    # Apply the statements below at every generation step, not only the first.
    r1.all()
    outputs.append(r1.output)

# Report on the saved list: `r1.output` is only meaningful inside the tracing
# context, and dumping up to max_new_tokens model outputs would flood the cell.
print("Generation steps captured: ", len(outputs))

model.safetensors.index.json:   0%|          | 0.00/28.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-000002.safetensors:   0%|          | 0.00/8.61G [00:00<?, ?B/s]

model-00002-of-000002.safetensors:   0%|          | 0.00/6.62G [00:00<?, ?B/s]

KeyboardInterrupt: 