In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# .py modules are picked up live instead of requiring a kernel restart.
%load_ext autoreload
%autoreload 2


In [3]:
from transformer_lens_activations import load_model, attention_analysis

# Checkpoint identifier passed to the project's `load_model` helper; kept as a
# named constant so the model under study is easy to spot and swap.
MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
model = load_model(MODEL_NAME)



Loaded pretrained model deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B into HookedTransformer
Loaded model: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
Model has 28 layers
Model has 12 attention heads per layer
Model has 1536 dimensions
Model uses normalization: RMSPre


In [4]:
import torch
# Single forward pass over a short toy prompt, keeping both the logits and the
# activation cache. No backward pass happens in this cell, so gradient tracking
# is switched off for the duration of the call.
# NOTE: the attention-grid cell further down re-tokenizes this same literal and
# reads from `cache`, so the two strings have to stay identical.
with torch.no_grad():
    prompt_tokens = model.to_tokens("What is 1 + 1? Use Peano axioms.")
    logits, cache = model.run_with_cache(prompt_tokens)

In [5]:
import holoviews as hv
import numpy as np
from holoviews import opts

from transformer_lens_activations import show_attention

# Build one attention heatmap per head of a single layer and arrange them in a
# grid with four plots per row.
text = "What is 1 + 1? Use Peano axioms."
tokens = model.to_str_tokens(text)   # string tokens, used as heatmap labels
token_ids = model.to_tokens(text)    # loop-invariant, so tokenize only once

layer_idx = 3
# Take the head count from the model config instead of hard-coding 12, so the
# cell keeps working if a different checkpoint is loaded.
n_heads = model.cfg.n_heads

attention_maps = []
for head_idx in range(n_heads):
    attn = attention_analysis(model, cache, token_ids, head_idx=head_idx, layer_idx=layer_idx)
    # Replace -inf entries with NaN before plotting.
    # NOTE(review): this writes in place; if `attention_analysis` returns a view
    # into `cache`, the cached activations are modified too — confirm.
    attn[attn == -np.inf] = np.nan

    # Use show_attention to create the visualization for this head
    heatmap = show_attention(attn, tokens=tokens)
    attention_maps.append(heatmap)

# Four columns; each heatmap gets its own axes (axiswise=True)
layout = hv.Layout(attention_maps).cols(4).opts(
    title=f'Attention Patterns Across All {n_heads} Heads'
).opts(opts.HeatMap(axiswise=True))

layout

=== Attention Analysis ===

Attention patterns shape: torch.Size([1, 12, 14, 14])
Shape breakdown: [batch, heads, seq_len, seq_len]
Head 0 attention shape: torch.Size([14, 14])

=== Attention Analysis ===

Attention patterns shape: torch.Size([1, 12, 14, 14])
Shape breakdown: [batch, heads, seq_len, seq_len]
Head 1 attention shape: torch.Size([14, 14])

=== Attention Analysis ===

Attention patterns shape: torch.Size([1, 12, 14, 14])
Shape breakdown: [batch, heads, seq_len, seq_len]
Head 2 attention shape: torch.Size([14, 14])

=== Attention Analysis ===

Attention patterns shape: torch.Size([1, 12, 14, 14])
Shape breakdown: [batch, heads, seq_len, seq_len]
Head 3 attention shape: torch.Size([14, 14])

=== Attention Analysis ===

Attention patterns shape: torch.Size([1, 12, 14, 14])
Shape breakdown: [batch, heads, seq_len, seq_len]
Head 4 attention shape: torch.Size([14, 14])

=== Attention Analysis ===

Attention patterns shape: torch.Size([1, 12, 14, 14])
Shape breakdown: [batch, hea

In [6]:
from datasets import load_dataset

# Load every subject configuration used here from the Hendrycks MATH dataset
# repo with one loop instead of seven copy-pasted calls.
HENDRYCKS_MATH_REPO = "EleutherAI/hendrycks_math"
MATH_SUBJECTS = (
    "algebra",
    "counting_and_probability",
    "geometry",
    "intermediate_algebra",
    "number_theory",
    "prealgebra",
    "precalculus",
)

# Subject name -> loaded dataset, loaded in the order listed above.
math_datasets = {
    subject: load_dataset(HENDRYCKS_MATH_REPO, subject) for subject in MATH_SUBJECTS
}

# Keep the original per-subject names bound so later cells continue to work.
ds_algebra = math_datasets["algebra"]
ds_counting_and_probability = math_datasets["counting_and_probability"]
ds_geometry = math_datasets["geometry"]
ds_intermediate_algebra = math_datasets["intermediate_algebra"]
ds_number_theory = math_datasets["number_theory"]
ds_prealgebra = math_datasets["prealgebra"]
ds_precalculus = math_datasets["precalculus"]

In [7]:
# Pick the second training example's problem statement. Indexing the dataset
# directly avoids materialising the whole train split as a pandas DataFrame
# just to read one cell, and does not rely on attribute-style column access.
problem = ds_algebra["train"][1]["problem"]

In [8]:
import torch
# Forward pass over the selected dataset problem. This rebinds `logits` and
# `cache`, replacing the values computed for the earlier toy prompt. Gradient
# tracking is disabled because nothing is back-propagated here.
with torch.no_grad():
    problem_tokens = model.to_tokens(problem)
    logits, cache = model.run_with_cache(problem_tokens)

In [9]:
from transformer_lens_activations import attention_analysis, show_all_attention_heads

# Layer whose attention heads are displayed for the dataset problem.
PROBLEM_LAYER_IDX = 15

# Last expression of the cell, so whatever the helper returns is rendered.
show_all_attention_heads(model, cache, problem, layer_idx=PROBLEM_LAYER_IDX)

=== Attention Analysis ===

Attention patterns shape: torch.Size([1, 12, 113, 113])
Shape breakdown: [batch, heads, seq_len, seq_len]
Head 0 attention shape: torch.Size([113, 113])

=== Attention Analysis ===

Attention patterns shape: torch.Size([1, 12, 113, 113])
Shape breakdown: [batch, heads, seq_len, seq_len]
Head 1 attention shape: torch.Size([113, 113])

=== Attention Analysis ===

Attention patterns shape: torch.Size([1, 12, 113, 113])
Shape breakdown: [batch, heads, seq_len, seq_len]
Head 2 attention shape: torch.Size([113, 113])

=== Attention Analysis ===

Attention patterns shape: torch.Size([1, 12, 113, 113])
Shape breakdown: [batch, heads, seq_len, seq_len]
Head 3 attention shape: torch.Size([113, 113])

=== Attention Analysis ===

Attention patterns shape: torch.Size([1, 12, 113, 113])
Shape breakdown: [batch, heads, seq_len, seq_len]
Head 4 attention shape: torch.Size([113, 113])

=== Attention Analysis ===

Attention patterns shape: torch.Size([1, 12, 113, 113])
Shape 

In [None]:
from transformer_lens_activations import get_example_probe_tasks, train_linear_probe


# Value passed as `sample_size` when building the example probe tasks.
PROBE_SAMPLE_SIZE = 200

example_tasks = get_example_probe_tasks(sample_size=PROBE_SAMPLE_SIZE)

In [11]:
# Display the probe-task structure returned by `get_example_probe_tasks`.
# NOTE(review): this renders the whole nested dict, including every sampled
# problem text, which makes the saved output very long — consider showing only
# the keys or a few entries per task instead.
example_tasks

{'math_category': {'texts': ['Let \\[f(x) = \\left\\{\n\\begin{array}{cl} ax+3, &\\text{ if }x>2, \\\\\nx-5 &\\text{ if } -2 \\le x \\le 2, \\\\\n2x-b &\\text{ if } x <-2.\n\\end{array}\n\\right.\\]Find $a+b$ if the piecewise function is continuous (which means that its graph can be drawn without lifting your pencil from the paper).',
   'A rectangular band formation is a formation with $m$ band members in each of $r$ rows, where $m$ and $r$ are integers. A particular band has less than 100 band members. The director arranges them in a rectangular formation and finds that he has two members left over. If he increases the number of members in each row by 1 and reduces the number of rows by 2, there are exactly enough places in the new formation for each band member. What is the largest number of members the band could have?',
   'What is the degree of the polynomial $(4 +5x^3 +100 +2\\pi x^4 + \\sqrt{10}x^4 +9)$?',
   'Evaluate $\\left\\lceil3\\left(6-\\frac12\\right)\\right\\rceil$.',
