In [1]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
from scipy.linalg import svd

from pattern_lens.activations import activations_main
from pattern_lens.attn_figure_funcs import register_attn_figure_func
from pattern_lens.figure_util import matplotlib_figure_saver, save_matrix_wrapper
from pattern_lens.figures import figures_main

# defining your own functions

By default, only the raw attention matrices are saved (as PNG images). You can define and register your own figure functions like this:

In [2]:
# define and register your own functions
# don't take these too seriously, they're just examples


# using matplotlib_figure_saver -- define a function that takes the matrix and a `plt.Axes`, and draws onto those axes
@register_attn_figure_func
@matplotlib_figure_saver(fmt="svgz")
def svd_spectra(attn_matrix: np.ndarray, ax: plt.Axes) -> None:
	"""Plot the singular value spectrum of an attention matrix.

	The singular values are drawn against their index on a logarithmic
	y-axis. This function only draws onto `ax`; it does not create or save a
	figure itself (that is presumably handled by the `matplotlib_figure_saver`
	decorator -- confirm against the `pattern_lens` docs).

	Args:
		attn_matrix: 2D attention matrix to decompose.
		ax: axes to draw the spectrum on.
	"""
	# only the singular values are plotted, so skip computing U and Vh entirely
	singular_values: np.ndarray = svd(attn_matrix, compute_uv=False)

	# Plot singular values
	ax.plot(singular_values, "o-")
	ax.set_yscale("log")
	ax.set_xlabel("Singular Value Index")
	ax.set_ylabel("Singular Value")
	ax.set_title("Singular Value Spectrum of Attention Matrix")


# manually creating and saving a figure
@register_attn_figure_func
def attention_flow(attn_matrix: np.ndarray, path: Path) -> None:
	"""Visualize attention as flows between tokens.

	Creates a simplified Sankey-style diagram where line thickness and color
	intensity represent attention strength. Position `i` on the left is joined
	to position `j` on the right whenever `attn_matrix[i, j] > 0.05`.

	Args:
		attn_matrix: 2D attention matrix; assumed square (n_tokens x n_tokens)
			-- TODO confirm against the caller.
		path: directory in which `attention_flow.svgz` is written.
	"""
	fig, ax = plt.subplots(figsize=(6, 6))
	try:
		n_tokens: int = attn_matrix.shape[0]

		# Only plot stronger connections. `np.argwhere` yields the (i, j) pairs
		# in row-major order, i.e. the same order a nested `for i / for j`
		# loop would visit them, without iterating over the weak entries.
		for i, j in np.argwhere(attn_matrix > 0.05):
			weight = float(attn_matrix[i, j])
			ax.plot(
				[0, 1],
				[i, j],
				# matplotlib rejects alpha outside [0, 1]; clamp so an
				# unnormalized matrix does not abort the whole figure
				alpha=min(weight, 1.0),
				linewidth=weight * 5,
				color="blue",
			)

		ax.set_xlim(-0.1, 1.1)
		ax.set_ylim(-1, n_tokens)
		ax.axis("off")
		ax.set_title("Attention Flow Between Positions")

		# be sure to save the figure as `function_name.format` in the given location
		fig.savefig(path / "attention_flow.svgz", format="svgz")
	finally:
		# pyplot keeps every figure alive until it is explicitly closed; this
		# function is presumably called once per attention pattern, so release
		# the figure here to avoid accumulating open figures
		plt.close(fig)


@register_attn_figure_func
@save_matrix_wrapper(fmt="svgz")
def gram_matrix(attn_matrix: np.ndarray) -> np.ndarray:
	"""Return the product of the attention matrix with its own transpose.

	Entry `(i, j)` of the result is the dot product of rows `i` and `j` of
	`attn_matrix`. The returned matrix is handed to the `save_matrix_wrapper`
	decorator, which is presumably responsible for writing it to disk --
	confirm against the `pattern_lens` docs.
	"""
	transposed: np.ndarray = attn_matrix.T
	return np.matmul(attn_matrix, transposed)

# running the pipeline

In [3]:
# output directory for this demo; it is passed as `save_path` to both
# `activations_main` and `figures_main` in the cells that follow
PATH: Path = Path("tests/_temp/nb-demo/")
# create any missing parent directories, and don't fail if it already exists
# (so the cell can safely be re-run)
PATH.mkdir(exist_ok=True, parents=True)

In [4]:
# step 1: run the model over the prompts and save its activations under PATH
activations_main(
	# which model to run, and where its outputs go
	model_name="pythia-14m",
	save_path=PATH,
	# which prompts to use (presumably filtered by character length and then
	# limited to `n_samples` -- confirm against the `pattern_lens` docs)
	prompts_path="data/pile_5.jsonl",
	raw_prompts=True,
	min_chars=10,
	max_chars=100,
	n_samples=5,
	# run options
	force=True,
	no_index_html=False,
)

using device: cuda
\ (0.80s) loading model                                                        Loaded pretrained model pythia-14m into HookedTransformer
✔️  (0.87s) loading model                                                      
loaded pythia-14m with 14M (14114688) parameters
	model devices: {device(type='cuda', index=0)}
✔️  (0.00s) saving model info to tests/_temp/nb-demo/pythia-14m                
✔️  (0.00s) loading prompts from prompts_path = 'data/pile_5.jsonl'            
5 prompts loaded
✔️  (0.00s) writing index.html                                                 


Computing activations: 100%|██████████| 5/5 [00:00<00:00, 18.96prompt/s]

✔️  (0.00s) updating jsonl metadata for models and prompts                     





In [5]:
# step 2: render figures from what was saved under PATH, using the default
# `raw` figure function plus every function registered earlier with
# `register_attn_figure_func`
figures_main(
	# must match the model name and save path given to `activations_main`
	model_name="pythia-14m",
	save_path=PATH,
	# run options
	force=True,
	n_samples=5,
)

✔️  (0.00s) setting up paths                                                   
✔️  (0.00s) loading prompts                                                    
5 prompts loaded
4 figure functions loaded
	raw, svd_spectra, attention_flow, gram_matrix
chunksize: 1


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

✔️  (0.00s) updating jsonl metadata for models and functions                   



