In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from transformer_lens import HookedTransformer, HookedTransformerConfig
import circuitsvis as cv
from IPython.display import display

from tokenizer import CustomArithmeticTokenizer

from arithmetic import extract_numbers_from_problem, generate_addition_problem, generate_completion_with_cache

# Setup device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


### View models in registry
---

In [2]:
from model_registry import ModelRegistry

registry = ModelRegistry()
df = registry.list_models_dataframe()
print("Available models in the registry:")
display(df)

if df.empty:
    raise ValueError("No models found in the registry. Please save a model first.")


Available models in the registry:


Unnamed: 0,act_fn,d_head,d_mlp,d_model,d_vocab,description,n_ctx,n_heads,n_layers,training_digits,model_name
0,gelu,32,512,256,14,Base model for arithmetic tasks,128,8,4,3,arithmetic_model_4layers


##### Select model in registry for experimentation

In [3]:
# For demonstration, select the first available model.
selected_model_name = df.iloc[0]["model_name"]
print(f"Selected model: {selected_model_name}")

Selected model: arithmetic_model_4layers


---

In [4]:
chars = "0123456789.+=-"
token_to_id = {ch: i for i, ch in enumerate(chars)}
id_to_token = {i: ch for ch, i in token_to_id.items()}

def tokenize(text):
    return [token_to_id[ch] for ch in text]

def detokenize(tokens):
    return "".join(id_to_token[t] for t in tokens)

# Load the selected model and its configuration from the registry.
model, config = registry.load_model(selected_model_name)

model.to(device)
model.eval()
print(f"Loaded model '{selected_model_name}' with configuration:")
print(config)

Moving model to device:  cuda
Moving model to device:  cuda
Loaded model 'arithmetic_model_4layers' with configuration:
ModelConfig(n_layers=4, d_model=256, n_heads=8, d_head=32, d_mlp=512, n_ctx=128, d_vocab=14, act_fn='gelu', training_digits=3, description='Base model for arithmetic tasks')


In [5]:
def run_model_with_cache(model, prompt_text):
    """
    Runs the model on a prompt and returns the tokens list, logits and cache.
    
    Since our model does not have a built-in tokenizer, we simply convert the prompt into a list of characters.
    """
    # Create a tokens list by simply splitting the prompt into characters
    tokens_list = list(prompt_text)
    
    # Tokenize the prompt and add a batch dimension
    tokens_tensor = torch.tensor([tokenize(prompt_text)], dtype=torch.long).to(device)
    
    with torch.no_grad():
        # Run the model with cache to capture activations (including attention patterns)
        logits, cache = model.run_with_cache(tokens_tensor)
        
    return tokens_list, logits, cache



In [6]:
sample_prompt = generate_addition_problem(digit_length=3, include_answer=True)
print(sample_prompt)

prompt_end = sample_prompt.find("=.")
prompt = sample_prompt[:prompt_end+2]
prompt_tokens = torch.tensor([tokenize(prompt)], dtype=torch.long).to(device)
generated_tokens, cache, logits = generate_completion_with_cache(model, prompt_tokens, max_gen=2)
generated_str = detokenize(generated_tokens)

print(f"prompt: {sample_prompt}")
print(f"Sample prompt: {sample_prompt}")
print(f"Generated: {generated_str}")


.490.+.456.=.946.
prompt: .490.+.456.=.946.
Sample prompt: .490.+.456.=.946.
Generated: .490.+.456.=.94


In [7]:


str_tokens = [detokenize([i]) for i in tokenize(prompt)]

for layer in range(model.cfg.n_layers):
    attention_pattern = cache["pattern", layer]
    display(cv.attention.attention_patterns(tokens=str_tokens, attention=attention_pattern))