In [62]:
import transformer_lens as tl
import torch as t
from torch import Tensor
import einops

# Pick the compute device once, up front: the first CUDA GPU when PyTorch can
# see one, otherwise the CPU. The choice is echoed so it is visible in the output.
if t.cuda.is_available():
    device = t.device("cuda:0")
else:
    device = t.device("cpu")
print(device)

cuda:0


In [28]:
# Load the pretrained GPT-2 Small checkpoint into TransformerLens' HookedTransformer
# wrapper; every later cell uses this one model object.
gpt2_small: tl.HookedTransformer = tl.HookedTransformer.from_pretrained(
    model_name="gpt2-small",
)

Loaded pretrained model gpt2-small into HookedTransformer


In [29]:
# Display the loaded model's configuration (layer/head counts, widths, vocab size,
# context length, ...). Left as the bare last expression so the notebook renders it.
gpt2_small.cfg

HookedTransformerConfig:
{'act_fn': 'gelu_new',
 'attention_dir': 'causal',
 'attn_only': False,
 'attn_types': None,
 'checkpoint_index': None,
 'checkpoint_label_type': None,
 'checkpoint_value': None,
 'd_head': 64,
 'd_mlp': 3072,
 'd_model': 768,
 'd_vocab': 50257,
 'd_vocab_out': 50257,
 'default_prepend_bos': True,
 'device': device(type='cuda'),
 'dtype': torch.float32,
 'eps': 1e-05,
 'final_rms': False,
 'from_checkpoint': False,
 'gated_mlp': False,
 'init_mode': 'gpt2',
 'init_weights': False,
 'initializer_range': 0.02886751345948129,
 'model_name': 'gpt2',
 'n_ctx': 1024,
 'n_devices': 1,
 'n_heads': 12,
 'n_layers': 12,
 'n_params': 84934656,
 'normalization_type': 'LNPre',
 'original_architecture': 'GPT2LMHeadModel',
 'parallel_attn_mlp': False,
 'positional_embedding_type': 'standard',
 'post_embedding_ln': False,
 'rotary_dim': None,
 'scale_attn_by_inverse_layer_idx': False,
 'seed': None,
 'tokenizer_name': 'gpt2',
 'tokenizer_prepends_bos': False,
 'use_attn_in': F

In [30]:
# Sanity-check the model by scoring a short passage of text. With
# return_type="loss" the model call returns a single scalar loss rather than logits.
model_description_text = '''## Loading Models

HookedTransformer comes loaded with >40 open source GPT-style models. You can load any of them in with `HookedTransformer.from_pretrained(MODEL_NAME)`. Each model is loaded into the consistent HookedTransformer architecture, designed to be clean, consistent and interpretability-friendly. 

For this demo notebook we'll look at GPT-2 Small, an 80M parameter model. To try the model the model out, let's find the loss on this paragraph!'''

# This is evaluation only, so do not build an autograd graph for the forward pass.
with t.no_grad():
    loss = gpt2_small(model_description_text, return_type="loss")

# Print the plain scalar instead of the tensor repr (device / grad_fn noise).
print(f"Model loss: {loss.item():.4f}")

Model loss: tensor(4.3943, device='cuda:0', grad_fn=<NegBackward0>)


In [34]:
# Two views of the same prompt: first as a list of token strings, then as the
# tensor of token ids.
number_prompt = "three hundred seventy two million four hundred nine thousand six hundred seventy nine:372409679"
print(gpt2_small.to_str_tokens(number_prompt))
print(gpt2_small.to_tokens(number_prompt))

# The opposite direction: decode a hand-written list of token ids back into text.
print(gpt2_small.to_string([50256, 70, 457, 17]))

['<|endoftext|>', 'three', ' hundred', ' seventy', ' two', ' million', ' four', ' hundred', ' nine', ' thousand', ' six', ' hundred', ' seventy', ' nine', ':', '37', '24', '09', '679']
tensor([[50256, 15542,  3470, 31989,   734,  1510,  1440,  3470,  5193,  7319,
          2237,  3470, 31989,  5193,    25,  2718,  1731,  2931, 37601]],
       device='cuda:0')
<|endoftext|>gpt2


In [54]:
# Greedy next-token predictions for the number prompt.
# NOTE: this rebinds `model_description_text` (previously the markdown passage used
# for the loss check). The name is kept because the accuracy cell below reads it.
model_description_text = "three hundred seventy two million four hundred nine thousand six hundred seventy nine:372409679"

# Inference only, so skip autograd bookkeeping.
with t.no_grad():
    logits: Tensor = gpt2_small(model_description_text, return_type="logits")

# Take the most likely token at every position. The single batch entry is selected
# by explicit index rather than a bare .squeeze(), which would also collapse any
# other size-1 axis. The final position is dropped because it predicts a token
# beyond the end of the prompt, so there is nothing to compare it with.
prediction = logits.argmax(dim=-1)[0, :-1]
print(gpt2_small.to_string(prediction))


- and-. people hundred seventy million five hundred seventy one thousand






In [55]:
# Targets for next-token prediction: the prompt's own token ids shifted left by one,
# i.e. with the first (prepended `<|endoftext|>`) token removed. The batch entry is
# picked by explicit index instead of a bare .squeeze(), so a length-1 sequence axis
# can never be collapsed by accident.
true_tokens = gpt2_small.to_tokens(model_description_text)[0, 1:]

# Count the positions where the greedy prediction matches the real next token.
num_correct = (prediction == true_tokens).sum()
print(f"Number of correct tokens: {num_correct}/{len(true_tokens)}")

Number of correct tokens: 3/18


In [56]:
# Run the model on one sentence and keep the intermediate activations.
gpt2_text = "Natural language processing tasks, such as question answering, machine translation, reading comprehension, and summarization, are typically approached with supervised learning on taskspecific datasets."
gpt2_tokens = gpt2_small.to_tokens(gpt2_text)

# Only activations are inspected afterwards, so run without autograd: otherwise the
# returned logits keep the whole forward graph alive for no benefit.
# remove_batch_dim=True is passed so cached activations carry no leading batch axis.
with t.no_grad():
    gpt2_logits, gpt2_cache = gpt2_small.run_with_cache(gpt2_tokens, remove_batch_dim=True)

In [57]:
# Pull layer 0's attention pattern out of the activation cache; the cache is
# indexed here by (activation name, layer index).
attn_patterns_layer_0 = gpt2_cache["pattern", 0]

In [58]:
# Inspect the pattern's shape. Presumably (n_heads, query_pos, key_pos), with no
# batch axis because the cache was built with remove_batch_dim=True.
print(attn_patterns_layer_0.shape)

torch.Size([12, 33, 33])


In [64]:
# Rebuild layer 0's attention pattern by hand from the cached queries and keys,
# then check it against the pattern the model itself cached.
q, k = gpt2_cache["q", 0], gpt2_cache["k", 0]
seq, nhead, headsize = q.shape

# Per-head dot product of every query position with every key position.
layer0_attn_scores = einops.einsum(q, k, "seqQ n h, seqK n h -> n seqQ seqK")

# Causal mask: True strictly above the diagonal, i.e. where a query would look at a
# later key. It is built directly on q's device so it always matches the scores,
# independent of any separately chosen global device.
mask = t.triu(t.ones((seq, seq), dtype=t.bool, device=q.device), diagonal=1)

# A large negative score makes the masked entries vanish after the softmax.
layer0_attn_scores.masked_fill_(mask, -1e9)

# Scale by sqrt(head size) and normalise over the key axis.
layer0_pattern_from_q_and_k = (layer0_attn_scores / headsize**0.5).softmax(-1)

# The point of the exercise: the hand-built pattern must match the cached one.
t.testing.assert_close(gpt2_cache["pattern", 0], layer0_pattern_from_q_and_k)
print(layer0_pattern_from_q_and_k.shape)

torch.Size([12, 33, 33])
