# Intro

![Machine Learning](https://imgs.xkcd.com/comics/machine_learning_2x.png)

# Setup

Ezt egyszerűen elloptam egy példából, nekünk nem kell a sok bonyolítás belőle. De szerintem kúl látni.

Sőt, lényegében az egész Python / Jupyter / Colab tudás néhány példából van másolva. Lásd a [Források](#Források) részt.

In [1]:
import sys

IN_COLAB = 'google.colab' in sys.modules

if IN_COLAB:
    %pip install einops
    %pip install jaxtyping
    %pip install transformer_lens
    %pip install git+https://github.com/callummcdougall/CircuitsVis.git#subdirectory=python
else:
    # See README.md for local setup
    pass

In [2]:
import os
import sys
import plotly.express as px
import torch as t
from torch import Tensor
import torch.nn as nn
import torch.nn.functional as F
from pathlib import Path
import numpy as np
import einops
from jaxtyping import Int, Float
from typing import List, Optional, Tuple
import functools
from tqdm import tqdm
from IPython.display import display
import webbrowser
import gdown
from transformer_lens.hook_points import HookPoint
from transformer_lens import utils, HookedTransformer, HookedTransformerConfig, FactoredMatrix, ActivationCache
from transformer_lens.utils import get_corner
import circuitsvis as cv

# Nothing in this notebook needs gradients, so switch autograd off once,
# globally, to save computation time.
t.set_grad_enabled(False)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = t.device("cuda") if t.cuda.is_available() else t.device("cpu")

# Ismerkedés a modellel

In [3]:
# Load the pretrained "gpt2-small" weights into a HookedTransformer.
gpt2 = HookedTransformer.from_pretrained("gpt2-small")

Loaded pretrained model gpt2-small into HookedTransformer


In [None]:
# Dump the module tree first, then the configuration, one after the other.
print(gpt2, gpt2.cfg, sep="\n")

## Input: "Mit eszik ez?" avagy Tokenization

In [4]:
# Show the tokenizer object attached to the model (its repr lists the
# vocabulary size and the special tokens).
gpt2.tokenizer

GPT2TokenizerFast(name_or_path='gpt2', vocab_size=50257, model_max_length=1024, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<|endoftext|>', 'eos_token': '<|endoftext|>', 'unk_token': '<|endoftext|>', 'pad_token': '<|endoftext|>'}, clean_up_tokenization_spaces=True),  added_tokens_decoder={
	50256: AddedToken("<|endoftext|>", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),
}

In [None]:
# Display the full vocabulary mapping (token string -> token id).
gpt2.tokenizer.vocab

In [None]:
# Order the (token string, token id) pairs by id, then show the 20 entries
# with the highest ids.
vocab_sorted = sorted(
    gpt2.tokenizer.vocab.items(),
    key=lambda entry: entry[1],
)
vocab_sorted[-20:]

[('Revolution', 50237),
 ('Ġsnipers', 50238),
 ('Ġreverted', 50239),
 ('Ġconglomerate', 50240),
 ('Terry', 50241),
 ('794', 50242),
 ('Ġharsher', 50243),
 ('Ġdesolate', 50244),
 ('ĠHitman', 50245),
 ('Commission', 50246),
 ('Ġ(/', 50247),
 ('âĢ¦."', 50248),
 ('Compar', 50249),
 ('Ġamplification', 50250),
 ('ominated', 50251),
 ('Ġregress', 50252),
 ('ĠCollider', 50253),
 ('Ġinformants', 50254),
 ('Ġgazed', 50255),
 ('<|endoftext|>', 50256)]

In [6]:
# Prompt used throughout the rest of the notebook. Two alternative texts are
# kept commented out below; the active one repeats the name "Quomatarus"
# several times (presumably so that repeated-token behaviour is visible in the
# later attention plots -- confirm).
# szoveg = '''## Loading Models

# HookedTransformer comes loaded with >40 open source GPT-style models. You can load any of them in with `HookedTransformer.from_pretrained(MODEL_NAME)`. Each model is loaded into the consistent HookedTransformer architecture, designed to be clean, consistent and interpretability-friendly.

# For this demo notebook we'll look at GPT-2 Small, an 80M parameter model. To try the model the model out, let's find the loss on this paragraph!'''

# szoveg = "Natural language processing tasks, such as question answering, machine translation, reading comprehension, and summarization, are typically approached with supervised learning on taskspecific datasets."

szoveg = "This is a story about Quomatarus. When one day Quomatarus decided to do something different and bought a plane ticket to Lamanandu. When he arrived to the airport Quomatarus noticed"

In [None]:
# Token ids as produced by the tokenizer itself.
# NOTE(review): gpt2.to_tokens() below presumably also prepends a BOS token,
# so the two results may differ in length by one -- confirm.
gpt2.tokenizer.encode(szoveg)

In [8]:
# Tokenise the prompt in two forms: human-readable string pieces (used later
# as plot labels) and the id tensor that is fed to the model.
str_tokenek = gpt2.to_str_tokens(szoveg)
tokenek = gpt2.to_tokens(szoveg)

# The id tensor carries a leading batch dimension.
print(tokenek.shape)

torch.Size([1, 45])


### Embedding

In [9]:
# Shape of the token-embedding matrix: one row per vocabulary entry.
gpt2.W_E.shape

torch.Size([50257, 768])

In [12]:
# Embed the prompt by hand: indexing the embedding matrix with the id tensor
# picks one embedding row per token and keeps the id tensor's leading dims.
beagyazva = gpt2.W_E[tokenek]
print(beagyazva.shape)

# Peek at a small corner of the result instead of printing everything.
get_corner(beagyazva)

torch.Size([1, 45, 768])


tensor([[[ 0.0517, -0.0274,  0.0502],
         [ 0.0270, -0.0939,  0.0738],
         [-0.0078,  0.0120,  0.0575]]], device='cuda:0')

## Mit köp ki?

In [None]:
# gpt2(tokenek, return_type="loss")
# A plain forward pass; the output has one score per vocabulary entry for
# every prompt position.
gpt2(tokenek).shape

torch.Size([1, 45, 50257])

In [13]:
# logits, cache = gpt2.run_with_cache(tokenek, remove_batch_dim=True)
# Run the model once and keep both the logits and the cache of intermediate
# activations.
logits, cache = gpt2.run_with_cache(tokenek)
print(logits.shape)
# Printing the cache lists the names of all stored activations.
print(cache)

torch.Size([1, 45, 50257])
ActivationCache with keys ['hook_embed', 'hook_pos_embed', 'blocks.0.hook_resid_pre', 'blocks.0.ln1.hook_scale', 'blocks.0.ln1.hook_normalized', 'blocks.0.attn.hook_q', 'blocks.0.attn.hook_k', 'blocks.0.attn.hook_v', 'blocks.0.attn.hook_attn_scores', 'blocks.0.attn.hook_pattern', 'blocks.0.attn.hook_z', 'blocks.0.hook_attn_out', 'blocks.0.hook_resid_mid', 'blocks.0.ln2.hook_scale', 'blocks.0.ln2.hook_normalized', 'blocks.0.mlp.hook_pre', 'blocks.0.mlp.hook_post', 'blocks.0.hook_mlp_out', 'blocks.0.hook_resid_post', 'blocks.1.hook_resid_pre', 'blocks.1.ln1.hook_scale', 'blocks.1.ln1.hook_normalized', 'blocks.1.attn.hook_q', 'blocks.1.attn.hook_k', 'blocks.1.attn.hook_v', 'blocks.1.attn.hook_attn_scores', 'blocks.1.attn.hook_pattern', 'blocks.1.attn.hook_z', 'blocks.1.hook_attn_out', 'blocks.1.hook_resid_mid', 'blocks.1.ln2.hook_scale', 'blocks.1.ln2.hook_normalized', 'blocks.1.mlp.hook_pre', 'blocks.1.mlp.hook_post', 'blocks.1.hook_mlp_out', 'blocks.1.hook_res

In [23]:
# Turn the logits into one probability distribution over the vocabulary per
# prompt position. squeeze(0) removes only the batch axis, so a size-1
# sequence axis would not be collapsed by accident.
probs = logits.squeeze(0).softmax(dim=-1)
print(utils.get_corner(probs))

# Sanity check: each position's probabilities must sum to 1. The sum used to
# be computed and thrown away; asserting it makes the check actually run
# without adding any cell output.
prob_sums = einops.reduce(probs, 'pos token -> pos', 'sum')
assert t.allclose(prob_sums, t.ones_like(prob_sums), atol=1e-4), prob_sums

# Probability assigned to the token that really came next: selecting the
# prompt's token ids as columns gives a (pos, pos) matrix whose first
# super-diagonal pairs position i with token i + 1.
t.round(probs[:, tokenek.squeeze(0)].diag(1), decimals=3)

tensor([[6.6197e-04, 2.4113e-02, 9.5430e-04],
        [1.1472e-05, 1.7084e-05, 5.7139e-07],
        [1.9788e-05, 7.8417e-06, 1.0032e-06]], device='cuda:0')


tensor([0.0080, 0.1840, 0.3330, 0.0130, 0.4980, 0.0000, 0.0000, 0.0000, 0.0000,
        0.1410, 0.0040, 0.0090, 0.0150, 0.0260, 0.9140, 0.9800, 0.9960, 0.0090,
        0.8060, 0.0170, 0.5500, 0.0160, 0.0380, 0.0010, 0.4080, 0.0110, 0.0940,
        0.5280, 0.0020, 0.0010, 0.0020, 0.0050, 0.0320, 0.0190, 0.3050, 0.0910,
        0.0140, 0.0970, 0.0600, 0.1080, 0.9930, 0.9940, 0.9980, 0.0200],
       device='cuda:0')

In [None]:
# Logits at the last prompt position, i.e. the model's scores for whatever
# token would follow the prompt. Indexing with -1 instead of a literal
# position keeps this correct when the prompt (and so its length) changes.
utso = logits[0, -1]

# Highest-scoring token id, computed once and reused for both outputs.
predicted_id = utso.argmax()
print(predicted_id)
gpt2.tokenizer.decode(predicted_id)

In [17]:
# Per-position log-probabilities over the vocabulary, batch axis removed:
# shape (pos, vocab).
log_probs = logits.squeeze(0).log_softmax(dim=-1)

# Token that actually follows each position (the first token has no
# predecessor, so it is dropped).
next_tokens = tokenek.squeeze(0)[1:]

# Variant 1: select the prompt's token ids as columns and read the first
# super-diagonal, which pairs position i with token i + 1.
token_log_probs1 = log_probs[:, tokenek.squeeze(0)].diag(1)

# Variant 2: gather the same values directly. The last position has no next
# token, so it is dropped along the POSITION axis (`[:-1]`). The earlier
# `[:, :-1]` trimmed the vocabulary axis instead, which removed the highest
# token id from the lookup table and would fail for a target with that id.
token_log_probs = log_probs[:-1].gather(dim=-1, index=next_tokens[:, None]).squeeze(-1)
token_log_probs

tensor([-4.8580e+00, -1.6949e+00, -1.1008e+00, -4.3222e+00, -6.9673e-01,
        -1.0088e+01, -8.0420e+00, -1.0918e+01, -9.2068e+00, -1.9596e+00,
        -5.5019e+00, -4.7312e+00, -4.1974e+00, -3.6655e+00, -9.0203e-02,
        -1.9922e-02, -3.7248e-03, -4.6675e+00, -2.1606e-01, -4.0747e+00,
        -5.9848e-01, -4.1610e+00, -3.2812e+00, -7.3975e+00, -8.9541e-01,
        -4.5045e+00, -2.3666e+00, -6.3920e-01, -6.3236e+00, -7.3244e+00,
        -6.1945e+00, -5.3811e+00, -3.4567e+00, -3.9479e+00, -1.1884e+00,
        -2.4014e+00, -4.2923e+00, -2.3332e+00, -2.8089e+00, -2.2295e+00,
        -6.7844e-03, -5.6575e-03, -2.0999e-03, -3.9126e+00], device='cuda:0')

In [25]:
# plot_loss_difference(token_log_probs, str_tokenek, 0)
# Line plot of the per-token log-probabilities. The hover labels skip the
# first string token so that each point is labelled with the token it scores.
px.line(utils.to_numpy(token_log_probs), hover_name=str_tokenek[1:])

# Struktúra

## Hogy néznek ki a "nagyok"?

- GPT-3: https://arxiv.org/abs/2005.14165v4
- PaLM: https://jmlr.org/papers/v24/22-1144.html
- LLaMA: https://arxiv.org/abs/2302.13971

In [40]:
# Print name and shape of every parameter that sits outside the blocks, plus
# those of block 0; parameters of all other blocks are skipped.
for name, p in gpt2.named_parameters():
    if "blocks" in name and ".0." not in name:
        continue
    print(name, p.shape)

embed.W_E torch.Size([50257, 768])
pos_embed.W_pos torch.Size([1024, 768])
blocks.0.attn.W_Q torch.Size([12, 768, 64])
blocks.0.attn.W_K torch.Size([12, 768, 64])
blocks.0.attn.W_V torch.Size([12, 768, 64])
blocks.0.attn.W_O torch.Size([12, 64, 768])
blocks.0.attn.b_Q torch.Size([12, 64])
blocks.0.attn.b_K torch.Size([12, 64])
blocks.0.attn.b_V torch.Size([12, 64])
blocks.0.attn.b_O torch.Size([768])
blocks.0.mlp.W_in torch.Size([768, 3072])
blocks.0.mlp.b_in torch.Size([3072])
blocks.0.mlp.W_out torch.Size([3072, 768])
blocks.0.mlp.b_out torch.Size([768])
unembed.W_U torch.Size([768, 50257])
unembed.b_U torch.Size([50257])


In [45]:
# Same listing for the cached activations: everything outside the blocks and
# everything in block 0, skipping the other blocks.
for activation_name, activation in cache.items():
    if "blocks" in activation_name and ".0." not in activation_name:
        continue
    print(activation_name, activation.shape)

hook_embed torch.Size([1, 45, 768])
hook_pos_embed torch.Size([1, 45, 768])
blocks.0.hook_resid_pre torch.Size([1, 45, 768])
blocks.0.ln1.hook_scale torch.Size([1, 45, 1])
blocks.0.ln1.hook_normalized torch.Size([1, 45, 768])
blocks.0.attn.hook_q torch.Size([1, 45, 12, 64])
blocks.0.attn.hook_k torch.Size([1, 45, 12, 64])
blocks.0.attn.hook_v torch.Size([1, 45, 12, 64])
blocks.0.attn.hook_attn_scores torch.Size([1, 12, 45, 45])
blocks.0.attn.hook_pattern torch.Size([1, 12, 45, 45])
blocks.0.attn.hook_z torch.Size([1, 45, 12, 64])
blocks.0.hook_attn_out torch.Size([1, 45, 768])
blocks.0.hook_resid_mid torch.Size([1, 45, 768])
blocks.0.ln2.hook_scale torch.Size([1, 45, 1])
blocks.0.ln2.hook_normalized torch.Size([1, 45, 768])
blocks.0.mlp.hook_pre torch.Size([1, 45, 3072])
blocks.0.mlp.hook_post torch.Size([1, 45, 3072])
blocks.0.hook_mlp_out torch.Size([1, 45, 768])
blocks.0.hook_resid_post torch.Size([1, 45, 768])
ln_final.hook_scale torch.Size([1, 45, 1])
ln_final.hook_normalized torc

In [42]:
from fancy_einsum import einsum

In [48]:
# Recompute the first half of block 0's MLP by hand: take the cached
# normalised input, multiply by the input weights and add the bias.
mlp_elott = cache["normalized", 0, "ln2"]
mlp_kozepe = (
    einsum(
        "batch pos d_model, d_model d_mlp -> batch pos d_mlp",
        mlp_elott,
        gpt2.W_in[0],
    )
    + gpt2.b_in[0]
)

# The hand-computed values should match the cached pre-activation values.
print(utils.get_corner(mlp_kozepe))
print(utils.get_corner(cache["pre", 0, "mlp"]))

tensor([[[-0.1944, -2.0492, -2.7343],
         [ 0.3661, -1.2688, -1.3038],
         [ 0.0980, -1.5448, -1.3435]]], device='cuda:0')
tensor([[[-0.1944, -2.0492, -2.7343],
         [ 0.3661, -1.2688, -1.3038],
         [ 0.0980, -1.5448, -1.3435]]], device='cuda:0')


In [50]:
# Second half of block 0's MLP by hand: apply the GELU non-linearity, then
# the output weights and bias.
mlp_kozepe2 = utils.gelu_new(mlp_kozepe)
mlp_utan = (
    einsum(
        "batch pos d_mlp, d_mlp d_model -> batch pos d_model",
        mlp_kozepe2,
        gpt2.W_out[0],
    )
    + gpt2.b_out[0]
)

# The hand-computed result should match the cached MLP output of block 0.
print(utils.get_corner(mlp_utan))
print(utils.get_corner(cache["mlp_out", 0]))


tensor([[[-0.5169,  0.2836,  0.4329],
         [-0.6278, -0.1156,  1.0684],
         [-1.6660,  0.3645, -0.8681]]], device='cuda:0')
tensor([[[-0.5169,  0.2836,  0.4329],
         [-0.6278, -0.1156,  1.0684],
         [-1.6660,  0.3645, -0.8681]]], device='cuda:0')


In [51]:
from transformer_lens.utils import get_corner

In [64]:
# Attention patterns of block 0 with the size-1 batch axis squeezed away;
# the leading axis that remains indexes the heads.
figyelem = cache["pattern", 0].squeeze()
print(figyelem.shape)

# Visualise a single head (index 5) of this block.
# Compare block 0 head 5 to block 5 head 5!
cv.attention.attention_pattern(tokens=str_tokenek, attention=figyelem[5])

torch.Size([12, 45, 45])


# Induction Heads

In [57]:
# Alternative kept for reference: show the patterns of every layer in turn.
# for layer in range(gpt2.cfg.n_layers):
    # attention_pattern = cache["pattern", layer]
    # display(cv.attention.attention_patterns(tokens=str_tokenek, attention=attention_pattern))

# Block whose heads are visualised; change this one value to look at another
# block and the head labels below follow automatically.
layer = 5

# Cached attention patterns of that block, batch axis removed.
attention_pattern = cache["pattern", layer, "attn"].squeeze(0)
print(attention_pattern.shape)

# One label per head, derived from the tensor's leading axis instead of a
# hard-coded head count.
n_heads = attention_pattern.shape[0]
valami = cv.attention.attention_patterns(
    tokens=str_tokenek,
    attention=attention_pattern,
    attention_head_names=[f"L{layer}H{head}" for head in range(n_heads)],
)
display(valami)

torch.Size([12, 45, 45])


# Források

Ezek a fő inspirációk:

* https://arena-ch1-transformers.streamlit.app/[1.2]_Intro_to_Mech_Interp
* https://transformer-circuits.pub/2021/framework/index.html

Videó:

* https://neelnanda.io/transformer-tutorial

Egyéb:

* https://www.lesswrong.com/posts/TvrfY4c9eaGLeyDkE/induction-heads-illustrated
