# Visualizing the Attention Heads of GPT-2

This experiment visualizes the attention heads of GPT-2. I want to see
- which tokens the different heads of a single layer pay attention to
- which tokens the same heads in different layers pay attention to.

Furthermore, I will try the same visualization with causal language modelling to see how the generation of tokens depends on the existing context. All the results will be stored in the `./results` directory.

In [1]:
import torch as t
import pandas as pd
from transformers import AutoModel, AutoTokenizer, GPT2LMHeadModel
from datasets import load_dataset
import altair as alt

# Pick the best available accelerator: CUDA first, then Apple MPS, else CPU.
# (Two independent assignments would let the MPS check overwrite a successful
# CUDA detection with "cpu".)
if t.cuda.is_available():
    device = "cuda"
elif t.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

In [2]:
# Checkpoint used for both the model and the tokenizer in this notebook.
model_id = "openai-community/gpt2-medium"
# output_attentions=True is requested so that the attention weights can be
# read from the model output (via `.attentions`) further down.
model = AutoModel.from_pretrained(model_id, output_attentions=True)
tokenizer = AutoTokenizer.from_pretrained(model_id, add_prefix_space=True)

config.json:   0%|          | 0.00/718 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.52G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [3]:
# Sample text, encoded as a PyTorch tensor of token ids.
txt = "The cat ate the mat. And then sat on a rat. But what he wanted was a hat."
inputs = tokenizer.encode(txt, return_tensors='pt')
# Last expression of the cell: displays the tensor shape and the ids.
inputs.shape, inputs

(torch.Size([1, 21]),
 tensor([[  383,  3797, 15063,   262,  2603,    13,   843,   788,  3332,   319,
            257,  4227,    13,   887,   644,   339,  2227,   373,   257,  6877,
             13]]))

In [4]:
# Run the encoded text through the model and keep only the attention weights.
attns = model(inputs).attentions
# Index 0 is taken as the first layer; its shape is displayed as a sanity check.
first_layer_attn = attns[0]
first_layer_attn.shape # B, N_h, L_s, L_s

torch.Size([1, 16, 21, 21])

## Attention Map Functions 

In [5]:
def mtx2df(m, max_row, max_col, row_tokens, col_tokens):
    """Flatten a 2-D matrix into a long-format DataFrame, one record per cell.

    Only cells with ``r < max_row`` and ``c < max_col`` are kept, visited in
    row-major order. Each record holds the row index, the column index, the
    cell value as a float, and the label of that row and column. An index
    with no matching entry in ``row_tokens`` / ``col_tokens`` is labelled
    ``"<blank>"``.
    """

    def label_for(tokens, idx):
        # Indices beyond the supplied labels get a placeholder.
        if idx < len(tokens):
            return "%s" % tokens[idx]
        return "<blank>"

    records = []
    for r in range(m.shape[0]):
        for c in range(m.shape[1]):
            if not (r < max_row and c < max_col):
                continue
            cell = float(m[r, c])
            records.append(
                (r, c, cell, label_for(row_tokens, r), label_for(col_tokens, c))
            )

    header = ["row", "column", "value", "row_token", "col_token"]
    return pd.DataFrame(records, columns=header)



The following functions form the backbone of the attention visualization workflow. This currently only works for **decoder-only models** because we only display decoder self-attention and no cross-attention.
- `visualize_head` outputs an Altair heatmap of a single head in the MHA mechanism
- `visualize_layer` outputs a concatenation of the Altair heatmaps of a single layer
- `visualize_model_attns` outputs the heatmaps for a single piece of text

In [6]:
def visualize_head(attn, head, row_tokens, col_tokens, max_dim=30):
    """Build an Altair heatmap for a single attention head.

    Args:
        attn: Attention weights of one layer, indexed as ``attn[0, head]``
            (i.e. only the first batch element is drawn).
        head: Index of the head to draw.
        row_tokens: Labels for the rows (y axis).
        col_tokens: Labels for the columns (x axis).
        max_dim: Upper bound on the number of rows and columns drawn.

    Returns:
        An interactive 200x200 Altair rect chart titled ``Head {head}``.
    """
    df = mtx2df(attn[0, head].data, max_dim, max_dim, row_tokens, col_tokens)
    return (
        alt.Chart(data=df)
        .mark_rect()
        .encode(
            # sort=None keeps the tokens in sequence order; Altair's default
            # for nominal fields is alphabetical, which scrambles the map.
            # NOTE(review): tokens that occur more than once still share one
            # axis category because the label string is the encoded field.
            x=alt.X("col_token", sort=None, axis=alt.Axis(title="")),
            y=alt.Y("row_token", sort=None, axis=alt.Axis(title="")),
            color="value",
            tooltip=["row", "column", "value", "row_token", "col_token"],
        )
        .properties(title=f"Head {head}", height=200, width=200)
        .interactive()
    )

def visualize_layer(attn, layer, heads, ntokens, row_words, col_words):
    """Lay out the heatmaps of the selected heads of one layer side by side.

    Args:
        attn: Attention weights of the layer, passed through to
            ``visualize_head`` unchanged.
        layer: Layer number, used only for the chart title.
        heads: Iterable of head indices to draw, left to right.
        ntokens: Maximum number of rows/columns drawn per head.
        row_words: Row labels for every head chart.
        col_words: Column labels for every head chart.

    Returns:
        A horizontally concatenated Altair chart titled ``Layer {layer}``.
    """
    head_charts = []
    for head_idx in heads:
        single_head = visualize_head(
            attn,
            head_idx,
            row_tokens=row_words,
            col_tokens=col_words,
            max_dim=ntokens,
        )
        head_charts.append(single_head)

    layer_chart = alt.hconcat(*head_charts)
    return layer_chart.properties(title=f"Layer {layer}")

    
def visualize_model_attns(model, tokenizer, text: str, view_layers: list = None, view_heads: list = None):
    """Draw the attention heatmaps of the selected layers and heads for a text.

    The text is encoded, run through the model once, and the attention
    weights of each requested layer are rendered with ``visualize_layer``.
    The same token strings label both the rows and the columns.

    Args:
        model: Model to run the text through. Its output must expose
            ``.attentions`` (e.g. loaded with ``output_attentions=True``).
        tokenizer: Tokenizer for the text.
        text (str): Text to be visualized.
        view_layers (list, optional): Layers to display. Every element must
            satisfy 0 <= l < N_LAYERS. Defaults to no layers.
        view_heads (list, optional): Heads to display. Every element must
            satisfy 0 <= h < N_HEADS. Defaults to no heads.

    Returns:
        A vertically concatenated Altair chart with one row per layer.

    Raises:
        ValueError: If the model output carries no attention weights.
    """
    # None instead of a shared mutable [] default.
    layers = [] if view_layers is None else view_layers
    heads = [] if view_heads is None else view_heads

    tokens = tokenizer.encode(text, return_tensors='pt')
    n_tokens = tokens.size(-1)
    words = tokenizer.convert_ids_to_tokens(tokens[0])

    # Inference only: no autograd graph is needed for plotting.
    with t.no_grad():
        outputs = model(tokens)

    attns = outputs.attentions
    if attns is None:
        raise ValueError(
            "model output has no attentions; load the model with output_attentions=True"
        )

    # Diagnostics: number of attention entries and the available output fields.
    print(len(attns))
    print(outputs.keys())

    layer_maps = [
        visualize_layer(attns[layer_num], layer_num, heads, n_tokens, words, words)
        for layer_num in layers
    ]

    return alt.vconcat(*layer_maps)

## Experiments 

Here are a few experiments that I'm trying out to see how the attention maps turn out.

In [7]:
# Heads 0-7 of the first five and the last four layers, on a sentence that starts with a made-up name.
visualize_model_attns(model, tokenizer, "Jonathanapisario wanted cheese so he ate a pizza", view_layers=[0, 1, 2, 3, 4, 20, 21, 22, 23], view_heads=list(range(0, 8)))

24
odict_keys(['last_hidden_state', 'past_key_values', 'attentions'])


In [8]:
# Heads 1 and 2 followed through layers 0-23.
visualize_model_attns(model, tokenizer, "John wanted cheese so he ate a pizza", view_layers=list(range(0, 24)), view_heads=[1, 2])

24
odict_keys(['last_hidden_state', 'past_key_values', 'attentions'])


## Causal Language Modelling and Attention

Now I want to move on to causal language modelling and look at the relationship between the generated token probabilities and the attention that the heads across the layers pay to the generated tokens.

In [9]:
def visualize_token_attn(attn, n_tokens, row_words, col_words, view_layers: list = None, view_heads: list = None):
    """Stack the per-layer attention heatmaps for the requested layers.

    Args:
        attn: Either a tuple/list holding one attention tensor per layer, or
            a single attention tensor. A tuple/list is indexed with each
            requested layer number; a single tensor is reused for every
            requested layer (the previous behaviour), so in that case the
            rows of the result differ only in their title.
        n_tokens: Maximum number of rows/columns drawn per head.
        row_words: Row labels for the heatmaps.
        col_words: Column labels for the heatmaps.
        view_layers (list, optional): Layers to display. Defaults to none.
        view_heads (list, optional): Heads to display. Defaults to none.

    Returns:
        A vertically concatenated Altair chart with one row per layer.
    """
    # None instead of a shared mutable [] default.
    layers = [] if view_layers is None else view_layers
    heads = [] if view_heads is None else view_heads

    # Only a per-layer container can actually show different layers.
    per_layer = isinstance(attn, (tuple, list))

    layer_maps = [
        visualize_layer(
            attn[layer_num] if per_layer else attn,
            layer_num,
            heads,
            n_tokens,
            row_words,
            col_words,
        )
        for layer_num in layers
    ]

    return alt.vconcat(*layer_maps)

def visualize_lm_attns(model, tokenizer, text, view_layers: list = None, view_heads: list = None, *, step: int = 1, max_new_tokens: int = 10):
    """Generate a continuation of ``text`` and draw the attention of one generation step.

    The prompt is tokenized, ``model.generate`` produces up to
    ``max_new_tokens`` new tokens, and the attention weights recorded at
    generation step ``step`` are rendered for the requested layers and heads.

    Args:
        model: Causal language model exposing ``generate``.
        tokenizer: Tokenizer for the text.
        text (str): Prompt to continue.
        view_layers (list, optional): Layers to display. Every element must
            satisfy 0 <= l < N_LAYERS. When omitted or empty, falls back to
            ``[1, 2, 3, 21, 22, 23]``, the selection this function used to
            hard-code.
        view_heads (list, optional): Heads to display. Every element must
            satisfy 0 <= h < N_HEADS. When omitted or empty, falls back to
            heads 0-7, the selection this function used to hard-code.
        step (int, optional): Index of the generation step to display.
            Defaults to 1, the step that was previously hard-coded.
        max_new_tokens (int, optional): Number of tokens to generate.
            Defaults to 10.

    Returns:
        A vertically concatenated Altair chart with one row per layer.

    Raises:
        ValueError: If ``generate`` returned no attention weights.
        IndexError: If ``step`` is outside the recorded generation steps.
    """
    # Honour the caller's selection; the fallbacks keep the old output for
    # callers that pass nothing.
    layers = list(view_layers) if view_layers else [1, 2, 3, 21, 22, 23]
    heads = list(view_heads) if view_heads else list(range(8))

    tokens = tokenizer(text, return_tensors='pt')
    n_input_tokens = tokens.input_ids.size(-1)
    words = tokenizer.convert_ids_to_tokens(tokens.input_ids[0])
    print(words)
    # output_attentions is requested explicitly instead of relying on the
    # model having been loaded with it.
    outputs = model.generate(
        **tokens,
        max_new_tokens=max_new_tokens,
        return_dict_in_generate=True,
        output_scores=True,
        output_attentions=True,
    )
    pred_tokens = outputs.sequences[0].tolist()
    pred_words = tokenizer.convert_ids_to_tokens(pred_tokens)
    print(pred_words)
    n_pred_tokens = outputs.sequences.size(-1) - n_input_tokens
    print(n_pred_tokens, n_input_tokens)
    attns = outputs.attentions
    if not attns:
        raise ValueError("generate() returned no attentions for this model")
    print(pred_tokens)
    print(tokenizer.convert_ids_to_tokens(pred_tokens))
    for i, pred_token in enumerate(pred_tokens[n_input_tokens:]):
        print(f" {i} : {pred_token} => {tokenizer.convert_ids_to_tokens(pred_token)}")

    print(type(attns[0][0]))

    if not 0 <= step < len(attns):
        raise IndexError(f"step {step} is out of range for {len(attns)} generation steps")
    # attns is indexed as attns[step][layer].
    step_attns = attns[step]

    layer_maps = []
    for layer_num in layers:
        layer_attn = step_attns[layer_num]
        # The last two dims are (query positions, key positions).
        # NOTE(review): assumes the query rows are the trailing positions of
        # the key sequence, as for decoder self-attention with a KV cache;
        # confirm against the generate() output documentation.
        n_queries, n_keys = layer_attn.shape[-2:]
        col_words = pred_words[:n_keys]
        row_words = pred_words[n_keys - n_queries:n_keys]
        layer_maps.append(
            visualize_layer(
                layer_attn,
                layer_num,
                heads,
                max(n_queries, n_keys),  # draw the full map, nothing truncated
                row_words,
                col_words,
            )
        )

    return alt.vconcat(*layer_maps)

In [10]:
# Prompt for the generation experiment.
text = "John wanted some pizza so"
# NOTE: this rebinds the global `model` to the LM-head variant, so cells run
# after this one use it instead of the AutoModel instance loaded earlier.
model = GPT2LMHeadModel.from_pretrained(model_id, output_attentions=True)
# Use the EOS token id as the padding id during generation.
model.generation_config.pad_token_id = model.generation_config.eos_token_id

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [11]:
# Generate a continuation of the prompt and plot attention for layers 1-2, heads 1-2.
visualize_lm_attns(model, tokenizer, text, view_layers=[1, 2], view_heads=[1, 2])

['ĠJohn', 'Ġwanted', 'Ġsome', 'Ġpizza', 'Ġso']
['ĠJohn', 'Ġwanted', 'Ġsome', 'Ġpizza', 'Ġso', 'Ġhe', 'Ġwent', 'Ġto', 'Ġthe', 'Ġpizza', 'Ġplace', 'Ġand', 'Ġordered', 'Ġsome', 'Ġpizza']
10 5
[1757, 2227, 617, 14256, 523, 339, 1816, 284, 262, 14256, 1295, 290, 6149, 617, 14256]
['ĠJohn', 'Ġwanted', 'Ġsome', 'Ġpizza', 'Ġso', 'Ġhe', 'Ġwent', 'Ġto', 'Ġthe', 'Ġpizza', 'Ġplace', 'Ġand', 'Ġordered', 'Ġsome', 'Ġpizza']
 0 : 339 => Ġhe
 1 : 1816 => Ġwent
 2 : 284 => Ġto
 3 : 262 => Ġthe
 4 : 14256 => Ġpizza
 5 : 1295 => Ġplace
 6 : 290 => Ġand
 7 : 6149 => Ġordered
 8 : 617 => Ġsome
 9 : 14256 => Ġpizza
<class 'torch.Tensor'>
