# Visualing the Attention Maps

This experiment is in visualizing the attention maps in GPT-2. I want to see
- which token the different heads of a single layer pay attention to
- which token the same heads in different layers pay attention to. 

In [12]:
import torch as t
import pandas as pd
from transformers import AutoModel, AutoTokenizer, GPT2LMHeadModel
from datasets import load_dataset
import altair as alt

device = "cuda" if t.cuda.is_available() else "cpu"
device = "mps" if t.backends.mps.is_available() else "cpu"

In [4]:
model_id = "openai-community/gpt2-medium"
model = AutoModel.from_pretrained(model_id, output_attentions=True)
tokenizer = AutoTokenizer.from_pretrained(model_id, add_prefix_space=True)

In [5]:
txt = "The cat ate the mat. And then sat on a rat. But what he wanted was a hat."
inputs = tokenizer.encode(txt, return_tensors='pt')
inputs.shape, inputs

(torch.Size([1, 21]),
 tensor([[  383,  3797, 15063,   262,  2603,    13,   843,   788,  3332,   319,
            257,  4227,    13,   887,   644,   339,  2227,   373,   257,  6877,
             13]]))

In [6]:
attns = model(inputs).attentions
first_layer_attn = attns[0]
first_layer_attn.shape # B, N_h, L_s, L_s

torch.Size([1, 16, 21, 21])

## Attention Map Functions 

In [7]:
def mtx2df(m, max_row, max_col, row_tokens, col_tokens):
    "convert a dense matrix to a data frame with row and column indices"
    return pd.DataFrame(
        [
            (
                r,
                c,
                float(m[r, c]),
                "%s"
                % row_tokens[r] if len(row_tokens) > r else "<blank>",
                "%s"
                % col_tokens[c] if len(col_tokens) > c else "<blank>",
            )
            for r in range(m.shape[0])
            for c in range(m.shape[1])
            if r < max_row and c < max_col
        ],
        # if float(m[r,c]) != 0 and r < max_row and c < max_col],
        columns=["row", "column", "value", "row_token", "col_token"],
    )



The following functions build the backbone for the attention visualization workflow. This currently only works for **decoder-only models** because we will only be displaying decoder self-attention and no cross attention.
- `visualize_head` will output a altair heatmap of a single head in the MHA mechanism
- `visualize_layer` will output a concatenation of altair heatmaps of a single layer
- `visualize_model_attns` will output the heatmaps for a single piece of text

In [8]:
def visualize_head(attn, head, row_tokens, col_tokens, max_dim=30):
    df = mtx2df(attn[0, head].data, max_dim, max_dim, row_tokens, col_tokens)
    return (
        alt.Chart(data=df)
        .mark_rect()
        .encode(
            x=alt.X("col_token", axis=alt.Axis(title="")),
            y=alt.Y("row_token", axis=alt.Axis(title="")),
            color="value",
            tooltip=["row", "column", "value", "row_token", "col_token"],
        )
        .properties(title=f"Head {head}", height=200, width=200)
        .interactive()
    )

def visualize_layer(attn, layer, heads, ntokens, row_words, col_words):
    charts = [
        visualize_head(
            attn,
            h,
            row_tokens=row_words,
            col_tokens=col_words,
            max_dim=ntokens,
        )
        for h in heads
    ]
    return alt.hconcat(*charts).properties(title=f"Layer {layer}")

    
def visualize_model_attns(model, tokenizer, text: str, view_layers:list=[], view_heads:list=[]): 
    """ Given a model, a tokinizer and an input string, will output the attentin heat maps for the specified layers 
        and mutli-attention heads. 

    Args:
        model: Model to run the text through 
        tokenizer: Tokenizer for the text 
        text (str): Text to be visualized 
        view_layers (list, optional): The layers that you want displayed. Every element must be 0 <= l < N_LAYERS. Defaults to [].
        view_heads (list, optional): The heads that you want displayed. Every element must be 0 <= l < N_HEADS. Defaults to [].

    Returns:
        altair heat map. 
    """

    tokens = tokenizer.encode(text, return_tensors='pt')
    n_tokens = tokens.size(-1)
    words = tokenizer.convert_ids_to_tokens(tokens[0])
    attns = model(tokens).attentions
    
    layer_maps = [] 
    for layer_num in view_layers:
        layer_maps.append(visualize_layer(attns[layer_num], layer_num, view_heads, n_tokens, words, words)) 

    return  alt.vconcat(*layer_maps)

## Experiments 

Here's a few experiments that I' m trying out and seeing how the attention maps turn out

In [9]:
visualize_model_attns(model, tokenizer, "John wanted cheese so he ate a pizza", view_layers=[0, 1, 2, 3, 4, 20, 21, 22, 23], view_heads=list(range(0, 8)))

In [10]:
visualize_model_attns(model, tokenizer, "John wanted cheese so he ate a pizza", view_layers=list(range(0, 24)), view_heads=[1, 2])

## Causal Language Modelling and Attention

Now I want to move onto causual language modelling and seeing the relationship between generated token probabilities and the attention between generated tokens and the attention heads across the layers

In [19]:
def visualize_lm_attns(model, tokenizer, text, view_layers:list=[], view_heads:list=[]):
    """ Given a model, a tokinizer and an input string, will output the attentin heat maps for the specified layers 
        and mutli-attention heads. 

    Args:
        model: Model to run the text through 
        tokenizer: Tokenizer for the text 
        text (str): Text to be visualized 
        view_layers (list, optional): The layers that you want displayed. Every element must be 0 <= l < N_LAYERS. Defaults to [].
        view_heads (list, optional): The heads that you want displayed. Every element must be 0 <= l < N_HEADS. Defaults to [].

    Returns:
        altair heat map. 
    """

    tokens = tokenizer.encode(text, return_tensors='pt')
    n_tokens = tokens.size(-1)
    words = tokenizer.convert_ids_to_tokens(tokens[0])
    print(tokens.shape)
    outputs = model.generate(tokens, return_dict_in_generate=True)
    print(outputs.keys())
    print(outputs.sequences.shape)
    attns = outputs.attentions 
    
    layer_maps = [] 
    for layer_num in view_layers:
        layer_maps.append(visualize_layer(attns[layer_num], layer_num, view_heads, n_tokens, words, words)) 

    return  alt.vconcat(*layer_maps)

In [23]:
text = "John wanted some pizza so he"
model = GPT2LMHeadModel.from_pretrained(model_id, output_attentions=True)

In [24]:
visualize_lm_attns(model, tokenizer, text, view_layers=[1, 2], view_heads=[1, 2])

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


torch.Size([1, 6])
odict_keys(['sequences', 'attentions', 'past_key_values'])
torch.Size([1, 20])


TypeError: tuple indices must be integers or slices, not tuple