In [45]:
from functools import partial
from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union

import einops
import numpy as np
import pandas as pd
import torch
import transformer_lens
import transformer_lens.utils as utils
from fancy_einsum import einsum
from fastapi import FastAPI
from transformer_lens import ActivationCache, HookedTransformer
from dataclasses import dataclass
import tiktoken
import seaborn as sns
import circuitsvis as cv

In [46]:
# Setup

torch.set_grad_enabled(False)
device = "cuda" if torch.cuda.is_available() else "cpu"

# Looking at GPT-2 Small first
model = HookedTransformer.from_pretrained("gpt2", device=device)
model_cfg = model.cfg

Using pad_token, but it is not set yet.


Loaded pretrained model gpt2 into HookedTransformer


In [47]:
@dataclass
class Prediction:
    next_token: str
    top5_tokens: List[str]
    confidence_in_guess: float
    confidence_in_top_5: float
    attention: List[float]

In [48]:
a = torch.tensor([1,2,3])
a.sort(-1, descending=True)[0]

tensor([3, 2, 1])

Idea: 
You give me a sequence of tokens, for each subsequence I give you back: 
- The best prediction for next token
- How confident I am
- My top 5 guesses
- The attention pattern I used to get this guess

In [49]:
def get_prediction(sequence: str, model: HookedTransformer = model, topk: int = 5) -> Tuple[str, float, List[str]]:
    "Return the next token, confidence in the guess and top 5 guesses"
    logits = model(sequence) # batch (1), sequence length, vocab size
    
    logits_most_to_least_likely, tokens_most_to_least_likely = logits.sort(-1, descending =True) # batch (1), sequence length, vocab size
    print(logits_most_to_least_likely.shape)
    
    probs_most_to_least_likely = torch.softmax(logits_most_to_least_likely, dim=-1)[0]
    confidence_in_top_guess = probs_most_to_least_likely[-1, 0].item()
    
    top_predictions = tokens_most_to_least_likely[0, :, :topk] # sequence length, topk
    final_top_predictions = top_predictions[-1, :]

    print(top_predictions.shape)
    print(top_predictions)
    topk_predictions = model.to_str_tokens(final_top_predictions) 
    return topk_predictions[0], confidence_in_top_guess, topk_predictions

In [50]:
def get_all_predictions(sequence: str, model: HookedTransformer = model, topk: int = 5) -> Tuple[List[str], List[float], List[List[str]]]:
    "Return next guess for each substring, confidence in the guess and top 5 guesses"
    logits = model(sequence) # batch (1), sequence length, vocab size
    
    logits_most_to_least_likely, tokens_most_to_least_likely = logits.sort(-1, descending =True) # batch (1), sequence length, vocab size
    print(logits_most_to_least_likely.shape)
    
    probs_most_to_least_likely = torch.softmax(logits_most_to_least_likely, dim=-1)[0]
    confidence_in_top_guesses = probs_most_to_least_likely[:, 0].numpy().tolist()
    confidence_in_top_guesses = [round(confidence, 3) for confidence in confidence_in_top_guesses]
    
    top_predictions = tokens_most_to_least_likely[0, :, :topk] # sequence length, topk
    
    topk_predictions = []
    for i, token in enumerate(top_predictions): 
        topk_predictions.append(model.to_str_tokens(top_predictions[i]))
        
    best_guesses = [guess[0] for guess in topk_predictions]

    print(top_predictions.shape)
    print(top_predictions)

    # topk_predictions = model.to_str_tokens(final_top_predictions)  
    return best_guesses, confidence_in_top_guesses, topk_predictions

In [51]:
# model.to_str_tokens(top_predictions[:, 0])

In [52]:
get_all_predictions("1,2,3,4,")

torch.Size([1, 9, 50257])
torch.Size([9, 5])
tensor([[ 198,  464,    1,   32,   40],
        [  13,    8, 5985,  198,  362],
        [ 830, 4059, 2167,   23,   24],
        [  11,   12,   13,    8,  198],
        [  18,   19,   20,   17,   16],
        [  11,   12,  198,    8,   13],
        [  19,   20,   18,   21,   16],
        [  11,  198,   12,    8,   13],
        [  20,   21,   19,   16,   22]])


(['\n', '.', '000', ',', '3', ',', '4', ',', '5'],
 [0.062, 0.272, 0.122, 0.282, 0.779, 0.514, 0.899, 0.693, 0.924],
 [['\n', 'The', '"', 'A', 'I'],
  ['.', ')', ' Clean', '\n', ' 2'],
  ['000', '500', '200', '8', '9'],
  [',', '-', '.', ')', '\n'],
  ['3', '4', '5', '2', '1'],
  [',', '-', '\n', ')', '.'],
  ['4', '5', '3', '6', '1'],
  [',', '\n', '-', ')', '.'],
  ['5', '6', '4', '1', '7']])

In [70]:
def attention_pattern(sequence: str, model: HookedTransformer = model) -> Tuple[List, List]:
    _logits, cache = model.run_with_cache(sequence, remove_batch_dim=True)
    attention_patterns = [cache["pattern", layer, "attn"] for layer in range(12)]
    
    str_tokens = model.to_str_tokens(sequence)
    
    return attention_patterns, str_tokens


In [71]:
from circuitsvis.attention import attention_patterns as cv_show_attention

In [None]:
attention_patterns, str_tokens = attention_pattern(sequence="1,2,3,4,") 

In [75]:
layer = 7
print(f"Layer {layer} attention patterns")
cv_show_attention(tokens=str_tokens, attention = attention_patterns[layer])

Layer 7 attention patterns


In [67]:
sequence = "1,2,3,4,"
str_tokens = model.to_str_tokens(sequence)

logits, cache = model.run_with_cache(sequence, remove_batch_dim=True)
attention_patterns = [cache["pattern", layer, "attn"] for layer in range(12)]
layer = 0
attention_pattern = attention_patterns[layer]


print("Layer 0 Head Attention Patterns:")
cv_show_attention(tokens=str_tokens, attention=attention_pattern)

Layer 0 Head Attention Patterns:


In [56]:
def tokenize_sentence(sentence: str) -> List[str]:
    encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
    tokens = encoding.encode(sentence)
    try:
        tokens_strs = [
            encoding.decode_single_token_bytes(token).decode() for token in tokens
        ]
    except UnicodeDecodeError:
        raise Exception("Unable to decode tokens from sentence.")
    tokens_strs = [token for token in tokens_strs if token.strip() != ""]
    # print(tokens_strs)
    return tokens_strs

def tokenstring_subsequences(sequence: str) -> List[str]: 
    token_strs = tokenize_sentence(sequence)
    subsequences = []
    for i in range(len(token_strs)):
        subsequences.append(" ".join(token_strs[:i+1]))
    return subsequences

In [57]:
def predict_with_attention(sequence="1,2,3,4,", model=model):
    subsequences = tokenstring_subsequences(sequence)
    
    best_guesses, confidence_in_top_guesses, topk_predictions = get_all_predictions(sequence, model)

    attention = attention_pattern(sequence, model)
    print("attention: ", attention)
    return best_guesses, confidence_in_top_guesses, topk_predictions, attention, subsequences

In [58]:
predict_with_attention("1,2,3,4,")

torch.Size([1, 9, 50257])
torch.Size([9, 5])
tensor([[ 198,  464,    1,   32,   40],
        [  13,    8, 5985,  198,  362],
        [ 830, 4059, 2167,   23,   24],
        [  11,   12,   13,    8,  198],
        [  18,   19,   20,   17,   16],
        [  11,   12,  198,    8,   13],
        [  19,   20,   18,   21,   16],
        [  11,  198,   12,    8,   13],
        [  20,   21,   19,   16,   22]])


In [76]:
get_all_predictions("1,2,3,4,")

torch.Size([1, 9, 50257])
torch.Size([9, 5])
tensor([[ 198,  464,    1,   32,   40],
        [  13,    8, 5985,  198,  362],
        [ 830, 4059, 2167,   23,   24],
        [  11,   12,   13,    8,  198],
        [  18,   19,   20,   17,   16],
        [  11,   12,  198,    8,   13],
        [  19,   20,   18,   21,   16],
        [  11,  198,   12,    8,   13],
        [  20,   21,   19,   16,   22]])


(['\n', '.', '000', ',', '3', ',', '4', ',', '5'],
 [0.062, 0.272, 0.122, 0.282, 0.779, 0.514, 0.899, 0.693, 0.924],
 [['\n', 'The', '"', 'A', 'I'],
  ['.', ')', ' Clean', '\n', ' 2'],
  ['000', '500', '200', '8', '9'],
  [',', '-', '.', ')', '\n'],
  ['3', '4', '5', '2', '1'],
  [',', '-', '\n', ')', '.'],
  ['4', '5', '3', '6', '1'],
  [',', '\n', '-', ')', '.'],
  ['5', '6', '4', '1', '7']])

In [59]:
#TODO: Get entire attention pattern for each token in the sequence
#Then package as API and deploy

In [60]:
if __name__ == "__main__":
    print(predict_with_attention("1,2,3,4,"))



torch.Size([1, 9, 50257])
torch.Size([9, 5])
tensor([[ 198,  464,    1,   32,   40],
        [  13,    8, 5985,  198,  362],
        [ 830, 4059, 2167,   23,   24],
        [  11,   12,   13,    8,  198],
        [  18,   19,   20,   17,   16],
        [  11,   12,  198,    8,   13],
        [  19,   20,   18,   21,   16],
        [  11,  198,   12,    8,   13],
        [  20,   21,   19,   16,   22]])


In [61]:
from transformers import GPT2Model, GPT2Tokenizer

# tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
# model = GPT2Model.from_pretrained('gpt2', output_attentions=True)
# model = GPT2Model.from_pretrained('gpt2')

In [62]:
# model

In [63]:
# inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")

# outputs = model(**inputs)

In [64]:
# attention = outputs[-1]
# type(attention)

In [65]:
# outputs[0].shape
# outputs[1][0][0].shape

In [2]:
from circuitsvis.attention import attention_patterns as cv_show_attention
from gpt2_attention import predict_with_attention

Using pad_token, but it is not set yet.


Loaded pretrained model gpt2 into HookedTransformer


In [4]:


prediction_list = predict_with_attention()
print(type(prediction_list))
prediction_4 = prediction_list[4]
type(prediction_4)

<class 'gpt2_attention.PredictionList'>


gpt2_attention.Prediction

In [7]:
layer = 5
cv_show_attention(tokens=prediction_4.str_tokens, attention=prediction_list.attention_patterns[layer])

In [5]:
cv_show_attention(tokens=prediction_4.str_tokens, attention=prediction_4.attention_pattern)