# Adapted from Neel Nanda's Main Demo Notebook

## Setup

In [1]:
# Janky code to do different setup when run in a Colab notebook vs VSCode
DEVELOPMENT_MODE = False
try:
    import google.colab
    IN_COLAB = True
    print("Running as a Colab notebook")
    %pip install git+https://github.com/neelnanda-io/TransformerLens.git
    %pip install circuitsvis
    
    # PySvelte is an unmaintained visualization library, use it as a backup if circuitsvis isn't working
    # # Install another version of node that makes PySvelte work way faster
    # !curl -fsSL https://deb.nodesource.com/setup_16.x | sudo -E bash -; sudo apt-get install -y nodejs
    # %pip install git+https://github.com/neelnanda-io/PySvelte.git
except:
    IN_COLAB = False
    print("Running as a Jupyter notebook - intended for development only!")
    from IPython import get_ipython

    ipython = get_ipython()
    # Code to automatically update the HookedTransformer code as its edited without restarting the kernel
    ipython.magic("load_ext autoreload")
    ipython.magic("autoreload 2")

Running as a Jupyter notebook - intended for development only!


  ipython.magic("load_ext autoreload")
  ipython.magic("autoreload 2")


In [2]:
# Plotly needs a different renderer depending on the front end: the
# "notebook_connected" renderer is only selected for local development
# (DEVELOPMENT_MODE on and not in Colab); every other combination uses "colab".
import plotly.io as pio

pio.renderers.default = (
    "notebook_connected" if (not IN_COLAB and DEVELOPMENT_MODE) else "colab"
)
print(f"Using renderer: {pio.renderers.default}")

Using renderer: colab


In [3]:
import circuitsvis as cv
# Smoke test: call the library's bundled "hello" example to check that
# circuitsvis is importable and usable in this environment.
cv.examples.hello("Neel")

In [4]:
# Import stuff
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import einops
from fancy_einsum import einsum
import tqdm.auto as tqdm
import random
from pathlib import Path
import plotly.express as px
from torch.utils.data import DataLoader

from jaxtyping import Float, Int
from typing import List, Union, Optional
from functools import partial
import copy

import itertools
from transformers import AutoModelForCausalLM, AutoConfig, AutoTokenizer
import dataclasses
import datasets
from IPython.display import HTML

In [5]:
import transformer_lens
import transformer_lens.utils as utils
from transformer_lens.hook_points import (
    HookedRootModule,
    HookPoint,
)  # Hooking utilities
from transformer_lens import HookedTransformer, HookedTransformerConfig, FactoredMatrix, ActivationCache

In [6]:
# Turn off autograd globally. The cells shown in this notebook only run forward
# passes, so no gradients are needed.
torch.set_grad_enabled(False)

<torch.autograd.grad_mode.set_grad_enabled at 0x7fb19967e860>

In [7]:
def imshow(tensor, renderer=None, xaxis="", yaxis="", **kwargs):
    """Show `tensor` as a Plotly heatmap.

    The tensor is passed through `utils.to_numpy` and drawn with the "RdBu"
    continuous colour scale whose midpoint is fixed at 0.0.

    Args:
        tensor: Data to plot.
        renderer: Plotly renderer name forwarded to `Figure.show`; None uses
            the configured default.
        xaxis: Label supplied for the "x" entry of `labels`.
        yaxis: Label supplied for the "y" entry of `labels`.
        **kwargs: Extra keyword arguments forwarded to `px.imshow`.
    """
    figure = px.imshow(
        utils.to_numpy(tensor),
        color_continuous_midpoint=0.0,
        color_continuous_scale="RdBu",
        labels={"x": xaxis, "y": yaxis},
        **kwargs,
    )
    figure.show(renderer)

def line(tensor, renderer=None, xaxis="", yaxis="", **kwargs):
    """Show `tensor` as a Plotly line chart.

    Args:
        tensor: Data to plot; passed through `utils.to_numpy` first.
        renderer: Plotly renderer name forwarded to `Figure.show`; None uses
            the configured default.
        xaxis: Label supplied for the "x" entry of `labels`.
        yaxis: Label supplied for the "y" entry of `labels`.
        **kwargs: Extra keyword arguments forwarded to `px.line`.
    """
    figure = px.line(
        utils.to_numpy(tensor),
        labels={"x": xaxis, "y": yaxis},
        **kwargs,
    )
    figure.show(renderer)

def scatter(x, y, xaxis="", yaxis="", caxis="", renderer=None, **kwargs):
    """Show a Plotly scatter plot of `y` against `x`.

    Args:
        x: Horizontal coordinates; passed through `utils.to_numpy` first.
        y: Vertical coordinates; passed through `utils.to_numpy` first.
        xaxis: Label supplied for the "x" entry of `labels`.
        yaxis: Label supplied for the "y" entry of `labels`.
        caxis: Label supplied for the "color" entry of `labels`.
        renderer: Plotly renderer name forwarded to `Figure.show`; None uses
            the configured default.
        **kwargs: Extra keyword arguments forwarded to `px.scatter`.
    """
    x_values = utils.to_numpy(x)
    y_values = utils.to_numpy(y)
    axis_labels = {"x": xaxis, "y": yaxis, "color": caxis}
    figure = px.scatter(y=y_values, x=x_values, labels=axis_labels, **kwargs)
    figure.show(renderer)

## Loading Models

In [8]:
# Prefer the GPU when PyTorch reports CUDA as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [9]:
# Load the pretrained "gpt2-small" checkpoint as a HookedTransformer.
# The trailing comment is a disabled variant that would move the model to CUDA
# in float16; it is not executed.
model = HookedTransformer.from_pretrained("gpt2-small") #.to("cuda",  torch_dtype=torch.float16)

Using pad_token, but it is not set yet.


Loaded pretrained model gpt2-small into HookedTransformer


## Load Dataset

In [17]:
# Few-shot sentiment prompts.
#
# Every prompt consists of the same ten labelled reviews followed by one
# unlabelled query review. The reviews are defined once here and the prompt
# strings are generated from them, so the "helpful" and "corrupt" variants
# cannot drift apart through copy-paste edits.

# Question line appended after every review.
_REVIEW_QUESTION = "Is this review positive?"

# (review text, label shown in the "helpful" prompts) pairs, in prompt order.
_FEW_SHOT_EXAMPLES = [
    (
        "Ditto to other complaints - won't separate easily- either by unscrewing or prizing. Cheap, flimsy, leaks water also because no gasket. I'm looking for something else and hoping reviewers will give honest good advice . . . this almost identical to product by Munchkin",
        "no",
    ),
    (
        "We tried the Baby warming dish. We found that for our 1 year old it was pretty small, and didn't really hold enough water to keep her warm.I think it would work ok for an infant, although I think a blanket would work just as well.Recommendation:Buy********************HoldSell",
        "yes",
    ),
    (
        "As good as this book may be, the print quality is so bad that I cannot read it. Zero stars would have been more appropriate.",
        "no",
    ),
    (
        "I used the generator for a circuits project and it works pretty well with little resistance, however the wire connections seemed cheap and very loose. I would recommend buying a battery powered one with rechargeable batteries if you're worried about the environment.",
        "no",
    ),
    (
        "The early success of Adam Sandler's films gave him this mistaken impression that he was an actor and a writer. This film proves he is neither - and a grating voice-over guy to boot. I enjoyed Happy Gilmore and to a much lesser degree, Little Nicky and Big Daddy. But Sandler is a one-trick pony. We've all seen the trick, so it's just not interesting anymore. I thought this might be the one Hanukah film in a world of X-mas fare. It has nothing to do with the holiday and is definitely not for kids.",
        "no",
    ),
    (
        "god!!! what a dumb movie. the fact that Adam Sandler can do this is just wrong....never lives up to what if true and it down right smells like cheese....sick and demented jokes dont help its premise...though Whitey gets a few lifes...nicely animated but a poor outcome. think of this as a guide beofer watching it...Jon Lovitz, Rob Schneider and others contribute voices.",
        "no",
    ),
    (
        "A movie that makes you feel a lot better about the holidays. Although not a childish movie, Eight Crazy Nights is a fantastic Sandler movie.",
        "yes",
    ),
    (
        "The movie is vulgar and not meant for children. It is a typical Adam Sandler movie, with foul language and raunchy humor. Not enjoyable at all.",
        "no",
    ),
    (
        "8 Crazy Nights might have been a sweet film with a good message for kids, but the scatological humor, offensive language and explicit sexual references made it unsuitable for my 10-year old. The plot, on the other hand, while fine for 10-year olds was too obvious and simplistic for most of the adults in the audience. As a result, while it's probably not the worst film of the year, it is certainly in the running.",
        "no",
    ),
    (
        'This film has the dubious honor to be included in the book "The 50 worst films of all time". It is definently not one of those, but one can hardly call it a proper film, either. It is rather a loose framework in which 12 new songs could be presented in a more or less natural manner. A great-great-grandfather of music videos, and better than some of those!',
        "no",
    ),
]

# Final, unlabelled review that the model is asked to classify.
_QUERY_REVIEW = "Check out Maha Energy's website. Their Powerex MH-C204F charger works in 100 minutes for rapid charge, with option for slower charge (better for batteries). And they have 2200 mAh batteries."


def _build_multishot_prompt(examples, query, flip_labels=False):
    """Assemble a few-shot prompt string.

    Each example contributes two lines: the review text, then the question
    followed by a space and its label. The query review and a bare question
    line (no label, no trailing space or newline) close the prompt.

    Args:
        examples: Iterable of (review, label) pairs; label is "yes" or "no".
        query: Review text placed last, left for the model to answer.
        flip_labels: When True, every "yes" is shown as "no" and vice versa.

    Returns:
        The prompt as a single newline-joined string.
    """
    flipped = {"yes": "no", "no": "yes"}
    prompt_lines = []
    for review, label in examples:
        shown_label = flipped[label] if flip_labels else label
        prompt_lines.append(review)
        prompt_lines.append(f"{_REVIEW_QUESTION} {shown_label}")
    prompt_lines.append(query)
    prompt_lines.append(_REVIEW_QUESTION)
    return "\n".join(prompt_lines)


helpful_multishot_pos = _build_multishot_prompt(_FEW_SHOT_EXAMPLES, _QUERY_REVIEW)

# NOTE(review): this prompt was a byte-for-byte copy of `helpful_multishot_pos`
# (same examples, same labels, same query review). It looks like a placeholder
# that was meant to end in a negative query review - confirm the intent. The
# value is kept identical here so existing uses are unaffected.
helpful_multishot_neg = _build_multishot_prompt(_FEW_SHOT_EXAMPLES, _QUERY_REVIEW)

# Same reviews and query as the helpful prompt, but with every label flipped.
corrupt_multishot_pos = _build_multishot_prompt(
    _FEW_SHOT_EXAMPLES, _QUERY_REVIEW, flip_labels=True
)


## Cache and Visualize Model Activations

In [None]:
# Use the "helpful" few-shot prompt as the text to inspect, and echo it.
gpt2_text = helpful_multishot_pos
print(gpt2_text)
# Tokenize the prompt and report which device the token tensor lives on.
gpt2_tokens = model.to_tokens(gpt2_text)
print(gpt2_tokens.device)
# Run the model while recording activations into `gpt2_cache`.
# NOTE(review): remove_batch_dim=True presumably drops the leading batch axis
# from the cached activations - confirm against the TransformerLens docs.
gpt2_logits, gpt2_cache = model.run_with_cache(gpt2_tokens, remove_batch_dim=True)

In [None]:
print(type(gpt2_cache))
# Look up the attention pattern cached for layer 10.
# NOTE(review): presumably shaped (head, query_pos, key_pos) because the cache
# was built with remove_batch_dim=True - check the shape printed below.
attention_pattern = gpt2_cache["pattern", 10, "attn"]
print(attention_pattern.shape)
# Per-token strings for the prompt; passed as `tokens` to the attention
# visualization in the next cell.
gpt2_str_tokens = model.to_str_tokens(gpt2_text)

In [None]:
print("Layer 10 Head Attention Patterns:")
# Render the layer-10 attention pattern with circuitsvis, labelled by the
# prompt's string tokens.
cv.attention.attention_patterns(tokens=gpt2_str_tokens, attention=attention_pattern)

## Ablation

In [None]:
# Which attention head to zero-ablate.
layer_to_ablate = 0
head_index_to_ablate = 8


def head_ablation_hook(
    value: Float[torch.Tensor, "batch pos head_index d_head"],
    hook: HookPoint
) -> Float[torch.Tensor, "batch pos head_index d_head"]:
    """Zero out one head's slice of the hooked activation, in place.

    The head to ablate is read from the module-level `head_index_to_ablate`.
    The type annotations are a guide for the reader only.

    Args:
        value: Activation handed to the hook; axis 2 is indexed by head.
        hook: The hook point that triggered this call (unused).

    Returns:
        The same tensor object, with the selected head's entries set to 0.
    """
    print(f"Shape of the value tensor: {value.shape}")
    value[:, :, head_index_to_ablate] = 0.0
    return value


# Baseline loss on the prompt with no intervention.
original_loss = model(gpt2_tokens, return_type="loss")

# Loss with the ablation hook attached to the "v" activation of the chosen layer.
ablation_hooks = [(utils.get_act_name("v", layer_to_ablate), head_ablation_hook)]
ablated_loss = model.run_with_hooks(
    gpt2_tokens,
    return_type="loss",
    fwd_hooks=ablation_hooks,
)

print(f"Original Loss: {original_loss.item():.3f}")
print(f"Ablated Loss: {ablated_loss.item():.3f}")

## Activation Patching

In [18]:
# The clean run uses the correctly labelled few-shot prompt; the corrupted run
# uses the same prompt with the labels flipped.
clean_prompt = helpful_multishot_pos
corrupted_prompt = corrupt_multishot_pos

clean_tokens = model.to_tokens(clean_prompt)
corrupted_tokens = model.to_tokens(corrupted_prompt)


def logits_to_logit_diff(logits, correct_answer=" yes", incorrect_answer=" no"):
    """Return the correct-minus-incorrect answer logit at the final position.

    Only the first batch element (index 0) and the last sequence position (-1)
    of `logits` are read.

    Args:
        logits: Model output indexed as [batch, position, vocab].
        correct_answer: String passed to `model.to_single_token` to find the
            correct answer's token index.
        incorrect_answer: Same, for the incorrect answer. As the original
            comment notes, `to_single_token` raises if the string is not
            exactly one token.

    Returns:
        logit(correct_answer) - logit(incorrect_answer).
    """
    final_logits = logits[0, -1]
    correct_logit = final_logits[model.to_single_token(correct_answer)]
    incorrect_logit = final_logits[model.to_single_token(incorrect_answer)]
    return correct_logit - incorrect_logit


# Run the clean prompt with caching so its activations can be patched in later.
clean_logits, clean_cache = model.run_with_cache(clean_tokens)
clean_logit_diff = logits_to_logit_diff(clean_logits)
print(f"Clean logit difference: {clean_logit_diff.item():.3f}")

# The corrupted prompt only needs a plain forward pass (no cache).
corrupted_logits = model(corrupted_tokens)
corrupted_logit_diff = logits_to_logit_diff(corrupted_logits)
print(f"Corrupted logit difference: {corrupted_logit_diff.item():.3f}")

Clean logit difference: -1.077
Corrupted logit difference: -0.612
