In [1]:
import torch
from tqdm.auto import tqdm
from transformer_lens import HookedTransformer
from jaxtyping import Float, Int, Bool
from torch import Tensor
from tqdm.auto import tqdm
import plotly.io as pio
import pandas as pd
import numpy as np
import plotly.express as px 
from collections import defaultdict
import matplotlib.pyplot as plt
import re
from IPython.display import display, HTML
from datasets import load_dataset
from collections import Counter
import pickle
import os
import plotly.graph_objects as go
from scipy.stats import norm

import sklearn
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

# Select the Plotly renderer(s) used when figures are displayed in this notebook.
pio.renderers.default = "notebook_connected+notebook"
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# This notebook only runs forward passes, so autograd is switched off globally.
# `torch.set_grad_enabled` and `torch.autograd.set_grad_enabled` are the same
# switch, so a single call is sufficient.
torch.set_grad_enabled(False)

from haystack_utils import get_mlp_activations
from hook_utils import get_ablate_neuron_hook
import haystack_utils
from pythia_160m_utils import get_neuron_accuracy, ablation_effect
from plotting_utils import plot_neuron_acts, color_binned_histogram


# Enable IPython's autoreload extension in mode 2, so that edits to imported
# modules (e.g. the local helper modules above) are picked up before each cell
# runs, without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [2]:
# Short children's-story style text snippets used as probe inputs for the model.
# Many entries stop mid-sentence. Later cells concatenate this list with
# `longer_stories` when collecting and plotting neuron activations.
random_prompts = [
    "Tim found a green frog in his garden. He named it Fred. Fred liked to jump and play in the pond.",
    "Sally had a magic paintbrush. Anything she painted became real. She painted a big rainbow, and it",
    "Tom loved to look at the stars. One night, a shooting star landed in his yard. It was a starship!",
    "Amy found a little lost kitten in the park. She took it home and fed it milk. The kitten purred ",
    "In Billy's backyard, there was a magic tree. If you whispered a wish to it, the wish came true! ",
    "One rainy day, Lisa and her brother raced raindrops down the window. They cheered for their ",
    "Jenny planted a sunflower seed. She watered it and watched it grow. It grew taller and taller. ",
    "Sam built a kingdom of sandcastles at the beach. There were big castles and little castles. Sam",
    "Tina let go of her red balloon by accident. It floated up to the sky. Tina watched it drift away",
    "Molly danced under the moon with her teddy bear. They twirled and leaped. The stars watched and ",
    "Ben saw a bear in his backyard! It was big and furry but friendly. The bear liked honey and fish.",
    "Sara heard the wind whisper her name. It told her stories of faraway lands and secret treasures. ",
    "Leo found a lake that laughed when you told it jokes. He told it knock-knock jokes, and the lake ",
    "One day, the sun slept in, and it was dark in the morning. People were confused, but the birds",
    "Kelly had a pair of shoes that could talk! They told her stories of where they'd been and what",
    "In a snowy land, penguins had a parade every year. They dressed up, waddled in lines, and danced ",
    "One fluffy cloud took a day off from the sky. It floated down to visit a little girl named Emily.",
    "Frank had a fish friend in a bowl. They couldn't talk, but they understood each other. Frank ",
    "There was a scarecrow that danced at night. When everyone was asleep, it would twirl, leap, and",
    "Anna found a slide at the end of a rainbow. She climbed up and slid down, right through colors ",
    "Timmy had a hat that could travel through time. He visited dinosaurs, pirates, and even went to ",
    "In the woods, there was a rock that snored. People would come and listen. Some thought it was ",
    "Lily had a mirror that laughed at silly faces. The more faces she made, the harder it laughed.",
    "Bella the butterfly had a birthday. All her insect friends came. They had nectar cake and",
    "Harry had a hat that talked. It told him jokes and sang songs. Harry wore the hat to school,",
    "Every night, the moon asked for a story. Children would tell tales of brave heroes and magical ",
    "Olivia met a friendly dolphin in the ocean. They swam together and jumped waves. Olivia told the",
    "There was a tree that hugged people. If you were sad or happy, it would give you a big, wooden hug.",
    "Eddie had a magic eraser. It could erase anything, even rain clouds or bad days. Eddie used it to",
    "Peter was a polite pirate. He said please and thank you. He found treasure but always shared. Other",
    "A rabbit found glasses in the woods. When he put them on, he could see colors he never saw before. ",
    "Lucy built a castle in the clouds. She went there to read and dream. The clouds were soft, and the ",
    "Tim the turtle raced with the rabbits. They were fast, but Tim was slow and steady. The rabbits ",
    "In a magical meadow, the flowers played music. When the wind blew, they sang sweet songs. Children ",
    "Jenny had jellybeans that jumped! They bounced around and made funny noises. Jenny and her friends ",
    "In a big tree lived a wise old owl. He knew everything and helped everyone. If you had a question or ",
    "Sam put on a sock puppet show. His socks became kings, queens, and clowns. The whole family watched ",
    "At night, a cat danced on the rooftops. It twirled and leaped under the stars. Only a little girl",
    "Billy's bath was full of bubbles. When he jumped in, the bubbles started to talk! They told funny ",
    "A dog dreamed of a land full of bones and toys. He dug and played all day. When he woke up, he found ",
    "Sally had a sunflower that smiled. It smiled at the sun and at Sally. When Sally was sad, the sunflower ",
    "In an old house lived a friendly ghost. It didn't scare people; it made friends. It played with children ",
    "Tommy found a tiny dinosaur in his garden. It was small and liked to eat lettuce. Tommy and the dinosaur ",
    "Benny the Bear had a honey party. All the forest animals came, and they ate honey cakes and honey pies. They danced and laughed all night. Benny's honey party was the sweetest ever.",
    "Sara caught a snowflake, and it became her friend. They danced in the snow and made snow angels. When the sun came out, the snowflake had to go, but it promised to come back next winter.",
    "Marty the Mouse went on a big adventure. He explored the garden and found cheese mountains and cracker bridges. He was a brave mouse explorer, and he came home with tales to tell.",
    "Ella the Elephant had a paintbrush. She painted colorful pictures with her trunk. People came from far and wide to see her art. Ella was happy to share her colorful world.",
    "Tom's toothbrush talked when it was brushing time. It told jokes and sang songs about clean teeth. Tom loved brushing time, and his teeth sparkled and sang too.",
    "In Daisy's garden, the daisies danced at dawn. They twirled to the bird's song and swayed in the breeze. Daisy danced with them, and they were all dancing daisies together.",
    "Freddy the Fish wished to fly. A friendly bird carried him up, and Freddy flew over the sea! He saw whales and waves. Then he splashed back home, a fish that had flown.",
    "Randy the Rabbit found a rainbow in his burrow. He hopped through colors and found a land of carrot gold. Randy nibbled and played, then hopped back home, the richest rabbit ever.",
    "Cindy was best friends with a cloud. They played shadow games and rained on flowers. Cindy loved her cloudy friend, and they floated through many happy days together.",
    "Sammy the Snail raced with the beetles. They buzzed and dashed, but Sammy was slow and sure. The beetles got tired, but Sammy kept going. He won the race and was the snail champ!",
]

# Longer, multi-sentence children's stories. They are appended to
# `random_prompts` when gathering neuron statistics, and the first two are used
# on their own for the per-token activation displays further down.
longer_stories = [
    "Bobby Bear loved picnics. One sunny day, he packed his basket with honey sandwiches, apple juice, and chocolate cookies. He invited all his teddy friends, and they went to the big green park. They laughed, played hide-and-seek, and ate all the yummy food. Bobby Bear's picnic was the best, and they all clapped their paws and danced in the sun.",

    "Ducky loved rainy days. When the clouds gathered, and the first drops fell, Ducky would quack with joy. He splashed in puddles, jumped with the frogs, and made waves with his yellow feet. The rain was like music, and Ducky danced to the tune. The other ducks joined in, and they all had a splashy, quacky, rainy day of fun.",

    "Kitty had a magical colorful tail. It was red, blue, yellow, and even had sparkles. Everywhere Kitty went, her tail wagged and made pretty shapes in the air. The other cats were amazed, and the children loved to watch Kitty's tail. Kitty became a star, and her colorful tail was famous in the whole town. Kitty was proud and happy to have such a special tail.",

    "Timmy went on a train ride. The train went choo-choo, and Timmy's eyes grew wide. He saw cows, trees, mountains, and rivers. The train went fast, and it went slow. Timmy ate popcorn and waved at the people. The train ride was the most exciting adventure, and Timmy wanted to ride the train every day.",

    "Lulu had a big round balloon. It was pink and floated like a cloud. Lulu danced with the balloon in her room. They twirled, they floated, they jumped. Lulu and the balloon were the best of friends. They even had tea parties and told secrets. Lulu's balloon was her special friend, and they were happy together all the time.",

    "Billy loved to dance with bumblebees. He would go to the garden, and the bumblebees would come. They buzzed and twirled, and Billy danced with them. The flowers clapped, and the sun smiled. Billy was the bee dance king, and his garden was the happiest place. Everyone wanted to see Billy's bumblebee dance, and he was always ready to twirl.",

    "Sally made a snowman. It was big and round with a carrot nose and two big eyes. Sally and the snowman played all day in the snow. They made snow angels, they threw snowballs, and they laughed. When the sun went down, Sally gave the snowman a big hug and said goodnight. They were the best of snowy friends, and Sally couldn't wait for more snow.",

    "Tommy had a turtle named Tilly. Tilly was slow and green, but she was the best friend ever. Tommy and Tilly had races in the yard. Tommy would run, and Tilly would crawl. They both reached the finish line, and they both won! Tommy cheered, and Tilly smiled. They were the best racing team, and they loved their turtle time together.",

    "Jenny had jelly that jumped. It was red jelly, and it loved to boing and boing. Jenny would laugh and catch the jelly. It jumped into her bowl, and they had a jelly feast. Jenny's friends came to see the jumping jelly, and they all played together. Jenny's jumping jelly was the yummiest and funniest jelly ever, and they all wanted more.",

    "Danny's dog had dreams. At night, it would bark and wag its tail in its sleep. Danny wondered what it was dreaming. Maybe a big bone, maybe a park full of friends, maybe a never-ending bowl of doggy treats. Danny smiled at his dreaming dog and wished it sweet dreams. They were the best of friends, and Danny's dog's dreams were always happy.",

    "Milly looked at the moon every night. It was big, bright, and beautiful. Milly said goodnight to the moon and the stars. She told them stories and sang them songs. The moon and the stars twinkled back at her. They were Milly's nighttime friends, and they watched over her as she slept. Milly's moon gazing was her special time, and she loved the moon and the stars.",

    "Freddy Frog had a pond party. All the pond friends came. The ducks quacked, the fish splashed, the turtles sunbathed. Freddy Frog was the host, and he jumped and croaked. They played games and danced in the water. Freddy Frog's pond was the best party place, and they all had a splashing good time. The pond was alive with fun, and Freddy Frog was the happiest frog ever.",

    "Lily had a lemon that laughed. It was a funny lemon with a big smile. Lily squeezed the lemon, and it giggled. Hee-hee! They made lemonade together, and the lemonade was giggly too. Lily and her laughing lemon were the best of friends, and they made every day a sunny, lemony, happy day."
]


In [3]:
# Load the TinyStories-1M checkpoint into a TransformerLens HookedTransformer
# with LayerNorm folding and centred writing weights / unembed enabled.
# The model is placed on the `device` selected in the setup cell instead of a
# hard-coded "cuda", so this cell also runs on machines without a GPU.
model = HookedTransformer.from_pretrained("roneneldan/TinyStories-1M",
    center_unembed=True,
    center_writing_weights=True,
    fold_ln=True,
    device=device)
import plotting_utils

# Enumerate every [layer, neuron] pair of the MLPs. The sizes come from the
# loaded model's config rather than the previously hard-coded 8 x 256 grid, so
# the cell keeps working if a different checkpoint is loaded above.
all_layer_neuron_pairs = [[layer_idx, neuron_idx]
                          for layer_idx in range(model.cfg.n_layers)
                          for neuron_idx in range(model.cfg.d_mlp)]

# Per-neuron activation statistics over all prompts, taken at the
# pre-activation hook (hook_pre=True). Later cells read the 'kurtosis',
# 'layer' and 'neuron' columns of the result.
large_acts_df = plotting_utils.get_neuron_moments(model, random_prompts + longer_stories,
                                                  all_layer_neuron_pairs, hook_pre=True)

# plotting_utils.plot_neuron_acts(model, random_prompts + longer_stories, [[0, 109], [1, 213], [2, 233], [3, 149], [4, 4], [5, 197], [6, 48], [7, 88],])

Using pad_token, but it is not set yet.


Loaded pretrained model roneneldan/TinyStories-1M into HookedTransformer


In [None]:
from hook_utils import save_activation

# Run the first prompt once with a hook attached to layer 2's MLP hook_post
# point, then read back whatever the hook stored in that hook point's context.
hook_name = 'blocks.2.mlp.hook_post'
first_prompt = random_prompts[0]

with model.hooks([(hook_name, save_activation)]):
    model(first_prompt)

# NOTE(review): relies on save_activation storing its tensor under
# ctx['activation'] - confirm against hook_utils.
acts = model.hook_dict[hook_name].ctx['activation']

# Render the prompt's tokens alongside the values at index 233 of the last axis
# (first element of the leading axis), with the display scale capped at 0.7.
haystack_utils.clean_print_strings_as_html(
    model.to_str_tokens(first_prompt),
    acts[0, :, 233],
    max_value=0.7,
)

In [6]:
# Take the two rows with the largest 'kurtosis' value and plot those neurons'
# activations (pre-activation hook) over all prompts.
top_two_rows = large_acts_df.sort_values('kurtosis', ascending=False).head(2)
layer_neuron_tuples = top_two_rows[['layer', 'neuron']].values.tolist()
plotting_utils.plot_neuron_acts(
    model,
    random_prompts + longer_stories,
    layer_neuron_tuples,
    range_x=[-1, 2],
    hook_pre=True,
)

defaultdict(<class 'list'>, {1: [205, 168]})


In [16]:
# Keep the 20 rows with the largest 'kurtosis' value and group the selected
# neurons by the layer they belong to.
top_kurtosis_rows = large_acts_df.sort_values('kurtosis', ascending=False).head(20)
layer_neuron_tuples = top_kurtosis_rows[['layer', 'neuron']].values.tolist()
layer_neurons_dict = haystack_utils.get_neurons_by_layer(layer_neuron_tuples)

for layer, neurons_in_layer in layer_neurons_dict.items():
    # One activation array and one token list per story, for the first two
    # long stories only. Activations are requested un-averaged (mean=False)
    # with hook_pre=False.
    acts = [
        haystack_utils.get_mlp_activations([prompt], layer, model, hook_pre=False, mean=False,
                                           context_crop_start=0, context_crop_end=1000)
        for prompt in longer_stories[:2]
    ]
    str_tokens = [model.to_str_tokens(prompt) for prompt in longer_stories[:2]]

    for neuron in neurons_in_layer:
        # Label, then one token-level HTML rendering per story for this neuron.
        print(f'L{layer}N{neuron}')
        for i, tokens in enumerate(str_tokens):
            haystack_utils.clean_print_strings_as_html(tokens, acts[i][:, neuron], max_value=1)

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

L1N205


L1N168


L1N94


L1N97


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

L2N244


L2N23


L2N113


L2N115


L2N222


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

L0N60


L0N215


L0N219


L0N95


L0N98


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

L3N189


L3N202


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

L4N78


L4N22


L4N11


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

L7N225
