In [1]:
import torch
from tqdm.auto import tqdm
from transformer_lens import HookedTransformer
from tqdm.auto import tqdm
import plotly.io as pio
import numpy as np
import random
import torch.nn as nn
import torch.nn.functional as F
import wandb
import plotly.express as px
import pandas as pd
import torch.nn.init as init
import pickle
import os
from pathlib import Path
from jaxtyping import Int, Float
from torch import Tensor
import einops
import json
from collections import Counter
from datasets import load_dataset
import requests
import pandas as pd
from ipywidgets import interact, IntSlider
import logging
from process_tiny_stories_data import load_tinystories_validation_prompts, load_tinystories_tokens

pio.renderers.default = "notebook_connected"
device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
torch.autograd.set_grad_enabled(False)
torch.set_grad_enabled(False)

logging.basicConfig(format='(%(levelname)s) %(asctime)s: %(message)s', level=logging.INFO, datefmt='%I:%M:%S')


import sys
sys.path.append('../')  # Add the parent directory to the system path
import utils.haystack_utils as haystack_utils
from sparse_coding.train_autoencoder import AutoEncoder
from utils.autoencoder_utils import custom_forward, AutoEncoderConfig, evaluate_autoencoder_reconstruction, get_encoder_feature_frequencies
import utils.haystack_utils as haystack_utils

%reload_ext autoreload
%autoreload 2

In [6]:
model = HookedTransformer.from_pretrained(
        "tiny-stories-2L-33M",
        center_unembed=True,
        center_writing_weights=True,
        fold_ln=True,
        device=device,
    )

def load_encoder(save_name, model_name):
    with open(f"{model_name}/{save_name}.json", "r") as f:
        cfg = json.load(f)

    cfg = AutoEncoderConfig(
        cfg["layer"], cfg["act"], cfg["expansion_factor"], cfg["l1_coeff"]
    )

    if cfg.act_name == "hook_mlp_out":
        d_in = model.cfg.d_model  # d_mlp
    else:
        d_in = model.cfg.d_mlp
    d_hidden = d_in * cfg.expansion_factor

    encoder = AutoEncoder(d_hidden, cfg.l1_coeff, d_in)
    encoder.load_state_dict(torch.load(os.path.join(model_name, save_name + ".pt")))
    encoder.to(device)
    return encoder, cfg


def load_checkpoint(save_name, model_name, checkpoint_save_name):
    with open(f"{model_name}/{save_name}.json", "r") as f:
        cfg = json.load(f)

    cfg = AutoEncoderConfig(
        cfg["layer"], cfg["act"], cfg["expansion_factor"], cfg["l1_coeff"]
    )

    if cfg.act_name == "hook_mlp_out":
        d_in = model.cfg.d_model  # d_mlp
    else:
        d_in = model.cfg.d_mlp
    d_hidden = d_in * cfg.expansion_factor

    encoder = AutoEncoder(d_hidden, cfg.l1_coeff, d_in)
    encoder.load_state_dict(torch.load(os.path.join(model_name, checkpoint_save_name)))
    encoder.to(device)
    return encoder, cfg

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.08k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/323M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/722 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/438 [00:00<?, ?B/s]

Using pad_token, but it is not set yet.


Loaded pretrained model tiny-stories-2L-33M into HookedTransformer


In [7]:
import re
import torch

model_name = "tiny-stories-2L-33M-checkpoints"
save_name = "18_morning_sun"
print_model_name = "TinyStories 2L 33M Checkpoints"

checkpoint_pattern = r".*_(\d+)\.pt$"

final_checkpoint = load_encoder(save_name, model_name)

def mean_max_cosine_sim(first, second):
    result = torch.zeros(len(first))
    for i in range(len(first)):
        current_feature = first[i]
        max_cosine_sim = torch.max(torch.F.cosine_similarity(current_feature, second))
        result[i] = max_cosine_sim
    return torch.mean(result)

mean_max_cosine_sims = []
for checkpoint_name in os.listdir(model_name):
    if re.match(checkpoint_pattern, checkpoint_name):
        checkpoint = load_checkpoint(save_name, model_name, checkpoint_name)
        mean_max_cosine_sims.append(mean_max_cosine_sim(final_checkpoint, checkpoint))


# first [features, columns]
# cosine_sim = torch.nn.CosineSimilarity(dim=1)
# cosine_sim(current_feature, second)

df = pd.DataFrame({
    "Checkpoint": range(len(os.listdir(model_name))),
    "Mean max cosine sim": mean_max_cosine_sims
})

RuntimeError: Attempting to deserialize object on a CUDA device but torch.cuda.is_available() is False. If you are running on a CPU-only machine, please use torch.load with map_location=torch.device('cpu') to map your storages to the CPU.

In [None]:
fig = px.line(df, x="Checkpoint", y="Mean max cosine sim", title=f"{print_model_name}: Mean Maximum Cosine Similarity")
fig.update_layout(
    # xaxis_title="L1 coefficient",
    # yaxis_title="",
    # legend_title="",
    width = 800,
    # xaxis={'tickformat':'.1e'}
)
# fig.update_xaxes(type='linear')
fig.show()