In [1]:
from dotenv import load_dotenv
import sys
sys.path.append("../../../")
from shared.models import MiniPileDataset
from shared.interp import count_non_zero_feature_activations, plot_feature_activation_histogram, plot_feature_activation_histogram_from_log_feature_densities, plot_highest_activating_feature_for_each_sentence, create_feature_heatmap_widget
from shared.features import Feature, FeatureSample
import os
import json

# Enable automatic reloading of modules when they change
%load_ext autoreload
%autoreload 2

ModuleNotFoundError: No module named 'dotenv'

In [2]:
from transformers import AutoTokenizer, AutoModel
import torch

MODEL_NAME_1 = "bert-base-uncased"

# Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME_1)
model = AutoModel.from_pretrained(MODEL_NAME_1)

# Encode some text
text = "This is a sample text to encode."
inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)

# Move model to device
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
inputs = {key: value.to(device) for key, value in inputs.items()}

# Get the embedding from the last hidden state
with torch.no_grad():
    outputs = model(**inputs)
embedding = outputs.last_hidden_state.mean(dim=1).squeeze().cpu().numpy()

print("Embedding:", embedding)


Embedding: [ 1.84332848e-01 -1.71771586e-01  3.56589220e-02 -1.51276067e-01
 -1.85860202e-01 -4.03527260e-01  1.03313997e-01  2.31959239e-01
  1.80198148e-01  2.54761260e-02 -2.52800286e-01 -2.99641520e-01
 -3.12144727e-01  1.36302456e-01 -1.11550286e-01  1.23413272e-01
 -1.41808733e-01  1.18051521e-01  3.99289727e-02 -7.64348805e-02
  5.78769222e-02  1.11669153e-01 -5.93145967e-01  2.25300901e-02
  9.98427927e-01 -4.05542165e-01 -3.07221293e-01 -3.29373479e-01
 -8.23361218e-01 -8.15446228e-02  5.12531400e-03  2.83867598e-01
 -2.70921260e-01 -1.25358090e-01 -1.67635307e-01 -1.80304632e-01
  2.93781102e-01 -4.39690351e-02  3.00429743e-02  2.44957164e-01
 -5.87249517e-01 -4.26605135e-01  2.32217878e-01  1.81071863e-01
  5.76134287e-02 -3.51474196e-01 -5.17745763e-02 -2.05165073e-02
 -9.27073210e-02 -9.01999176e-02 -9.37584698e-01  4.22418475e-01
 -3.02045234e-02  5.15738368e-01  2.13608995e-01  3.28836083e-01
  2.70725578e-01 -9.43124831e-01 -1.13463707e-01 -1.97403207e-01
  5.24869144e-

In [2]:
# Load the model from the pickle file
import pickle 
import sys

sys.path.append("../")
from shared.sparse_autoencoder import SparseAutoencoder, SparseAutoencoderConfig
import json

%load_ext autoreload
%autoreload 2

# load the dataset
pre_trained_sentences_file = "pre-trained/data/asap_sentences_3_bert-base-uncased.npy"
pre_trained_embeddings_file = "pre-trained/data/asap_embeddings_3_bert-base-uncased.npy"
pre_trained_mini_pile_dataset = MiniPileDataset(pre_trained_sentences_file, pre_trained_embeddings_file)

# Load the configuration from the JSON file
pre_trained_config_path = "pre-trained/sae/config.json"
with open(pre_trained_config_path, "r") as pre_trained_config_file:
    pre_trained_config = json.load(pre_trained_config_file)

# Load the pre-trained model from the pickle file
pre_trained_sae_config = SparseAutoencoderConfig(d_model=pre_trained_config["dimensions"], d_sparse=8 * pre_trained_config["dimensions"], sparsity_alpha=pre_trained_config["sparsity_alpha"])
pre_trained_model = SparseAutoencoder(pre_trained_sae_config)
pre_trained_model_path = "pre-trained/sae/sae.pkl"
with open(pre_trained_model_path, "rb") as pre_trained_f:
    pre_trained_model_state_dict = pickle.load(pre_trained_f)
    pre_trained_model.load_state_dict(pre_trained_model_state_dict)

# Load the log feature densities from the JSON file
pre_trained_log_feature_densities_path = "pre-trained/sae/log_feature_densities.json"
with open(pre_trained_log_feature_densities_path, "r") as pre_trained_json_file:
    pre_trained_log_feature_densities = json.load(pre_trained_json_file)

# Load features
pre_trained_features_folder = "pre-trained/features"
pre_trained_features = []
pre_trained_filtered_out_count = 0

for pre_trained_filename in os.listdir(pre_trained_features_folder):
    if pre_trained_filename.startswith("feature_") and pre_trained_filename.endswith(".json"):
        with open(os.path.join(pre_trained_features_folder, pre_trained_filename), "r") as pre_trained_json_file:
            pre_trained_feature_data = json.load(pre_trained_json_file)
            pre_trained_feature = Feature(**pre_trained_feature_data)
            pre_trained_features.append(pre_trained_feature)
            # if len(pre_trained_feature.high_act_samples) >= 10:
            #     pre_trained_features.append(pre_trained_feature)
            # else:
            #     pre_trained_filtered_out_count += 1

# Load the scores from the JSON file
with open("asap_sentences_scores_3.json", "r") as score_file:
    score_dict = json.load(score_file)

# Add scores to high acting samples in pre_trained_features
for feature in pre_trained_features:
    for sample in feature.high_act_samples:
        sample_text = sample.text
        if sample_text in score_dict:
            sample.score = score_dict[sample_text]
        else:
            sample.score = None  # or some default value if the text is not found

print("Scores added to high acting samples in pre_trained_features.")

# pre_trained_features.sort(key=lambda x: x.confidence, reverse=True)

# print(f"Number of filtered out samples: {pre_trained_filtered_out_count}")

ModuleNotFoundError: No module named 'shared'

In [3]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F

from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForCausalLM

class BottleneckT5Autoencoder:
    def __init__(self, model_path: str, device='cpu'):
        self.device = device
        self.tokenizer = AutoTokenizer.from_pretrained(model_path, model_max_length=512, use_fast=False)
        self.model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True).to(self.device)
        self.model.eval()

    @torch.no_grad()
    def embed(self, text: str) -> torch.FloatTensor:
        inputs = self.tokenizer(text, return_tensors='pt').to(self.device)
        decoder_inputs = self.tokenizer('', return_tensors='pt').to(self.device)
        return self.model(
            **inputs,
            decoder_input_ids=decoder_inputs['input_ids'],
            encode_only=True,
        )[0]

    @torch.no_grad()
    def generate_from_latent(self, latent: torch.FloatTensor, max_length=512, temperature=1.0) -> str:
        dummy_text = '.'
        dummy = self.embed(dummy_text)
        perturb_vector = latent - dummy
        self.model.perturb_vector = perturb_vector
        input_ids = self.tokenizer(dummy_text, return_tensors='pt').to(self.device).input_ids
        output = self.model.generate(
            input_ids=input_ids,
            max_length=max_length,
            do_sample=True,
            temperature=temperature,
            top_p=0.9,
            num_return_sequences=1,
        )
        return self.tokenizer.decode(output[0], skip_special_tokens=True)


In [4]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
autoencoder = BottleneckT5Autoencoder(model_path='thesephist/contra-bottleneck-t5-base-wikipedia', device=device)


config.json:   0%|          | 0.00/878 [00:00<?, ?B/s]

bottleneck_t5.py:   0%|          | 0.00/18.9k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/thesephist/contra-bottleneck-t5-base-wikipedia:
- bottleneck_t5.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


pytorch_model.bin:   0%|          | 0.00/1.10G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/142 [00:00<?, ?B/s]

In [7]:
texts = [
    'The quick brown fox jumps over the lazy dog',
    'Hi there! My name is Linus, and I spend a lot of my time thinking about latent spaces of neural network models.',
    'Notion is a single space where you can think, write, and plan. Capture thoughts, manage projects, or even run an entire company — and do it exactly the way you want.',
]

for t in texts:
    embedding = autoencoder.embed(texts)
    reconstruction = autoencoder.generate_from_latent(embedding)
    print(reconstruction)

ValueError: Unable to create tensor, you should probably activate truncation and/or padding with 'padding=True' 'truncation=True' to have batched tensors with the same length. Perhaps your features (`input_ids` in this case) have excessive nesting (inputs type `list` where type `int` is expected).

In [None]:
sentences = np.load('../../data_preparation/embedding_chunks/embedded_chunks/contra_minipile_20241013_110604/sentences_checkpoint.npy', allow_pickle=True)

In [None]:
len(sentences)

In [2]:
len(embeddings)

600870

In [1]:
import numpy as np

# Load the embeddings from the checkpoint file
embeddings = np.load('../../data_preparation/embedding_chunks/embedded_chunks/contra_minipile_20241013_110604/embeddings_checkpoint.npy', allow_pickle=True)

print(f"Loaded embeddings with shape: {embeddings.shape}")
print(f"First embedding: {embeddings[0]}")

Loaded embeddings with shape: (600870, 768)
First embedding: [-4.65309201e-03 -1.46815956e-01  2.68288124e-02 -1.82312667e-01
 -5.12253754e-02  8.20278600e-02  6.27405494e-02 -4.61224355e-02
 -1.93447977e-01 -7.58789629e-02  4.99881096e-02  9.96305496e-02
  2.84313019e-02  5.58600128e-02 -9.69482958e-02  7.02655688e-02
 -1.45358011e-01 -6.26241043e-02  6.66467622e-02 -4.22734581e-02
 -1.02560729e-01 -6.23875484e-02 -2.08742358e-02  6.34844080e-02
 -4.33241278e-02 -1.38137385e-01 -7.97744468e-03 -2.95009054e-02
  6.19277880e-02 -1.92917585e-02 -8.89736414e-02  1.59900472e-01
 -2.73081698e-02 -2.42129453e-02 -5.17058596e-02  2.60039475e-02
  7.85812065e-02 -3.24544013e-02  4.26443443e-02  8.86103436e-02
 -4.94930930e-02 -6.03097156e-02 -4.58362438e-02  4.38228846e-02
 -4.66443487e-02 -4.33461778e-02 -4.30061556e-02 -1.47030637e-01
 -1.29455496e-02  4.05242071e-02  5.88647537e-02  1.68228522e-01
 -4.22835760e-02  2.52656136e-02 -1.14559084e-01 -2.92030182e-02
  5.10108881e-02 -4.73567052e