In [None]:
# Enable the autoreload extension in mode 2 so edited modules are re-imported
# automatically before code is executed.
%load_ext autoreload
%autoreload 2

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

## Install libraries

```bash
conda create -n edu4 python=3.11 jupyter matplotlib
```

```bash
pip install -U -r requirements.txt
```

```bash
pip install -U numpy
pip install -U scikit-learn
```

## Update repository

In [None]:
# Shell escape: runs `git pull` in the kernel's current working directory.
! git pull

## Add import path

In [None]:
import os
import sys
import gc

In [None]:
# Resolve the parent directory to an absolute path and register it on the
# import path exactly once, so modules located one level up can be imported.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [None]:
# Remove the temporary name from the notebook namespace; sys.path keeps the entry.
del module_path

## Organize imports

In [None]:
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, AutoTokenizer
from huggingface_hub import hf_hub_download, notebook_login
import numpy as np
import torch

In [None]:
import multiprocessing
from pathlib import Path

In [None]:
from tqdm import tqdm

In [None]:
import seaborn as sns

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

#### Number of CPU cores

In [None]:
# CPU count as reported by multiprocessing; the bare name below displays it as the cell output.
workers = multiprocessing.cpu_count()
workers

In [None]:
# Seed constant for reproducibility.
# NOTE(review): no RNG is seeded with this value in the cells visible here — confirm it is applied later.
SEED = 2024

## Initialize Path

In [None]:
# Root folder for the artifacts this notebook reads and writes.
PATH = Path('data')

# Checkpoint folder: create it (with parents) if needed, then name the two checkpoint files.
checkpoint_dir = PATH.joinpath('saes')
checkpoint_dir.mkdir(parents=True, exist_ok=True)
checkpoint_path1, checkpoint_path2 = (
    checkpoint_dir.joinpath(filename)
    for filename in ('best-checkpoint-v1.ckpt', 'best-checkpoint.ckpt')
)

# Image location; note that this folder is only named here, not created.
image_dir = PATH.joinpath('images')
image_path = image_dir.joinpath('1024.png')

## Initialize simple dataset

In [None]:
# Updated MNIST data loaders with normalization and validation set
def prepare_data(batch_size=128, root='./data'):
    """Build MNIST datasets and data loaders that yield flattened, normalized images.

    Args:
        batch_size: Number of samples per batch, used for both loaders.
        root: Directory passed to ``datasets.MNIST`` as the dataset location;
            the data is downloaded there when missing (``download=True``).

    Returns:
        Tuple ``(train_dataset, train_loader, val_dataset, val_loader)``.
        The "validation" pair is built from the MNIST ``train=False`` split.
        Only the training loader shuffles.
    """
    # Imported locally so this cell does not depend on a name left over from
    # an earlier kernel session; torch is already a dependency of this notebook.
    from torch.utils.data import DataLoader
    # NOTE(review): `transforms` and `datasets` are not imported in the visible
    # cells — presumably `from torchvision import datasets, transforms`.
    # Confirm and add that line to the import cell.

    # ToTensor converts the image to a tensor, Normalize then standardizes it
    # with the given mean/std, and the final step flattens it to one dimension.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),  # Mean and std from MNIST
        transforms.Lambda(lambda x: x.view(-1)),  # Flatten the image
    ])

    # Training split, reshuffled every epoch.
    train_dataset = datasets.MNIST(root=root, train=True, transform=transform, download=True)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    # Held-out split (train=False), iterated in a fixed order.
    val_dataset = datasets.MNIST(root=root, train=False, transform=transform, download=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    return train_dataset, train_loader, val_dataset, val_loader


In [None]:
# Build the datasets and loaders with the default batch size (128).
train_dataset, train_loader, val_dataset, val_loader = prepare_data()

## Initialize model

In [None]:
import requests
import numpy as np
from tqdm import tqdm

# Neuronpedia API base URL used by the request helper defined in this cell.
# NOTE(review): a later cell rebinds NEURONPEDIA_API_URL to a different base URL.
NEURONPEDIA_API_URL = "https://api.neuronpedia.org"

def get_sae_activations(token_texts, model="gpt2-small", layer=0, timeout=30):
    """
    Queries Neuronpedia API to get SAE encoder activations per token.

    One GET request is sent per token to ``{NEURONPEDIA_API_URL}/activations``
    with ``model``, ``layer`` and ``token`` as query parameters.

    NOTE(review): a later cell defines another function with the same name,
    which replaces this one once that cell has run.

    Parameters:
        token_texts (list): List of token strings.
        model (str): Model name (e.g., "gpt2-small").
        layer (int): Model layer to fetch activations from.
        timeout (float): Seconds to wait for each request before giving up.

    Returns:
        list: One entry per token, in input order: a NumPy array built from
        the response's "sae_activations" field (empty if the field is
        missing), or None when the request failed.
    """
    activations = []

    for token in tqdm(token_texts, desc="Fetching SAE activations"):
        try:
            # Without a timeout a stalled connection would block the notebook forever.
            response = requests.get(
                f"{NEURONPEDIA_API_URL}/activations",
                params={
                    "model": model,
                    "layer": layer,
                    "token": token
                },
                timeout=timeout,
            )
            if response.status_code != 200:
                print(f"Error fetching activation for token '{token}': {response.status_code}")
                activations.append(None)  # Placeholder for failed requests
                continue
            data = response.json()
        except (requests.RequestException, ValueError) as exc:
            # Network failures and non-JSON bodies are treated like any other
            # failed request, so one bad token does not abort the whole batch.
            print(f"Error fetching activation for token '{token}': {exc}")
            activations.append(None)  # Placeholder for failed requests
            continue

        # Assumes the JSON body is an object exposing "sae_activations" — TODO confirm schema.
        activations.append(np.array(data.get("sae_activations", [])))

    return activations

In [None]:
# Example list of tokens
tokens = ["hello", "world", "AI", "neural", "networks"]

# Get SAE activations for layer 8 (one request per token)
sae_vectors = get_sae_activations(tokens, model="gpt2-small", layer=8)

# Print first activation vector (None if the request for the first token failed)
print(sae_vectors[0])

In [None]:
import os
import requests
import numpy as np
from tqdm import tqdm

# Read the Neuronpedia API key from the environment so that no credential is
# stored in the notebook itself.
# NOTE(review): a key that was previously committed in this cell should be rotated.
API_KEY = os.getenv("NEURONPEDIA_API_KEY")
if not API_KEY:
    raise ValueError("Please set the NEURONPEDIA_API_KEY environment variable.")

# Neuronpedia API base URL
NEURONPEDIA_API_URL = "https://neuronpedia.org/api"

def get_sae_activations(tokens, model_id="gpt2-small", layer=0, timeout=30):
    """
    Fetches SAE encoder activations for a list of tokens from the Neuronpedia API.

    One authenticated GET request is sent per token to
    ``{NEURONPEDIA_API_URL}/activation/{model_id}/{layer}/{token}``, using the
    module-level ``API_KEY`` as a bearer token.

    Parameters:
        tokens (list): List of token strings.
        model_id (str): Model identifier (e.g., "gpt2-small").
        layer (int): Layer number to fetch activations from.
        timeout (float): Seconds to wait for each request before giving up.

    Returns:
        list: One entry per token, in input order: a NumPy array built from
        the response's "sae_activation" field (empty if the field is
        missing), or None when the request failed.
    """
    from urllib.parse import quote

    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json"
    }
    activations = []

    for token in tqdm(tokens, desc="Fetching SAE activations"):
        # Percent-encode the token: it becomes a URL path segment, and tokens
        # may contain spaces, "/", "?" or "#" that would otherwise alter the path.
        endpoint = (
            f"{NEURONPEDIA_API_URL}/activation/{model_id}/{layer}/"
            f"{quote(str(token), safe='')}"
        )

        try:
            # Without a timeout a stalled connection would block the notebook forever.
            response = requests.get(endpoint, headers=headers, timeout=timeout)
            if response.status_code != 200:
                print(f"Error fetching activation for token '{token}': {response.status_code}")
                activations.append(None)  # Placeholder for failed requests
                continue
            data = response.json()
        except (requests.RequestException, ValueError) as exc:
            # Network failures and non-JSON bodies are treated like any other
            # failed request, so one bad token does not abort the whole batch.
            print(f"Error fetching activation for token '{token}': {exc}")
            activations.append(None)  # Placeholder for failed requests
            continue

        # Assumes the JSON body is an object exposing "sae_activation" — TODO confirm schema.
        activations.append(np.array(data.get("sae_activation", [])))

    return activations

In [None]:
import requests
import os

# Read the key from the environment instead of relying on a value left in the
# kernel by an earlier cell.
API_KEY = os.getenv("NEURONPEDIA_API_KEY")
if not API_KEY:
    raise ValueError("Please set the NEURONPEDIA_API_KEY environment variable.")

headers = {
    "Authorization": f"Bearer {API_KEY}",
    "Content-Type": "application/json"
}

# TODO: "your_endpoint" is a placeholder path — replace it with a real API route.
# The timeout keeps a stalled connection from blocking the notebook.
response = requests.get(
    "https://neuronpedia.org/api/your_endpoint",
    headers=headers,
    timeout=30,
)

if response.status_code == 200:
    data = response.json()
    # Process your data here
else:
    print(f"Error: {response.status_code} - {response.text}")

In [None]:
# Example list of tokens
tokens = ["hello", "world", "AI", "neural", "networks"]

# Fetch SAE activations for layer 8 (one authenticated request per token)
sae_vectors = get_sae_activations(tokens, model_id="gpt2-small", layer=8)

# Display the first activation vector (None if the request for the first token failed)
print(sae_vectors[0])