In [2]:
import os
import sys

import torch
import torch.nn as nn

import einops
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

device = torch.device("mps" if torch.backends.mps.is_available() else "cuda" if torch.cuda.is_available() else "cpu")

In [3]:
current_dir = os.path.dirname(os.path.abspath("GNN_circuit_discovery.ipynb"))
model_dir = os.path.join(current_dir, '..') # Assuming it's one level up
data_dir = os.path.join(model_dir, '..')
#toy_model_dir = os.path.join(current_dir, '..', 'llm_from_scratch/LLM_from_scratch/')

sys.path.append(model_dir)
sys.path.append(data_dir)
#sys.path.append(toy_model_dir)

from config.gpt.training import options
from config.sae.models import sae_options
from models.gpt import GPT
from models.sparsified import SparsifiedGPT
from data.tokenizers import ASCIITokenizer, TikTokenTokenizer

#from utils import generate
c_name = 'standardx8.shakespeare_64x4'
name = 'standard.shakespeare_64x4'
config = sae_options[c_name]

model = SparsifiedGPT(config)
model_path = os.path.join("../checkpoints", name)
model = model.load(model_path, device=config.device)

tokenizer = ASCIITokenizer() if "shakespeare" in name else TikTokenTokenizer()

In [4]:
#create a small set of text prompts from training data
raw_text_prompts = []

with open('/Volumes/MacMini/gpt-circuits/data/shakespeare/input.txt', 'r') as file:
    for _ in range(64):
        line = file.readline()
        raw_text_prompts.append(line)
    

In [5]:
# Data preparation 
def prepare_dataset(raw_text_prompts, tokenizer, batch_size=16):
    # Tokenize all prompts
    tokenized_data = []
    for prompt in raw_text_prompts:
        tokens = tokenizer.encode(prompt)
        tokens = torch.Tensor(tokens).long().unsqueeze(0)
        
        tokenized_data.append({
            'input_ids': tokens,
            'attention_mask': tokens['attention_mask'].squeeze()
        })
    
    # Create dataset and dataloader
    dataset = TokenizedDataset(tokenized_data)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    
    return dataloader


def collect_GAT_training_data(model, dataloader, tokenizer):
    '''Example data for each batch:
        {
        'input_ids': tensor[batch_size, seq_len],  # Input token IDs
        'sae_features': [
            # List of tensors, one per SAE layer
            tensor[batch_size, num_features_layer1],  # Activations from SAE layer 1
            tensor[batch_size, num_features_layer2],  # Activations from SAE layer 2
            # ...and so on for all SAE layers
        ],
        'logits': tensor[batch_size, vocab_size]  # Original model output logits
        }
        
        '''
    training_data = []
    
    # Register hooks to capture SAE activations
    #activation_hooks = register_sae_hooks(model)
    
    for batch in dataloader:
        input_ids = batch['input_ids']
        random_prompt = "a"
        tokens = tokenizer.encode(random_prompt)
        tokens = torch.Tensor(tokens).long().unsqueeze(0)
        
        # Forward pass through model
        with torch.no_grad():
            outputs = model(input_ids)
            
            # Get output logits
            logits = outputs.logits[:, -1, :]  # Last token prediction
            
            # Get SAE activations from hooks
            sae_activations = [hook.activations for hook in activation_hooks]
            
            # Store this batch's data
            batch_data = {
                'input_ids': input_ids,
                'sae_features': sae_activations,
                'logits': logits
            }
            
            training_data.append(batch_data)
            
            # Clear hook activations for next batch
            for hook in activation_hooks:
                hook.clear()
    
    return training_data

In [6]:
dataloader = prepare_dataset(raw_text_prompts, tokenizer)

TypeError: 'ASCIITokenizer' object is not callable