# Feature Analysis: Diffing Base and Instruct

This notebook analyzes which SAE features increase in activations between base and chat models.

In [1]:
import csv
import json
import torch
import os
import numpy as np
import pandas as pd
from pathlib import Path
from typing import List, Dict, Tuple
from transformers import AutoTokenizer, AutoModelForCausalLM
from huggingface_hub import hf_hub_download
from dictionary_learning.utils import load_dictionary
from tqdm.auto import tqdm

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


## Configs

In [13]:
# =============================================================================
# MODEL SELECTION - Change this to switch between models
# =============================================================================
MODEL_TYPE = "llama"  # Options: "qwen" or "llama"
SAE_LAYER = 15
SAE_TRAINER = 1

# =============================================================================
# OUTPUT FILE CONFIGURATION
# =============================================================================
OUTPUT_FILE = f"./results/3_personal_general/3_personal_general.csv"
os.makedirs(os.path.dirname(OUTPUT_FILE), exist_ok=True)

PROMPT_OUTPUT_FILE = f"./results/3_personal_general/3_personal_general_prompts.jsonl"
os.makedirs(os.path.dirname(PROMPT_OUTPUT_FILE), exist_ok=True)

# =============================================================================
# FEATURE DASHBOARD URL - Global variable for links
# =============================================================================
FEATURE_DASHBOARD_BASE_URL = "https://completely-touched-platypus.ngrok-free.app/"

# =============================================================================
# AUTO-CONFIGURED SETTINGS BASED ON MODEL TYPE
# =============================================================================
if MODEL_TYPE == "qwen":
    MODEL_NAME = "Qwen/Qwen2.5-7B-Instruct"
    SAE_RELEASE = "andyrdt/saes-qwen2.5-7b-instruct"
    ASSISTANT_HEADER = "<|im_start|>assistant"
    TOKEN_OFFSETS = {"asst": -1, "newline": 0}
    SAE_BASE_PATH = "/workspace/sae/qwen-2.5-7b-instruct/saes"
    
elif MODEL_TYPE == "llama":
    MODEL_NAME = "meta-llama/Llama-3.1-8B-Instruct"
    SAE_RELEASE = "andyrdt/saes-llama-3.1-8b-instruct"
    ASSISTANT_HEADER = "<|start_header_id|>assistant<|end_header_id|>"
    TOKEN_OFFSETS = {"asst": -2, "endheader": -1, "newline": 0}
    SAE_BASE_PATH = "/workspace/sae/llama-3.1-8b-instruct/saes"
    
else:
    raise ValueError(f"Unknown MODEL_TYPE: {MODEL_TYPE}. Use 'qwen' or 'llama'")

# =============================================================================
# DERIVED CONFIGURATIONS
# =============================================================================
SAE_CONFIG = {
    "release": SAE_RELEASE,
    "layer": SAE_LAYER,
    "trainer": SAE_TRAINER
}
SAE_PATH = f"{SAE_BASE_PATH}/resid_post_layer_{SAE_LAYER}/trainer_{SAE_TRAINER}"
LAYER_INDEX = SAE_LAYER

# Data paths
PROMPTS_PATH = "./prompts/general"
FEATURES_FILE = "./results/1_personal/only_personal.csv"

# Processing parameters
BATCH_SIZE = 8
MAX_LENGTH = 512
TOP_FEATURES = 100

# =============================================================================
# SUMMARY
# =============================================================================
print(f"Configuration Summary:")
print(f"  Model: {MODEL_NAME}")
print(f"  SAE: {SAE_RELEASE}")
print(f"  SAE Layer: {SAE_LAYER}, Trainer: {SAE_TRAINER}")
print(f"  Available token types: {list(TOKEN_OFFSETS.keys())}")
print(f"  Assistant header: {ASSISTANT_HEADER}")
print(f"  Output file: {OUTPUT_FILE}")
print(f"  Prompt output file: {PROMPT_OUTPUT_FILE}")

Configuration Summary:
  Model: meta-llama/Llama-3.1-8B-Instruct
  SAE: andyrdt/saes-llama-3.1-8b-instruct
  SAE Layer: 15, Trainer: 1
  Available token types: ['asst', 'endheader', 'newline']
  Assistant header: <|start_header_id|>assistant<|end_header_id|>
  Output file: ./results/3_personal_general/3_personal_general.csv
  Prompt output file: ./results/3_personal_general/3_personal_general_prompts.jsonl


## Load Data

In [3]:
def load_prompts(filepath: str) -> pd.DataFrame:
    """Load prompts with labels from JSONL file."""
    prompts = []
    labels = []
    with open(filepath, 'r') as f:
        for line in f:
            data = json.loads(line.strip())
            prompts.append(data['content'])
            labels.append(data['label'])
    return pd.DataFrame({'prompt': prompts, 'label': labels})

# Load prompts from multiple .jsonl files in PROMPTS_PATH into one dataframe
prompts_df = pd.DataFrame()
for file in os.listdir(PROMPTS_PATH):
    if file.endswith(".jsonl"):
        df = load_prompts(os.path.join(PROMPTS_PATH, file))
        prompts_df = pd.concat([prompts_df, df])

print(f"Loaded {prompts_df.shape[0]} prompts")

Loaded 140 prompts


## Load Model and SAE

In [4]:
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

print(f"Tokenizer loaded: {tokenizer.__class__.__name__}")

Tokenizer loaded: PreTrainedTokenizerFast


In [5]:
# Load model
device_map_value = device.index if device.type == 'cuda' and device.index is not None else str(device)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.bfloat16,
    device_map={"": device_map_value}
)
model.eval()

print(f"Model loaded: {model.__class__.__name__}")
print(f"Model device: {next(model.parameters()).device}")

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Model loaded: LlamaForCausalLM
Model device: cuda:0


In [14]:
# Load SAE
ae_file_path = os.path.join(SAE_PATH, "ae.pt")
config_file_path = os.path.join(SAE_PATH, "config.json")

if os.path.exists(ae_file_path) and os.path.exists(config_file_path):
    print(f"✓ Found SAE files at: {os.path.dirname(ae_file_path)}")
else:
    print(f"SAE not found locally, downloading from {SAE_RELEASE}...")
    os.makedirs(os.path.dirname(ae_file_path), exist_ok=True)
    sae_path = f"resid_post_layer_{SAE_LAYER}/trainer_{SAE_TRAINER}"
    local_dir = SAE_BASE_PATH
    ae_file = hf_hub_download(repo_id=SAE_RELEASE, filename=f"{sae_path}/ae.pt", local_dir=local_dir)
    config_file = hf_hub_download(repo_id=SAE_RELEASE, filename=f"{sae_path}/config.json", local_dir=local_dir)

sae, _ = load_dictionary(SAE_PATH, device=device)
sae.eval()

print(f"SAE loaded with {sae.dict_size} features")
print(f"SAE device: {next(sae.parameters()).device}")

✓ Found SAE files at: /workspace/sae/llama-3.1-8b-instruct/saes/resid_post_layer_15/trainer_1
SAE loaded with 131072 features
SAE device: cuda:0


## Activation Extraction Functions

In [15]:
class StopForward(Exception):
    """Exception to stop forward pass after target layer."""
    pass

def find_assistant_position(input_ids: torch.Tensor, attention_mask: torch.Tensor, 
                          assistant_header: str, token_offset: int, tokenizer, device) -> int:
    """Find the position of the assistant token based on the given offset."""
    # Find assistant header position
    assistant_tokens = tokenizer.encode(assistant_header, add_special_tokens=False)
    
    # Find where assistant section starts
    assistant_pos = None
    for k in range(len(input_ids) - len(assistant_tokens) + 1):
        if torch.equal(input_ids[k:k+len(assistant_tokens)], torch.tensor(assistant_tokens).to(device)):
            assistant_pos = k + len(assistant_tokens) + token_offset
            break
    
    if assistant_pos is None:
        # Fallback to last non-padding token
        assistant_pos = attention_mask.sum().item() - 1
    
    # Ensure position is within bounds
    max_pos = attention_mask.sum().item() - 1
    assistant_pos = min(assistant_pos, max_pos)
    assistant_pos = max(assistant_pos, 0)
    
    return assistant_pos

@torch.no_grad()
def extract_activations_and_metadata(prompts: List[str], layer_idx: int) -> Tuple[torch.Tensor, List[Dict], List[str]]:
    """Extract activations and prepare metadata for all prompts."""
    all_activations = []
    all_metadata = []
    formatted_prompts_list = []
    
    # Get target layer
    target_layer = model.model.layers[layer_idx]
    
    # Process in batches
    for i in tqdm(range(0, len(prompts), BATCH_SIZE), desc="Processing batches"):
        batch_prompts = prompts[i:i+BATCH_SIZE]
        
        # Format prompts as chat messages
        formatted_prompts = []
        for prompt in batch_prompts:
            messages = [{"role": "user", "content": prompt}]
            formatted_prompt = tokenizer.apply_chat_template(
                messages, tokenize=False, add_generation_prompt=True
            )
            formatted_prompts.append(formatted_prompt)
        
        formatted_prompts_list.extend(formatted_prompts)
        
        # Tokenize batch
        batch_inputs = tokenizer(
            formatted_prompts,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=MAX_LENGTH
        )
        
        # Move to device
        batch_inputs = {k: v.to(device) for k, v in batch_inputs.items()}
        
        # Hook to capture activations
        activations = None
        
        def hook_fn(module, input, output):
            nonlocal activations
            activations = output[0] if isinstance(output, tuple) else output
            raise StopForward()
        
        # Register hook
        handle = target_layer.register_forward_hook(hook_fn)
        
        try:
            _ = model(**batch_inputs)
        except StopForward:
            pass
        finally:
            handle.remove()
        
        # For each prompt in the batch, calculate positions for all token types
        for j, formatted_prompt in enumerate(formatted_prompts):
            attention_mask = batch_inputs["attention_mask"][j]
            input_ids = batch_inputs["input_ids"][j]
            
            # Calculate positions for all token types
            positions = {}
            for token_type, token_offset in TOKEN_OFFSETS.items():
                positions[token_type] = find_assistant_position(
                    input_ids, attention_mask, ASSISTANT_HEADER, token_offset, tokenizer, device
                )
            
            # Store the full activation sequence and metadata
            all_activations.append(activations[j].cpu())  # [seq_len, hidden_dim]
            all_metadata.append({
                'prompt_idx': i + j,
                'positions': positions,
                'attention_mask': attention_mask.cpu(),
                'input_ids': input_ids.cpu()
            })
    
    # Find the maximum sequence length across all activations
    max_seq_len = max(act.shape[0] for act in all_activations)
    hidden_dim = all_activations[0].shape[1]
    
    # Pad all activations to the same length
    padded_activations = []
    for act in all_activations:
        if act.shape[0] < max_seq_len:
            padding = torch.zeros(max_seq_len - act.shape[0], hidden_dim)
            padded_act = torch.cat([act, padding], dim=0)
        else:
            padded_act = act
        padded_activations.append(padded_act)
    
    return torch.stack(padded_activations, dim=0), all_metadata, formatted_prompts_list

@torch.no_grad()
def extract_token_activations(full_activations: torch.Tensor, metadata: List[Dict]) -> Dict[str, torch.Tensor]:
    """Extract activations for specific token positions from full sequence activations."""
    results = {}
    
    # Initialize results for each token type
    for token_type in TOKEN_OFFSETS.keys():
        results[token_type] = []
    
    # Extract activations for each token type
    for i, meta in enumerate(metadata):
        for token_type, position in meta['positions'].items():
            # Extract activation at the specific position
            activation = full_activations[i, position, :]  # [hidden_dim]
            results[token_type].append(activation)
    
    # Convert lists to tensors
    for token_type in TOKEN_OFFSETS.keys():
        results[token_type] = torch.stack(results[token_type], dim=0)
    
    return results

print("Activation extraction functions defined")

Activation extraction functions defined


## Extract Activations

In [16]:
# Extract activations for all positions first, then extract specific token positions
print("Extracting activations for all positions...")
full_activations, metadata, formatted_prompts = extract_activations_and_metadata(prompts_df['prompt'].tolist(), LAYER_INDEX)
print(f"Full activations shape: {full_activations.shape}")

# Extract activations for all token types
print("\nExtracting activations for specific token positions...")
token_activations = extract_token_activations(full_activations, metadata)

for token_type, activations in token_activations.items():
    print(f"Token type '{token_type}' activations shape: {activations.shape}")

Extracting activations for all positions...


Processing batches:   0%|          | 0/18 [00:00<?, ?it/s]

Full activations shape: torch.Size([140, 160, 4096])

Extracting activations for specific token positions...
Token type 'asst' activations shape: torch.Size([140, 4096])
Token type 'endheader' activations shape: torch.Size([140, 4096])
Token type 'newline' activations shape: torch.Size([140, 4096])


## Apply SAE to Get Feature Activations (Optimized)

In [17]:
@torch.no_grad()
def get_sae_features_batched(activations: torch.Tensor) -> torch.Tensor:
    """Apply SAE to get feature activations with proper batching."""
    activations = activations.to(device)
    
    # Process in batches to avoid memory issues
    feature_activations = []
    
    for i in range(0, activations.shape[0], BATCH_SIZE):
        batch = activations[i:i+BATCH_SIZE]
        features = sae.encode(batch)  # [batch, num_features]
        feature_activations.append(features.cpu())
    
    return torch.cat(feature_activations, dim=0)

# Get SAE feature activations for specific token positions
print("Computing SAE features for specific token positions...")
token_features = {}

for token_type, activations in token_activations.items():
    print(f"Processing SAE features for token type '{token_type}'...")
    features = get_sae_features_batched(activations)
    token_features[token_type] = features
    print(f"Features shape for '{token_type}': {features.shape}")

# OPTIMIZATION: Pre-compute SAE features for ALL positions at once
print("\nOptimization: Pre-computing SAE features for all positions...")
print(f"Processing {full_activations.shape[0]} prompts with max {full_activations.shape[1]} tokens each...")

# Reshape to [total_positions, hidden_dim]
total_positions = full_activations.shape[0] * full_activations.shape[1]
reshaped_activations = full_activations.view(total_positions, -1)

# Apply SAE to all positions
full_sae_features = get_sae_features_batched(reshaped_activations)

# Reshape back to [num_prompts, seq_len, num_features]
full_sae_features = full_sae_features.view(full_activations.shape[0], full_activations.shape[1], -1)

print(f"Full SAE features shape: {full_sae_features.shape}")
print(f"✓ SAE features pre-computed for all positions")

print(f"\nCompleted SAE feature extraction for {len(token_features)} token types")

Computing SAE features for specific token positions...


Processing SAE features for token type 'asst'...
Features shape for 'asst': torch.Size([140, 131072])
Processing SAE features for token type 'endheader'...
Features shape for 'endheader': torch.Size([140, 131072])
Processing SAE features for token type 'newline'...
Features shape for 'newline': torch.Size([140, 131072])

Optimization: Pre-computing SAE features for all positions...
Processing 140 prompts with max 160 tokens each...
Full SAE features shape: torch.Size([140, 160, 131072])
✓ SAE features pre-computed for all positions

Completed SAE feature extraction for 3 token types


## Optimized Combined Analysis and Save Results

In [18]:
@torch.no_grad()
def filter_features_by_token_and_source(features_df: pd.DataFrame, token_type: str, source: str) -> pd.DataFrame:
    """Filter features from the features DataFrame based on token type and source."""
    filtered_df = features_df[
        (features_df['token'] == token_type) & 
        (features_df['source'] == source)
    ].copy()
    
    return filtered_df

@torch.no_grad()
def get_filtered_feature_activations(token_features: torch.Tensor, features_df: pd.DataFrame, 
                                   token_type: str, source: str) -> Tuple[torch.Tensor, torch.Tensor, pd.DataFrame]:
    """Get feature activations for filtered features based on token type and source."""
    # Filter features by token type and source
    filtered_features_df = filter_features_by_token_and_source(features_df, token_type, source)
    
    if len(filtered_features_df) == 0:
        print(f"Warning: No features found for token_type='{token_type}', source='{source}'")
        return torch.tensor([]), torch.tensor([]), filtered_features_df
    
    # Get feature indices
    feature_ids = filtered_features_df['feature_id'].tolist()
    feature_indices = torch.tensor(feature_ids, dtype=torch.long)
    
    # Extract activations for these features
    feature_activations = token_features[:, feature_indices]
    
    print(f"Found {len(feature_indices)} features for token_type='{token_type}', source='{source}'")
    
    return feature_indices, feature_activations, filtered_features_df

@torch.no_grad()
def collect_detailed_tokens_optimized(feature_id: int, feature_activations: torch.Tensor, 
                                     feature_idx: int, prompts_df: pd.DataFrame,
                                     full_sae_features: torch.Tensor, metadata: List[Dict],
                                     activation_threshold: float = 0.0) -> List[Dict]:
    """Collect detailed token activations for a specific feature using pre-computed SAE features."""
    # Find which prompts activate this feature
    activations = feature_activations[:, feature_idx]
    active_mask = activations > activation_threshold
    active_indices = torch.where(active_mask)[0]
    
    if len(active_indices) == 0:
        return []
    
    all_token_records = []
    
    for prompt_idx in active_indices:
        prompt_idx = int(prompt_idx)
        
        # OPTIMIZATION: Use pre-computed SAE features instead of re-computing
        feature_activations_sequence = full_sae_features[prompt_idx, :, feature_id]  # [seq_len]
        
        # Get tokenized input from metadata (cached)
        input_ids = metadata[prompt_idx]['input_ids']

        # Create tokenized prompt as list of token strings
        tokenized_prompt = [tokenizer.decode([int(token_id)]) for token_id in input_ids]
        
        # Get prompt info
        prompt_text = prompts_df.iloc[prompt_idx]["prompt"]
        
        # Collect token data for this prompt
        tokens = []
        for pos_idx in range(min(len(feature_activations_sequence), len(input_ids))):
            activation_val = float(feature_activations_sequence[pos_idx])
            
            if activation_val > 0:
                token_id = int(input_ids[pos_idx])
                token_text = tokenizer.decode([token_id])
                
                tokens.append({
                    'position': pos_idx,
                    'token_id': token_id,
                    'text': token_text,
                    'activation': activation_val
                })
        
        # Only add if we found tokens
        if tokens:
            # Sort tokens by activation (descending)
            tokens.sort(key=lambda x: x['activation'], reverse=True)
            
            all_token_records.append({
                'prompt_id': prompt_idx,
                'prompt_text': prompt_text,
                'prompt_label': prompts_df.iloc[prompt_idx]["label"],
                'prompt_feature_activation': float(activations[prompt_idx]),
                'tokenized_prompt': tokenized_prompt,
                'tokens': tokens
            })
    
    return all_token_records

# Pre-compute dashboard link template
def create_dashboard_link(feature_id: int) -> str:
    """Create dashboard link for a feature."""
    return f"{FEATURE_DASHBOARD_BASE_URL}?model={MODEL_TYPE}&layer={SAE_LAYER}&trainer={SAE_TRAINER}&fids={feature_id}"

print("Optimized analysis functions defined")

Optimized analysis functions defined


In [19]:
# Load target features from file
target_features_df = pd.read_csv(FEATURES_FILE)
print(f"Loaded {len(target_features_df)} total features from {FEATURES_FILE}")

# Prepare results for both CSV and JSONL
csv_results = []
jsonl_results = []
source_name = f"{MODEL_TYPE}_trainer{SAE_TRAINER}_layer{SAE_LAYER}"

print(f"Processing results for source: {source_name}")

# Process each token type
for token_type in TOKEN_OFFSETS.keys():
    print(f"\nProcessing token type: {token_type}")
    
    # Get filtered feature activations for this token type and source
    feature_indices, feature_activations, filtered_features_df = get_filtered_feature_activations(
        token_features[token_type], target_features_df, token_type, source_name
    )
    
    if len(filtered_features_df) == 0:
        print(f"No features found for token_type='{token_type}', source='{source_name}'. Skipping.")
        continue
    
    # Process each feature
    features_processed = 0
    features_skipped = 0
    
    for idx, (feature_idx, feature_id) in enumerate(zip(feature_indices, filtered_features_df['feature_id'])):
        feature_id = int(feature_id)
        activations = feature_activations[:, idx]  # [num_prompts]
        
        # Calculate statistics only on active features (activation > 0)
        active_mask = activations > 0
        active_activations = activations[active_mask]
        
        # Skip features that aren't active on any prompt
        if len(active_activations) == 0:
            features_skipped += 1
            continue
        
        # CSV statistics
        activation_mean = float(active_activations.mean())
        activation_max = float(active_activations.max())
        activation_min = float(active_activations.min())
        num_prompts = len(active_activations)
        
        # Add to CSV results
        csv_result = {
            'feature_id': feature_id,
            'activation_mean': activation_mean,
            'activation_max': activation_max,
            'activation_min': activation_min,
            'num_prompts': num_prompts,
            'chat_desc': '',
            'pt_desc': '',
            'type': '',
            'source': source_name,
            'token': token_type,
            'link': create_dashboard_link(feature_id)
        }
        csv_results.append(csv_result)
        
        # Collect detailed token data for JSONL (OPTIMIZED)
        detailed_tokens = collect_detailed_tokens_optimized(
            feature_id, feature_activations, idx, prompts_df, full_sae_features, metadata
        )
        
        # Add to JSONL results if we have detailed data
        if detailed_tokens:
            jsonl_result = {
                'feature_id': feature_id,
                'token': token_type,
                'source': source_name,
                'active_prompts': detailed_tokens
            }
            jsonl_results.append(jsonl_result)
        
        features_processed += 1
    
    print(f"Processed {features_processed} active features for token_type='{token_type}' (skipped {features_skipped} inactive)")

print(f"\nTotal features processed: {len(csv_results)} CSV, {len(jsonl_results)} JSONL")

Loaded 92 total features from ./results/1_personal/only_personal.csv
Processing results for source: llama_trainer1_layer15

Processing token type: asst
Found 2 features for token_type='asst', source='llama_trainer1_layer15'
Processed 1 active features for token_type='asst' (skipped 1 inactive)

Processing token type: endheader
Found 6 features for token_type='endheader', source='llama_trainer1_layer15'
Processed 2 active features for token_type='endheader' (skipped 4 inactive)

Processing token type: newline
Found 7 features for token_type='newline', source='llama_trainer1_layer15'
Processed 0 active features for token_type='newline' (skipped 7 inactive)

Total features processed: 3 CSV, 3 JSONL


In [20]:
# Save results
print("Saving results...")

# Convert CSV results to DataFrame
column_order = ['feature_id', 'activation_mean', 'activation_max', 'activation_min', 
                'num_prompts', 'chat_desc', 'pt_desc', 'type', 'source', 'token', 'link']
csv_df = pd.DataFrame(csv_results)[column_order]

# # Save CSV results
# if os.path.exists(OUTPUT_FILE):
#     csv_df.to_csv(OUTPUT_FILE, mode='a', header=False, index=False)
#     print(f"CSV results appended to existing file: {OUTPUT_FILE}")
# else:
#     csv_df.to_csv(OUTPUT_FILE, index=False)
#     print(f"CSV results saved to new file: {OUTPUT_FILE}")

# Save JSONL results
jsonl_results.sort(key=lambda x: x['feature_id'])  # Sort by feature_id
with open(PROMPT_OUTPUT_FILE, 'a') as f:
    for record in jsonl_results:
        f.write(json.dumps(record) + '\n')

print(f"JSONL results saved to {PROMPT_OUTPUT_FILE}")

# Show preview and summary
if len(csv_df) > 0:
    print(f"\nPreview of CSV data:")
    print(csv_df.head(10).to_string(index=False))
    
    print(f"\nSummary by token type:")
    summary = csv_df.groupby('token').agg({
        'activation_mean': ['count', 'mean', 'max'],
        'activation_max': 'max',
        'num_prompts': ['mean', 'max'],
        'feature_id': 'nunique'
    }).round(4)
    print(summary)

if len(jsonl_results) > 0:
    print(f"\nJSONL file structure summary:")
    total_prompt_records = 0
    total_token_records = 0
    for record in jsonl_results:
        num_prompts = len(record['active_prompts'])
        total_tokens = sum(len(prompt['tokens']) for prompt in record['active_prompts'])
        total_prompt_records += num_prompts
        total_token_records += total_tokens
        print(f"  Feature {record['feature_id']} ({record['token']}): {num_prompts} prompts, {total_tokens} tokens")
    
    print(f"\nTotal: {len(jsonl_results)} features, {total_prompt_records} prompt records, {total_token_records} token records")
    
    if len(jsonl_results) > 0:
        print(f"\nSample JSONL record structure:")
        sample_record = jsonl_results[0]
        print(f"Feature {sample_record['feature_id']} ({sample_record['token']}, {sample_record['source']}):")
        if sample_record['active_prompts']:
            first_prompt = sample_record['active_prompts'][0]
            print(f"  First prompt: {len(first_prompt['tokens'])} active tokens")
            if first_prompt['tokens']:
                print(f"  Top token: '{first_prompt['tokens'][0]['text']}' (activation: {first_prompt['tokens'][0]['activation']:.4f})")
else:
    print("No JSONL results to save.")

print(f"\n✓ Analysis complete!")

Saving results...
JSONL results saved to ./results/3_personal_general/3_personal_general_prompts.jsonl

Preview of CSV data:
 feature_id  activation_mean  activation_max  activation_min  num_prompts chat_desc pt_desc type                 source     token                                                                                          link
      27476         0.310405        0.338905        0.288229            5                        llama_trainer1_layer15      asst https://completely-touched-platypus.ngrok-free.app/?model=llama&layer=15&trainer=1&fids=27476
      59035         0.362718        0.435948        0.286962           14                        llama_trainer1_layer15 endheader https://completely-touched-platypus.ngrok-free.app/?model=llama&layer=15&trainer=1&fids=59035
      47776         0.400181        0.400181        0.400181            1                        llama_trainer1_layer15 endheader https://completely-touched-platypus.ngrok-free.app/?model=llama&layer=15&