# Example 3: Load Concepts and Demonstrate Activation Manipulation

This notebook demonstrates how to:
1. Load the language model and trained SAE from previous examples
2. Load curated concepts from the manual curation process
3. Attach the concept dictionary to the SAE
4. Demonstrate inference with manipulated activations
5. Create custom activation controllers to amplify or suppress specific concepts

This example shows how to use curated concepts to understand and control what the model generates.


In [4]:
# Setup and imports
%load_ext autoreload
%autoreload 2

import json
from datetime import datetime
from pathlib import Path

import torch

from amber.adapters import TextDataset
from amber.language_model.language_model import LanguageModel
from amber.mechanistic.sae.concepts.concept_dictionary import ConceptDictionary
from amber.mechanistic.sae.modules.topk_sae import TopKSae
from amber.store import LocalStore

print("‚úÖ Imports completed")


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
✅ Imports completed


In [7]:
# Configuration
#
# Resolves the paths and settings used by the rest of the notebook from the
# metadata files stored under MODEL_DIR.
#   * training_metadata.json is required: FileNotFoundError is raised if it is missing.
#   * attachment_metadata.json is optional: a warning is printed and an empty dict is used.
#   * curated concepts (CSV or JSON) are only checked here; a warning is printed if neither exists.
print("🚀 Starting Concept Loading and Neuron Manipulation Example")

MODEL_ID_HF = "sshleifer/tiny-gpt2"
STORE_DIR = Path("store")
MODEL_DIR = STORE_DIR / MODEL_ID_HF.replace("/", "_")
training_metadata_path = MODEL_DIR / "training_metadata.json"
attachment_metadata_path = MODEL_DIR / "attachment_metadata.json"

if not training_metadata_path.exists():
    print(f"❌ Error: training_metadata.json not found at {training_metadata_path}!")
    print("   Please run 01_train_sae_model.ipynb first")
    raise FileNotFoundError(f"training_metadata.json not found at {training_metadata_path}")

# Load metadata. The files are read as UTF-8 explicitly so the result does not
# depend on the platform's default locale encoding.
with open(training_metadata_path, "r", encoding="utf-8") as f:
    training_metadata = json.load(f)

# Attachment metadata is optional: fall back to an empty dict when the file is absent.
attachment_metadata = {}
if attachment_metadata_path.exists():
    with open(attachment_metadata_path, "r", encoding="utf-8") as f:
        attachment_metadata = json.load(f)
else:
    print(f"⚠️ Warning: attachment_metadata.json not found at {attachment_metadata_path}")
    print("   This is optional - you can still load concepts without it")

# Configuration from metadata
MODEL_ID = training_metadata["model_id"]
LAYER_SIGNATURE = training_metadata["layer_signature"]
SAE_MODEL_PATH = Path(training_metadata["sae_model_path"])
CACHE_DIR = Path(training_metadata.get("cache_dir", MODEL_DIR / "cache"))
# NOTE(review): MODEL_DIR was derived from the default STORE_DIR above and is not
# recomputed if the metadata supplies a different "store_dir" - confirm this is intended.
STORE_DIR = Path(training_metadata.get("store_dir", STORE_DIR))
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Check for curated concepts (saved under model directory)
CURATED_CONCEPTS_CSV = MODEL_DIR / "curated_concepts.csv"
CURATED_CONCEPTS_JSON = MODEL_DIR / "curated_concepts.json"

# The first existing file wins, CSV before JSON; None when neither exists.
curated_concepts_path = next(
    (path for path in (CURATED_CONCEPTS_CSV, CURATED_CONCEPTS_JSON) if path.exists()),
    None,
)

if curated_concepts_path is None:
    print("⚠️ Warning: No curated concepts found!")
    print(f"   Expected at: {CURATED_CONCEPTS_CSV} or {CURATED_CONCEPTS_JSON}")
    print("   Please run the manual curation process first")
    print("   You can create a simple CSV with format: neuron_idx,concept_name,score")

print(f"🔧 Model: {MODEL_ID}")
print(f"🎯 Target layer: {LAYER_SIGNATURE}")
print(f"🧠 SAE model: {SAE_MODEL_PATH}")
print(f"📊 Curated concepts: {curated_concepts_path if curated_concepts_path is not None else 'Not found'}")
print()


🚀 Starting Concept Loading and Neuron Manipulation Example
⚠️ Warning: No curated concepts found!
   Expected at: store/sshleifer_tiny-gpt2/curated_concepts.csv or store/sshleifer_tiny-gpt2/curated_concepts.json
   Please run the manual curation process first
   You can create a simple CSV with format: neuron_idx,concept_name,score
🔧 Model: sshleifer/tiny-gpt2
🎯 Target layer: gpt2lmheadmodel_transformer_h_0_attn_c_attn
🧠 SAE model: store/sshleifer_tiny-gpt2/topk_sae_model.pt
📊 Curated concepts: Not found



In [8]:
# Step 1: Load language model
#
# Builds a LocalStore on MODEL_DIR, loads the model named by MODEL_ID through
# LanguageModel.from_huggingface, moves it to DEVICE and tags its context with
# an experiment name and a timestamped run id.
print("📥 Loading language model...")

# Create LocalStore for the model
store = LocalStore(MODEL_DIR)

# Load model and move to device
model = LanguageModel.from_huggingface(MODEL_ID, store=store)
model.model.to(DEVICE)

# Optional: set experiment metadata. The run id embeds the current local time,
# so every execution of this cell produces a new run id.
model.context.experiment_name = "concept_manipulation"
model.context.run_id = f"manipulation_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
model.context.max_length = 64

print(f"✅ Model loaded: {model.model_id}")
print(f"📱 Device: {DEVICE}")
print(f"🔧 Context: {model.context.experiment_name}/{model.context.run_id}")


📥 Loading language model...
✅ Model loaded: sshleifer_tiny-gpt2
📱 Device: cpu
🔧 Context: concept_manipulation/manipulation_20251117_213523


In [9]:
# Step 2: Load trained SAE
#
# Loads the TopK SAE checkpoint referenced by the training metadata and tags its
# context with the same experiment/run identifiers as the language model.
# Raises FileNotFoundError when the checkpoint file does not exist.
print("📥 Loading trained SAE...")
if not SAE_MODEL_PATH.exists():
    print(f"❌ Error: SAE model not found at {SAE_MODEL_PATH}")
    print("   Please run 01_train_sae_model.ipynb first")
    raise FileNotFoundError(f"SAE model not found at {SAE_MODEL_PATH}")

sae = TopKSae.load(SAE_MODEL_PATH)
# NOTE(review): the SAE is not explicitly moved to DEVICE here, unlike the language
# model - confirm whether TopKSae.load / attachment handles placement when DEVICE is "cuda".

# Reuse the language model's identifiers instead of generating a second timestamp,
# so both contexts refer to the same experiment run.
sae.context.experiment_name = model.context.experiment_name
sae.context.run_id = model.context.run_id

print(
    f"✅ SAE loaded: {training_metadata['hidden_dim']} → {training_metadata['n_latents']} → {training_metadata['hidden_dim']}")
print(f"🔧 Context: {sae.context.experiment_name}/{sae.context.run_id}")
print(f"📊 TopK parameter: k={training_metadata.get('k', 'N/A')}")
print("✅ Trained SAE loaded")


2025-11-17 21:35:28,390 [INFO] amber.mechanistic.sae.modules.topk_sae: 
Loaded TopKSAE from store/sshleifer_tiny-gpt2/topk_sae_model.pt
n_latents=24, n_inputs=6, k=8


📥 Loading trained SAE...
✅ SAE loaded: 6 → 24 → 6
🔧 Context: concept_manipulation/manipulation_20251117_213528
📊 TopK parameter: k=8
✅ Trained SAE loaded


In [56]:
# Step 3: Load curated concepts
#
# Builds a ConceptDictionary sized to the SAE's latent count from the curated
# concepts file (CSV preferred, JSON as fallback), then prints how many neurons
# have a concept and the entries for the first few neurons.
# Raises FileNotFoundError when neither file exists.
print("📥 Loading curated concepts...")

n_latents = training_metadata["n_latents"]

# Reset first so a failed re-run cannot leave a stale dictionary behind.
concept_dict = None
# Try to load from CSV first, then JSON
if CURATED_CONCEPTS_CSV.exists():
    print(f"📄 Loading from CSV: {CURATED_CONCEPTS_CSV}")
    concept_dict = ConceptDictionary.from_csv(CURATED_CONCEPTS_CSV, n_size=n_latents)
elif CURATED_CONCEPTS_JSON.exists():
    print(f"📄 Loading from JSON: {CURATED_CONCEPTS_JSON}")
    concept_dict = ConceptDictionary.from_json(CURATED_CONCEPTS_JSON, n_size=n_latents)
else:
    print("❌ Error: No curated concepts file found!")
    print(f"   Expected at: {CURATED_CONCEPTS_CSV} or {CURATED_CONCEPTS_JSON}")
    raise FileNotFoundError("No curated concepts file found")

# Look every neuron up once; the same list feeds both the count and the sample below.
concepts = [concept_dict.get(neuron_idx) for neuron_idx in range(concept_dict.n_size)]
n_concepts = sum(concept is not None for concept in concepts)

print(f"✅ Loaded concept dictionary with {concept_dict.n_size} neurons")
print(f"📊 Total concepts: {n_concepts}")

# Show some concepts
print("\n🔍 Sample concepts:")
for neuron_idx, concept in enumerate(concepts[:5]):
    # Same "is not None" test as the count above, so a neuron that is counted
    # is never displayed as "no concept".
    if concept is not None:
        print(f"   Neuron {neuron_idx}: '{concept.name}' (score: {concept.score:.3f})")
    else:
        print(f"   Neuron {neuron_idx}: no concept")
print()


📥 Loading curated concepts...
📄 Loading from JSON: store/sshleifer_tiny-gpt2/curated_concepts.json
✅ Loaded concept dictionary with 24 neurons
📊 Total concepts: 24

🔍 Sample concepts:
   Neuron 0: 'family relationships' (score: 0.900)
   Neuron 1: 'nature and outdoors' (score: 0.900)
   Neuron 2: 'problem solving' (score: 0.900)
   Neuron 3: 'emotional states' (score: 0.850)
   Neuron 4: 'social interactions' (score: 0.900)



In [57]:
# Attach the curated concept dictionary loaded above to the SAE.
sae.attach_dictionary(concept_dict)

In [58]:
# Load the text dataset used for the inference demo.
# TextDataset is imported with the other imports in the setup cell at the top.
HF_DATASET = "roneneldan/TinyStories"
DATA_SPLIT = "train"
TEXT_FIELD = "text"
DATA_LIMIT = 500
# NOTE(review): MAX_LENGTH is not passed to the dataset in this cell, and
# model.context.max_length is set to the same value when the model is loaded -
# confirm which of the two is authoritative.
MAX_LENGTH = 64

dataset = TextDataset.from_huggingface(
    HF_DATASET,
    split=DATA_SPLIT,
    cache_dir=str(CACHE_DIR),
    text_field=TEXT_FIELD,
    limit=DATA_LIMIT,
)

Saving the dataset (1/1 shards): 100%|██████████| 500/500 [00:00<00:00, 285365.63 examples/s]


In [59]:
# Run generation on the first batch of texts only.
BATCH_SIZE = 32

# Take just the first batch; None signals that the dataset yielded nothing.
texts = next(iter(dataset.iter_batches(BATCH_SIZE)), None)
if texts is None:
    # Fail here with a clear message instead of leaving `output` undefined.
    raise ValueError("Dataset yielded no batches; nothing to run generation on")

output = model.generate(texts)