In [1]:
%load_ext autoreload
%autoreload 2

from tiny_dashboard.feature_centric_dashboards import OfflineFeatureCentricDashboard
from transformers import AutoTokenizer
from collections import defaultdict
from pathlib import Path
import sqlite3
import tempfile
import json

## Offline Dashboard

In [2]:
# GPT-2 tokenizer; later cells of this notebook reuse the `tokenizer` name.
tokenizer = AutoTokenizer.from_pretrained("gpt2")

# Sample inputs: a short sentence, long repeated text with single and double
# newlines, a long string without spaces, and a tiny text containing a newline.
test_texts = [
    "Hello, how are you?",
    "The quick brown fox jumps over the lazy dog.\n" * 20,
    "I love programming in Python.\n\n" * 100,
    "zaedazzazaaz" * 100,
    "a\na",
]

# feature index -> list of (max activation, tokens, per-token activations)
max_activation_examples: dict[int, list[tuple[float, list[str], list[float]]]] = (
    defaultdict(list)
)

# Build synthetic examples for features 0, 10, ..., 90. Each example has a
# single non-zero activation, equal to the text's index, placed at position
# `feature_idx % n_tokens` (so the first text, index 0, stays all zeros).
for feature_idx in range(0, 100, 10):
    for text_idx, text in enumerate(test_texts):
        tokens = tokenizer.tokenize(text)
        n_tokens = len(tokens)
        hot_position = feature_idx % n_tokens
        activations = [
            text_idx if position == hot_position else 0
            for position in range(n_tokens)
        ]
        max_activation_examples[feature_idx].append((text_idx, tokens, activations))

dashboards = OfflineFeatureCentricDashboard(max_activation_examples, tokenizer)
dashboards.display()

VBox(children=(Text(value='', continuous_update=False, description='Feature:', placeholder='Type a feature num…

In [3]:
# Write the dashboard to "test.html"; the path is relative, so the file lands
# in the current working directory.
# NOTE(review): the second argument (10) is presumably the feature to export —
# confirm against the export_to_html signature.
dashboards.export_to_html("test.html", 10)

### From Database

Loading the dashboard from a database should be much faster, as we don't load all the examples into memory.

In [4]:
# Store the examples in a SQLite file under the system temp directory.
# NOTE: an existing file is reused as-is; delete it to regenerate the table
# after changing `max_activation_examples`.
db_path = Path(tempfile.gettempdir()) / "test.db"
if not db_path.exists():
    # sqlite3's connection context manager only commits (or rolls back) the
    # transaction; it does NOT close the connection. Close it explicitly so
    # the file handle is not leaked for the lifetime of the kernel.
    conn = sqlite3.connect(db_path)
    try:
        with conn:  # commits on success, rolls back if an insert fails
            conn.execute(
                """CREATE TABLE IF NOT EXISTS data_table (
                key INTEGER PRIMARY KEY,
                examples TEXT
            )"""
            )
            # One row per feature; the example list is serialized as JSON text.
            conn.executemany(
                "INSERT INTO data_table (key, examples) VALUES (?, ?)",
                (
                    (key, json.dumps(examples))
                    for key, examples in max_activation_examples.items()
                ),
            )
    finally:
        conn.close()

dashboards = OfflineFeatureCentricDashboard.from_db(db_path, tokenizer, column_name="examples")
dashboards.display()

VBox(children=(Text(value='', continuous_update=False, description='Feature:', placeholder='Type a feature num…

## Online Dashboard

In [5]:
from tiny_dashboard.feature_centric_dashboards import (
    AbstractOnlineFeatureCentricDashboard,
)
import torch as th


class DummyOnlineFeatureCentricDashboard(AbstractOnlineFeatureCentricDashboard):
    """Toy dashboard backend that fabricates responses and activations.

    Useful for trying out the online dashboard without a real model.
    """

    def generate_model_response(self, text: str) -> str:
        """Return ``text`` with a fixed placeholder string appended."""
        placeholder = "Dummy response"
        return text + placeholder

    def get_feature_activation(
        self, text: str, feature_indicies: tuple[int, ...]
    ) -> th.Tensor:
        """Return random activations of shape (n_tokens, n_features).

        ``n_tokens`` is the length of ``self.tokenizer.encode(text)`` and
        ``n_features`` is ``len(feature_indicies)``. Values are the
        exponential of standard-normal samples, with everything below 3
        set to zero.
        """
        n_tokens = len(self.tokenizer.encode(text))
        n_features = len(feature_indicies)
        raw = th.exp(th.randn((n_tokens, n_features)))
        # Zero out the small values so only a few entries stay active.
        return th.where(raw < 3, th.zeros_like(raw), raw)


# Only a tokenizer is handed to the dummy dashboard.
# NOTE(review): Gemma checkpoints are usually gated on the Hugging Face Hub —
# confirm that whoever runs this cell has access/authentication set up.
gemma_tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b-it")
online_dashboards = DummyOnlineFeatureCentricDashboard(gemma_tokenizer)
online_dashboards.display()

Model is not set, disabling generate response checkbox


VBox(children=(Textarea(value='', description='Text:', layout=Layout(height='auto', width='100%'), placeholder…

If you hate classes, you can pass plain functions to `OnlineFeatureCentricDashboard` directly.

In [7]:
from tiny_dashboard.feature_centric_dashboards import OnlineFeatureCentricDashboard


def get_feature_activation(text, feature_indicies):
    """Return random positive activations of shape (n_tokens, n_features).

    ``n_tokens`` comes from encoding ``text`` with the notebook-level
    ``tokenizer``; ``n_features`` is ``len(feature_indicies)``. Values are the
    exponential of standard-normal samples.
    """
    n_tokens = len(tokenizer.encode(text))
    n_features = len(feature_indicies)
    return th.exp(th.randn((n_tokens, n_features)))


def generate_model_response(text):
    """Return ``text`` with a fixed placeholder response appended."""
    placeholder = "Dummy response"
    return text + placeholder


# Function-based variant: pass the activation callback, the tokenizer and the
# response callback positionally instead of subclassing.
online_dashboards_2 = OnlineFeatureCentricDashboard(
    get_feature_activation,
    tokenizer,
    generate_model_response,
)
online_dashboards_2.display()

VBox(children=(Textarea(value='', description='Text:', layout=Layout(height='auto', width='100%'), placeholder…