In [1]:
%load_ext autoreload
%autoreload 3

from src.feature_centric_dashboards import OfflineFeatureCentricDashboard
from transformers import AutoTokenizer
from collections import defaultdict

In [3]:
tokenizer = AutoTokenizer.from_pretrained("gpt2")

# Synthetic inputs: one short sentence plus three strings repeated many times,
# so the examples cover a range of token-sequence lengths.
test_texts = [
    "Hello, how are you?",
    "The quick brown fox jumps over the lazy dog." * 20,
    "I love programming in Python." * 100,
    "zaedazzazaaz" * 100,
]

# Maps a feature index to its list of example tuples. Each tuple is
# (text index, tokens, per-token activations); the text index doubles as the
# only non-zero activation value in that example.
max_activation_examples: dict[
    int, list[tuple[float, list[str], list[float]]]
] = defaultdict(list)

# Fake features 0, 10, ..., 90 — one example per test text for each feature.
for feature_idx in range(0, 100, 10):
    for text_idx, text in enumerate(test_texts):
        tokens = tokenizer.tokenize(text)
        n_tokens = len(tokens)
        # All-zero activations except a single position (wrapped around the
        # sequence length) that carries the text index. For the first text the
        # index is 0, so that example stays entirely zero.
        activations = [0] * n_tokens
        activations[feature_idx % n_tokens] = text_idx
        max_activation_examples[feature_idx].append(
            (text_idx, tokens, activations)
        )

dashboards = OfflineFeatureCentricDashboard(max_activation_examples, tokenizer)
dashboards.display()

VBox(children=(Text(value='', continuous_update=False, description='Feature:', placeholder='Type a feature num…

In [4]:
# Export the offline dashboard built above to "test.html".
# NOTE(review): the second argument (10) is presumably a limit on how much is
# exported — confirm against export_to_html's signature.
dashboards.export_to_html("test.html", 10)

In [5]:
from src.feature_centric_dashboards import AbstractOnlineFeatureCentricDashboard
import torch as th


class DummyOnlineFeatureCentricDashboard(AbstractOnlineFeatureCentricDashboard):
    """Toy subclass of AbstractOnlineFeatureCentricDashboard for demos.

    Both hooks return placeholder data: the "model response" is the input
    text with a fixed suffix, and the "activations" are random numbers.
    """

    def generate_model_response(self, text: str) -> str:
        """Return ``text`` with the fixed suffix appended (no separator)."""
        suffix = "Dummy response"
        return text + suffix

    def get_feature_activation(
        self, text: str, feature_indicies: tuple[int, ...]
    ) -> th.Tensor:
        """Return random sparse activations for ``text``.

        The result has one row per id returned by ``self.tokenizer.encode(text)``
        and one column per entry of ``feature_indicies``. Values are the
        exponential of standard-normal samples, with every value below 3
        replaced by 0. The output is random and unseeded here, so it differs
        from call to call.
        """
        # NOTE(review): self.tokenizer is presumably set by the base class
        # constructor — confirm.
        n_tokens = len(self.tokenizer.encode(text))
        n_features = len(feature_indicies)
        activations = th.exp(th.randn(n_tokens, n_features))
        # In-place zeroing of the small values; masked_fill_ returns the same
        # tensor it modified.
        return activations.masked_fill_(activations < 3, 0)


# Build the dummy online dashboard with the GPT-2 tokenizer loaded earlier in
# the notebook and show its widget UI.
online_dashboards = DummyOnlineFeatureCentricDashboard(tokenizer)
online_dashboards.display()

VBox(children=(Textarea(value='', description='Text:', layout=Layout(height='auto', width='100%'), placeholder…

If you'd rather not write a subclass, you can pass plain functions directly to `OnlineFeatureCentricDashboard`:

In [4]:
from src.feature_centric_dashboards import OnlineFeatureCentricDashboard


def get_feature_activation(text, feature_indicies):
    """Return random positive activations for ``text``.

    The tensor has one row per id returned by the notebook-level
    ``tokenizer.encode(text)`` and one column per entry of
    ``feature_indicies``. Values are the exponential of standard-normal
    samples; nothing is thresholded to zero here.
    """
    n_tokens = len(tokenizer.encode(text))
    n_features = len(feature_indicies)
    return th.exp(th.randn(n_tokens, n_features))


def generate_model_response(text):
    """Return ``text`` with the fixed suffix appended (no separator)."""
    suffix = "Dummy response"
    return text + suffix


# Function-based variant: pass the two callables defined above, plus the
# tokenizer, to OnlineFeatureCentricDashboard and show its widget UI.
online_dashboards_2 = OnlineFeatureCentricDashboard(
    get_feature_activation, generate_model_response, tokenizer
)
online_dashboards_2.display()

VBox(children=(Textarea(value='', description='Text:', layout=Layout(height='auto', width='100%'), placeholder…