In [None]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

## Install libraries

```bash
conda create -n edu4 python=3.11 jupyter matplotlib
```

```bash 
! pip install -U -r requirements.txt
```

```bash
! pip install -U numpy
! pip install -U scikit-learn
```

## Update repository

In [None]:
# ! git pull

## Add import path

In [None]:
import os
import sys
import gc

In [None]:
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

In [None]:
module_path = os.path.abspath(os.path.join('../..'))
if module_path not in sys.path:
    sys.path.append(module_path)

In [None]:
module_path = os.path.abspath(os.path.join('../../..'))
if module_path not in sys.path:
    sys.path.append(module_path)

In [None]:
del module_path

## Organize imports

In [None]:
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, AutoTokenizer
from huggingface_hub import hf_hub_download, notebook_login
import numpy as np
import torch
from torch import nn

In [None]:
from sae_lens import SAE, HookedSAETransformer
from transformer_lens.utils import tokenize_and_concatenate

In [None]:
from transformer_lens import HookedTransformer

In [None]:
from datasets import load_dataset

In [None]:
import multiprocessing
from pathlib import Path

In [None]:
from tqdm import tqdm

In [None]:
import seaborn as sns

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
from scipy.sparse import csr_matrix

In [None]:
import plotly.express as px

In [None]:
from src.lattmc.fca.utils import *
from src.lattmc.fca.data_utils import *
from src.lattmc.fca.image_utils import *
from src.lattmc.fca.models import *
from src.lattmc.fca.fca_utils import *
from src.lattmc.fca.image_gens import *

#### Number of CPU cores

In [None]:
workers = multiprocessing.cpu_count()
workers

In [None]:
SEED = 2024

In [None]:
# Pick the compute device in order of preference: Apple MPS, then CUDA, then CPU.
device = (
    "mps" if torch.backends.mps.is_available()
    else "cuda" if torch.cuda.is_available()
    else "cpu"
)

print(f"Device: {device}")

In [None]:
torch.__version__

In [None]:
np.__version__

## Initialize Path

In [None]:
# Root data directory and the naming scheme for all cached artefacts.
PATH = Path('../data')
dataset_suffix = 'wiki'
vectors_name = f'gpt2_small_10_{dataset_suffix}'

# Cache locations: per-document vectors, per-document matrices and the
# aggregated vectors file, all below the checkpoint directory.
checkpoint_dir = PATH / 'saes'
vectors_dir = checkpoint_dir / f'{vectors_name}_10_vecs'
matrix_dir = checkpoint_dir / f'{vectors_name}_10_mats'
vectors_path = checkpoint_dir / f'{vectors_name}_10_vecs.joblib'

# Create the cache directories up front so later cells can write into them.
for _cache_dir in (checkpoint_dir, vectors_dir, matrix_dir):
    _cache_dir.mkdir(exist_ok=True, parents=True)

image_dir = PATH / 'images'
image_path = image_dir / '1024.png'

## Initialize simple dataset

In [None]:
# Load the full 'train' split of the NeelNanda/<dataset_suffix>-10k dataset
# (streaming is disabled, so the dataset supports len() and integer indexing
# as used by later cells).
dataset = load_dataset(
    path=f'NeelNanda/{dataset_suffix}-10k',
    split='train',
    streaming=False,
)

## Initialize model

In [None]:
class Text2Latent:
    """Map text to sparse-autoencoder (SAE) latents via a hooked transformer.

    The wrapper runs ``model`` on a text, reads the activation stored under the
    SAE's hook name and passes it through the SAE encoder. Both modules are put
    into eval mode and every method runs under ``torch.inference_mode()``.

    Attributes:
        model: The transformer, expected to expose ``to_tokens``, ``to_string``
            and ``run_with_cache``.
        sae: The sparse autoencoder, expected to expose ``encode``, ``decode``
            and ``cfg.hook_name``.
        hook_point: Name of the activation the SAE reads (``sae.cfg.hook_name``).
    """

    def __init__(self, model: nn.Module, sae: nn.Module):
        self.model = model.eval()
        self.sae = sae.eval()
        self.hook_point = sae.cfg.hook_name

    @torch.inference_mode()
    def tokenize(self, text):
        """Return the model's token ids for ``text``."""
        return self.model.to_tokens(text)

    @torch.inference_mode()
    def to_string(self, tokens):
        """Convert token ids back to text using the model's tokenizer."""
        return self.model.to_string(tokens)

    @torch.inference_mode()
    def encode(self, text):
        """Return the SAE feature activations for ``text``.

        Only the activation at ``self.hook_point`` is cached; caching every
        hook of the model for each document wastes memory because nothing
        else is read from the cache.
        """
        _, cache = self.model.run_with_cache(
            text,
            prepend_bos=True,
            names_filter=self.hook_point,
        )
        # get the feature activations from our SAE
        return self.sae.encode(cache[self.hook_point])

    @torch.inference_mode()
    def decode(self, z):
        """Map SAE latents ``z`` back through the SAE decoder."""
        return self.sae.decode(z)

    @torch.inference_mode()
    def forward(self, text):
        """Encode ``text`` to SAE latents and decode them again."""
        return self.decode(self.encode(text))

In [None]:
def gen_concept(idx, val, shape=24576):
    """Build a single-entry query vector and look up its concept.

    Relies on the notebook-level names ``fca`` and ``V`` and on the ``dsort``
    helper being defined before the first call.

    Args:
        idx: Position of the vector entry to set.
        val: Value written at position ``idx``; all other entries are zero.
        shape: Length of the query vector.

    Returns:
        A 3-tuple ``(query, concept, ordered)`` where ``query`` is the vector,
        ``concept`` is ``fca.G_FG(query)`` and ``ordered`` is ``concept.A``
        re-indexed by ``dsort(V[concept.A], idx)``.
    """
    query = np.zeros((shape,), dtype=float)
    query[idx] = val

    found = fca.G_FG(query)
    ordering = dsort(V[found.A], idx)

    return query, found, found.A[ordering]

In [None]:
layer = 10

In [None]:
# Load the pretrained SAE selected by `release` and `sae_id`; the id names the
# `hook_resid_pre` hook of block `layer`. `from_pretrained` is unpacked into
# three values here: the SAE itself, a config dict and a sparsity object.
model_name = 'gpt2-small'
release = 'gpt2-small-res-jb'
sae_id = f'blocks.{layer}.hook_resid_pre'
sae, cfg_dict, sparsity = SAE.from_pretrained(
    release=release,  # see other options in sae_lens/pretrained_saes.yaml
    sae_id=sae_id,  # won't always be a hook point
    device=device,
)
# The hook name stored in the SAE config is what the model cache is indexed by.
hook_point = sae.cfg.hook_name
print(hook_point)

In [None]:
model = HookedTransformer.from_pretrained(model_name, device=device)

In [None]:
net = Text2Latent(model, sae)

## Generate V Lattice

In [None]:
gc.collect()

In [None]:
# Encode every document once and cache its per-token SAE activations on disk
# as a sparse matrix named '<dataset index>.joblib'.
#
# Documents whose file already exists are skipped, so an interrupted run can be
# resumed. Previously any file in the directory disabled generation entirely,
# which left later cells with missing documents.
existing = {p.stem for p in matrix_dir.glob('*.joblib')}
if len(existing) >= len(dataset):
    print(f'{matrix_dir} is not empty')
else:
    with tqdm(dataset) as pdata:
        for idx, d in enumerate(pdata):
            if str(idx) in existing:
                continue
            v = net.encode(d['text'])
            # Drop the batch dimension and keep only the non-zero activations.
            v_sparse = csr_matrix(v.to('cpu').detach().numpy()[0])
            # Write to a temporary name first and rename afterwards so that an
            # interruption never leaves a truncated '<idx>.joblib' behind.
            target = matrix_dir / f'{idx}.joblib'
            tmp_target = target.with_suffix('.tmp')
            joblib.dump(v_sparse, tmp_target)
            tmp_target.replace(target)

In [None]:
gc.collect()

In [None]:
# Build (or load) the document-by-feature matrix V: row k is the element-wise
# maximum over the cached per-token activations of document k, with the first
# row of each cached matrix skipped.
if vectors_path.exists():
    V = joblib.load(vectors_path)
    print(f'Vectors are loaded from {vectors_path}')
else:
    v_paths = list(matrix_dir.glob('*.joblib'))
    V_dict = {}
    with tqdm(v_paths) as v_ppaths:
        for v_path in v_ppaths:
            v_sparse = joblib.load(v_path)
            vs = v_sparse.toarray()[1:]
            # File stems are dataset indices, which fixes the row order below.
            V_dict[int(v_path.stem)] = np.maximum.reduce(vs)

    # Derive the expected number of rows from the dataset instead of a
    # hard-coded 10000, and fail with a clear message if the cache is partial.
    n_docs = len(dataset)
    missing = [k for k in range(n_docs) if k not in V_dict]
    if missing:
        raise FileNotFoundError(
            f'{len(missing)} cached matrices are missing in {matrix_dir} '
            f'(first missing index: {missing[0]})'
        )
    V = np.array([V_dict[k] for k in range(n_docs)])
    joblib.dump(V, vectors_path)

## Generate Context and Analyze

In [None]:
gc.collect()

In [None]:
fca = FCA(V)

In [None]:
text1 = "The Golden Gate Bridge"
z = net.encode(text1)
zs = z.to('cpu').detach().numpy()[0]
# Skip row 0 (the position added by prepend_bos=True in Text2Latent.encode)
# before max-pooling, so this vector is built the same way as the rows of V,
# which are pooled over `toarray()[1:]`.
v = np.maximum.reduce(zs[1:])

In [None]:
zs[1:].shape

In [None]:
zs.shape

In [None]:
v[19837]

## Detect Negation

In [None]:
w1 = 'Bird'
w2 = 'can'
w3 = 'fly'
w4 = 'hunt'

In [None]:
ws = [w1, w2, w3, w4]

In [None]:
text = ' '.join(ws)
text

In [None]:
vs = net.encode(text)

In [None]:
ts = net.tokenize(text)

In [None]:
ts.shape

In [None]:
net.to_string(ts[0][4])

In [None]:
vs.shape

In [None]:
ts = net.tokenize(w1)[0]
net.to_string(ts[1])

In [None]:
ts = net.tokenize(w2)[0]
net.to_string(ts[1])

In [None]:
ts = net.tokenize(w3)[0]
net.to_string(ts[1])

In [None]:
ts = net.tokenize(w4)[0]
net.to_string(ts[1])

In [None]:
v1 = net.encode(w1)[0][1] / 10
v2 = net.encode(w2)[0][1] / 10
v3 = net.encode(w3)[0][1] / 10
v4 = net.encode(w4)[0][1] / 10
v1.shape

In [None]:
to_numpy(v1)[np.argmax(to_numpy(v1))]

In [None]:
to_numpy(v2)[np.argmax(to_numpy(v2))]

In [None]:
to_numpy(v3)[np.argmax(to_numpy(v3))]

In [None]:
to_numpy(v4)[np.argmax(to_numpy(v4))]

In [None]:
vt = vs[0][1:]

In [None]:
vt.shape

In [None]:
c1 = fca.G_FG(v1)
c2 = fca.G_FG(v2)
c3 = fca.G_FG(v3)
c4 = fca.G_FG(v4)

In [None]:
c3

In [None]:
dataset[c3.A]

In [None]:
c1 & c2

## Landmark Detection?

In [None]:
text = "Golden Gate Bridge"
z = net.encode(text)
zs = z.to('cpu').detach().numpy()[0]

In [None]:
z[0].shape

In [None]:
l = meet(zs[1], zs[2])

In [None]:
topK(l, 20)

In [None]:
concept = fca.G_FG(l)
concept

In [None]:
dataset[concept.A[0].item()]

## Landmark Detection in a Full Sentence?

In [None]:
# Toy example: reorder the rows of a small matrix by one column, largest first,
# and keep the permutation so the original row positions stay known.
a = np.array([[9, 2, 3], [4, 5, 6], [7, 0, 5]])
col_index = 0
ascending = np.argsort(a[:, col_index])
sorted_indices = ascending[::-1]
sorted_array = a[sorted_indices]
print("Sorted array:", sorted_array)
print("Original indices:", sorted_indices)

In [None]:
# The original cell was an unterminated call (a SyntaxError that stops
# "Run All"). Completed here as the ascending order of the demo column.
np.argsort(a[:, col_index])

In [None]:
text = "Golden Gate Bridge is an iconic landmark in San Francisco"
z = net.encode(text)
zs = z.to('cpu').detach().numpy()[0]

In [None]:
z[0].shape

In [None]:
tokens = net.tokenize(text)[0]
words = [net.to_string(t) for t in tokens]
tokens, words

In [None]:
l = meet(zs[1], zs[2])

In [None]:
topK(l, 20)

In [None]:
concept = fca.G_FG(l)
concept

In [None]:
S = dsort(V[concept.A], 21286)

In [None]:
dataset[S[0].item()]

## Species Detection?

In [None]:
text = "Golden Retriever"
z = net.encode(text)
zs = z.to('cpu').detach().numpy()[0]

In [None]:
tokens = net.tokenize(text)
tokens

In [None]:
tokens[0][2:]

In [None]:
net.to_string(4990)

In [None]:
z[0].shape

In [None]:
zs.shape

In [None]:
l = join_all(zs[2:])

In [None]:
l = meet(zs[2], l)

In [None]:
topK(l, 20)

In [None]:
concept = fca.G_FG(l / 8)
concept

In [None]:
S = dsort(V[concept.A], 6950)

In [None]:
dataset[S[0].item()]

In [None]:
v_test, concept_test, S = gen_concept(12610, 12)
concept_test

In [None]:
dataset[S[0].item()]

## Golden Feature from Neuronscope

In [None]:
v_golden, concept, S = gen_concept(19837, 90)
v_golden.shape, concept

In [None]:
concept.A, S

In [None]:
for v_s in V[S]:
    print(v_s[19837])

In [None]:
V[S][:, 19837]

In [None]:
dataset[S[0].item()]

#### Find common features

In [None]:
tk = topK(concept.v, 40)
tk

In [None]:
idx = 5
args = tk[1][idx], tk[0][idx]
# gen_concept returns (vector, concept, reordered concept.A); keep the first two.
v_test, concept_test = gen_concept(*args)[:2]
print(f'{args}, {v_test.shape}, {concept_test}')
dataset[concept_test.A[0].item()]

## Cat Feature from Neuronscope

In [None]:
# gen_concept returns (vector, concept, reordered concept.A); keep the first two.
v_cat, concept = gen_concept(16899, 80)[:2]
v_cat.shape, concept

In [None]:
dataset[concept.A[0].item()]

## Apple Feature from Neuronscope

In [None]:
# gen_concept returns (vector, concept, reordered concept.A); keep the first two.
v_apple, concept = gen_concept(4269, 70)[:2]
v_apple.shape, concept

In [None]:
dataset[concept.A[0].item()]

#### Find common features

In [None]:
idx = 34
tk = topK(concept.v, 40)
args = tk[1][idx], tk[0][idx]
# gen_concept returns (vector, concept, reordered concept.A); keep the first two.
v_test, concept_test = gen_concept(*args)[:2]
print(f'{args}, {v_test.shape}, {concept_test}')
dataset[concept_test.A[0].item()]

## Thunder and Lightning Feature from Neuronscope

In [None]:
gc.collect()

In [None]:
# gen_concept returns (vector, concept, reordered concept.A); keep the first two.
v_thunder, concept = gen_concept(23123, 20)[:2]
v_thunder.shape, concept

In [None]:
dataset[concept.A[0].item()]

In [None]:
idx = 4
tk = topK(concept.v, 40)
args = tk[1][idx], tk[0][idx]
# gen_concept returns (vector, concept, reordered concept.A); keep the first two.
v_test, concept_test = gen_concept(*args)[:2]
print(f'{args}, {v_test.shape}, {concept_test}')
dataset[concept_test.A[0].item()]

## School Feature from Neuronscope

In [None]:
# gen_concept returns (vector, concept, reordered concept.A); keep the first two.
v_school, concept = gen_concept(20781, 84)[:2]
v_school.shape, concept

In [None]:
dataset[concept.A[0].item()]

## King Feature from Neuronscope

In [None]:
# gen_concept returns (vector, concept, reordered concept.A); keep the first two.
v_king, concept = gen_concept(17624, 60)[:2]
v_king.shape, concept

In [None]:
dataset[concept.A[1].item()]

In [None]:
idx = 3
tk = topK(concept.v, 40)
args = tk[1][idx], tk[0][idx] + 20
# gen_concept returns (vector, concept, reordered concept.A); keep the first two.
v_test, concept_test = gen_concept(*args)[:2]
print(f'{args}, {v_test.shape}, {concept_test}')
dataset[concept_test.A[0].item()]

## Orgs Feature from Neuronscope

In [None]:
# gen_concept returns (vector, concept, reordered concept.A); keep the first two.
v_orgs, concept = gen_concept(16660, 60)[:2]
v_orgs.shape, concept

In [None]:
dataset[concept.A[0].item()]

In [None]:
idx = 2
tk = topK(concept.v, 40)
args = tk[1][idx], tk[0][idx] + 8
# gen_concept returns (vector, concept, reordered concept.A); keep the first two.
v_test, concept_test = gen_concept(*args)[:2]
print(f'{args}, {v_test.shape}, {concept_test}')
dataset[concept_test.A[0].item()]

## States Feature from Neuronscope

In [None]:
# gen_concept returns (vector, concept, reordered concept.A); keep the first two.
v_state, concept = gen_concept(22, 10)[:2]
v_state.shape, concept

In [None]:
dataset[concept.A[0].item()]

## Joint concepts

In [None]:
idx = 1
tk = topK(concept.v, 40)
args = tk[1][idx], tk[0][idx] + 4
# gen_concept returns (vector, concept, reordered concept.A); keep the first two.
v_test, concept_test = gen_concept(*args)[:2]
print(f'{args}, {v_test.shape}, {concept_test}')
dataset[concept_test.A[0].item()]

In [None]:
# topK(v_orgs, 4), topK(v_school, 4)

In [None]:
v_orgs = np.zeros_like(to_numpy(v))
v_school = np.zeros_like(to_numpy(v))
v_having = np.zeros_like(to_numpy(v))

In [None]:
v_orgs[16660] = 20
v_school[17624] = 20
v_having[17935] = 6

In [None]:
orgs = fca.G_FG(v_orgs)
schools = fca.G_FG(v_school)
havings = fca.G_FG(v_having)
orgs, schools, havings

In [None]:
org_school = orgs & schools
org_school

In [None]:
dataset[org_school.A[0].item()]['text']

In [None]:
org_school_havings = orgs & schools & havings 
org_school_havings

In [None]:
dataset[org_school_havings.A[0].item()]['text']

## Recipe Feature from Neuronscope

In [None]:
# gen_concept returns (vector, concept, reordered concept.A); keep the first two.
v_recipie, concept = gen_concept(7, 28)[:2]
v_recipie.shape, concept

In [None]:
dataset[concept.A[0].item()]

In [None]:
idx = 1
tk = topK(concept.v, 40)
args = tk[1][idx], tk[0][idx]
# gen_concept returns (vector, concept, reordered concept.A); keep the first two.
v_test, concept_test = gen_concept(*args)[:2]
print(f'{args}, {v_test.shape}, {concept_test}')
dataset[concept_test.A[0].item()]

## Election Feature from Neuronscope

In [None]:
# gen_concept returns (vector, concept, reordered concept.A); keep the first two.
v_elect, concept = gen_concept(29, 28)[:2]
v_elect.shape, concept

In [None]:
dataset[concept.A[0].item()]

In [None]:
idx = 2
tk = topK(concept.v, 40)
args = tk[1][idx], tk[0][idx]
# gen_concept returns (vector, concept, reordered concept.A); keep the first two.
v_test, concept_test = gen_concept(*args)[:2]
print(f'{args}, {v_test.shape}, {concept_test}')
dataset[concept_test.A[0].item()]

## Mixture of Election and Party

In [None]:
# gen_concept returns (vector, concept, reordered concept.A); keep the first two.
v_elect, concept_elects = gen_concept(29, 2)[:2]
# Show the concept computed in this cell rather than the stale `concept`
# left over from an earlier section.
v_elect.shape, concept_elects

## Food Features

In [None]:
texts = [
    'food recipie',
    'love',
    'admire',
    'sex',
]

In [None]:
v = net.encode(texts[2])[0][1]
concept = fca.G_FG(v / 4)
concept

In [None]:
dataset[concept.A[3].item()]

In [None]:
topK(v, 10)

In [None]:
# gen_concept returns (vector, concept, reordered concept.A); keep the first two.
# The concept gets its own name and is the one displayed; the cell previously
# stored it as `recipie` and displayed the unrelated, older `concept`.
v_love, concept_love = gen_concept(14654, 20)[:2]
v_love.shape, concept_love

In [None]:
# gen_concept returns (vector, concept, reordered concept.A); keep the first two.
# The concept gets its own name and is the one displayed; the cell previously
# stored it as `recipie` and displayed the unrelated, older `concept`.
v_admire, concept_admire = gen_concept(14990, 20)[:2]
v_admire.shape, concept_admire

In [None]:
dataset[concept.A[0].item()]

## Verbal Feature from Neuronscope

In [None]:
# gen_concept returns (vector, concept, reordered concept.A); keep the first two.
v_verbal, concept = gen_concept(33, 40)[:2]
v_verbal.shape, concept

In [None]:
dataset[concept.A[0].item()]

## Border Feature from Neuronscope

In [None]:
# gen_concept returns (vector, concept, reordered concept.A); keep the first two.
v_border, concept = gen_concept(35, 20)[:2]
v_border.shape, concept

In [None]:
dataset[concept.A[0].item()]

## Cross Concept

In [None]:
# gen_concept returns (vector, concept, reordered concept.A); keep the first two.
v_verbal, concept_verbal = gen_concept(33, 16)[:2]
v_border, concept_border = gen_concept(35, 16)[:2]

In [None]:
cross = concept_verbal & concept_border
cross

In [None]:
dataset[cross.A[0].item()]

## Cross Concept Probability

In [None]:
# gen_concept returns (vector, concept, reordered concept.A); keep the first two.
v_posib, concept_posib = gen_concept(1061, 20)[:2]
v_posit, concept_posit = gen_concept(809, 20)[:2]
v_liklh, concept_liklh = gen_concept(418, 20)[:2]
v_qualt, concept_qualt = gen_concept(129, 20)[:2]

In [None]:
cross_pr_ps = concept_posib & concept_posit
cross_pr_ps

In [None]:
dataset[cross_pr_ps.A[0].item()]

In [None]:
cross_lk_ql = concept_liklh & concept_qualt
cross_lk_ql

In [None]:
dataset[cross_lk_ql.A[0].item()]

## Context Features

In [None]:
# gen_concept returns (vector, concept, reordered concept.A); keep the first two.
v_peval, concept_peval = gen_concept(3109, 20)[:2]
v_evstt, concept_evstt = gen_concept(3784, 20)[:2]
v_sosis, concept_sosis = gen_concept(3702, 20)[:2]
v_agevs, concept_agevs = gen_concept(3388, 20)[:2]

In [None]:
concept_pe_ev = concept_peval & concept_evstt
concept_pe_ev

In [None]:
dataset[concept_pe_ev.A[0].item()]

In [None]:
concept_so_ag = concept_sosis & concept_agevs
concept_so_ag

In [None]:
dataset[concept_so_ag.A[0].item()]

In [None]:
concept_all = concept_peval & concept_evstt & concept_sosis & concept_agevs
concept_all

In [None]:
dataset[concept_all.A[0].item()]

## Computer and Technologies Concepts

In [None]:
# gen_concept returns (vector, concept, reordered concept.A); keep the first two.
v_comps, concept_comps = gen_concept(20542, 14)[:2]
v_comps.shape, concept_comps

In [None]:
dataset[concept_comps.A[0].item()]

## Innovation Features

In [None]:
# gen_concept returns (vector, concept, reordered concept.A); keep the first two.
v_innov, concept_innov = gen_concept(2503, 10)[:2]
v_innov.shape, concept_innov

In [None]:
dataset[concept_innov.A[0].item()]

In [None]:
concept_co_in = concept_comps & concept_innov
concept_co_in

In [None]:
dataset[concept_co_in.A[0].item()]

## Add Examples

In [None]:
text1 = " The Golden Gate Bridge"
z = net.encode(text1)
tokens = net.tokenize(text1)
print(torch.topk(z, 12))
print()
print(z)

In [None]:
r = net.encode('Golden')
torch.topk(r[0][1], 50)

In [None]:
torch.nonzero(r[0][1]).shape

In [None]:
texts = [
    "Golden Gate Bridge",
    "New York City",
    "Silicon Valley",
    "The White House",
    "Apple Inc."
]

In [None]:
texts = [
    "School of AI",
    "School of Economics",
    "School of Medicine",
    "School of Arts",
    "School of Technologies"
]

In [None]:
texts = "Golden Retriever"
tokens = net.tokenize(texts)
tokens, net.to_string(tokens)

In [None]:
texts = "Gold"
tokens = net.tokenize(texts)
vs = net.encode(texts)
tokens, net.to_string(tokens)

In [None]:
net.to_string(32378)

In [None]:
vs = net.encode(texts)

In [None]:
vs.shape, vs[0].shape

In [None]:
v_t = [v[1] for v in vs[:3]]
v_t[0].shape

In [None]:
v_A = v_t[0]
for v in v_t:
    v_A = torch.minimum(v_A, v)

In [None]:
vs[0][0].shape, 19837

In [None]:
torch.topk(vs[0][1], 20)

In [None]:
torch.topk(vs[0][1], 20)

In [None]:
torch.topk(vs[0][1], 20)

In [None]:
torch.topk(v_A, 30)

## Golden Second Index

In [None]:
# gen_concept returns (vector, concept, reordered concept.A); keep the first two.
v_1, concept_1 = gen_concept(21286, 16)[:2]
v_1.shape, concept_1

In [None]:
dataset[concept_1.A[0].item()]

## Gold Second Index

In [None]:
# gen_concept returns (vector, concept, reordered concept.A); keep the first two.
v_2, concept_2 = gen_concept(9572, 30)[:2]
v_2.shape, concept_2

In [None]:
dataset[concept_2.A[0].item()]

In [None]:
concept_m = concept_1 & concept_2
concept_m

In [None]:
dataset[concept_m.A[1].item()]

In [None]:
tk = topK(V[0], 30)
tk

In [None]:
v_A = np.zeros_like(v_t[0].to('cpu').detach().numpy())
v_A[17943] = 62
v_A[11811] = 6
v_A[15823] = 4
v_A[4507] = 4
v_A[20161] = 4
concept = fca.G_FG(v_A)
concept

In [None]:
concept.A.shape, V.shape, V.shape[0] - concept.A.shape[0]

In [None]:
concept.A[0]

In [None]:
dataset[concept.A[2].item()]

## Analyze Tokens

In [None]:
len(dataset)

In [None]:
# Sanity check: for every document, the number of tokens produced by the model
# must equal the number of rows in its cached activation matrix.
checks = []
doc_ids = list(range(len(dataset)))
with tqdm(doc_ids) as pdata:
    for idx in pdata:
        n_tokens = net.tokenize(dataset[idx]['text'])[0].shape[0]
        n_rows = joblib.load(matrix_dir / f'{idx}.joblib').shape[0]
        checks.append(n_tokens == n_rows)

In [None]:
np.all(checks)

In [None]:
from IPython.display import clear_output

# For each document index in the extent of `concept`, load its cached per-token
# activation matrix and keep the non-empty result of find_G_x(vs, v_A).
# NOTE(review): find_G_x is imported from the project; later cells index a
# document's tokens with its result, so it presumably returns token positions.
T_dict = {}
with tqdm(concept.A) as v_ppaths:
    for idx in v_ppaths:
        # Convert to a plain Python int once, so the file name and the dict key
        # always agree (formatting a tensor scalar directly would not produce
        # a bare number).
        doc_id = idx.item()
        vs = joblib.load(matrix_dir / f'{doc_id}.joblib').toarray()
        G_x = find_G_x(vs, v_A)
        if G_x.shape[0] > 0:
            T_dict[doc_id] = G_x
        clear_output(wait=True)

In [None]:
len(T_dict)

In [None]:
T_dict[5]

In [None]:
tokens = net.tokenize(dataset[5]['text'])

In [None]:
net.to_string(tokens[0][100])

In [None]:
T_dict[5]

In [None]:
net.to_string(tokens[0][0]), net.to_string(tokens[0][T_dict[5]])

In [None]:
# Turn the selected token positions of each document back into text.
W_dict = {
    doc_id: net.to_string(net.tokenize(dataset[doc_id]['text'])[0][positions])
    for doc_id, positions in T_dict.items()
}

In [None]:
W_dict