In [None]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

## Install libraries

```bash
conda create -n edu4 python=3.11 jupyter matplotlib
```

```bash 
! pip install -U -r requirements.txt
```

```bash
! pip install -U numpy
! pip install -U scikit-learn
```

## Update repository

In [None]:
# ! git pull

## Add import path

In [None]:
import gc
import os
import sys

In [None]:
def add_library_level(level=4):
    """Append the working directory and its ancestors to ``sys.path``.

    For every ``depth`` in ``0 .. level - 1`` the directory ``depth`` levels
    above the current working directory is resolved to an absolute path and
    appended to ``sys.path`` unless it is already listed there. Each newly
    appended directory is reported on stdout.

    Args:
        level: Number of directories to consider, starting with the current
            working directory (depth 0) and moving one parent up per step.
            Values ``<= 0`` leave ``sys.path`` untouched.
    """
    for depth in range(level):
        # depth == 0 yields an empty relative path, which abspath resolves
        # to the current working directory.
        rel_path = '/'.join([os.pardir] * depth)
        module_path = os.path.abspath(rel_path)
        if module_path not in sys.path:
            sys.path.append(module_path)
            # Report the resolved directory: the relative form is empty for depth 0.
            print(f'Appending {module_path}')

In [None]:
# Register the working directory plus its four nearest ancestors on sys.path
# (presumably so the `src` package imported below resolves - confirm the notebook's depth).
add_library_level(level=5)

## Organize imports

In [None]:
import multiprocessing

In [None]:
from src.lattmc.fca.utils import *
from src.lattmc.fca.fca_utils import *

In [None]:
from src.lattmc.tc.transcoder_analyzers import ConceptAnalysis, init_analyzer

In [None]:
from src.lattmc.sae.nlp_sae_utils import init_device, gen_concept

In [None]:
import logging

#### Number of CPU cores

In [None]:
# CPU count as reported by multiprocessing; shown as the cell output.
workers = multiprocessing.cpu_count()
workers

In [None]:
SEED = 2025

In [None]:
logging.basicConfig(level=logging.INFO)

In [None]:
device = init_device()
device

In [None]:
torch.__version__

In [None]:
np.__version__

In [None]:
# np.set_printoptions(precision=4, suppress=True)

## Initialize Paths

In [None]:
# Directory layout (relative to the working directory):
#   data/transcoders/gpt2/owt_tokens/owt_tokens_torch.pt
PATH = Path('data')
GPT2 = PATH.joinpath('transcoders', 'gpt2')
OWT_TOKENS_DIR = GPT2.joinpath('owt_tokens')
# Create the token directory (and any missing parents) if it is not there yet.
OWT_TOKENS_DIR.mkdir(parents=True, exist_ok=True)
TOKENS_PATH = OWT_TOKENS_DIR.joinpath('owt_tokens_torch.pt')

## Load transcoders

In [None]:
# Layers handed to init_analyzer below.
# Swap in list(range(12)) to use layers 0-11 instead of this subset.
layers = [0, 4, 6, 8, 10, 11]

In [None]:
# Build the analyzer from the selected layers, the token file path and the
# GPT-2 transcoder directory, placed on the device chosen above.
tr_analyzer = init_analyzer(
    layers,
    TOKENS_PATH,
    GPT2,
    device=device
)

In [None]:
# Collect unreachable Python objects first, then ask PyTorch to release
# its unused cached CUDA memory. `gc` is already imported at the top of the notebook.
gc.collect()
torch.cuda.empty_cache()

## Detect pets

In [None]:
# Analyse a sentence that mentions "dog" and "cat" twice each, plus "pets".
# NOTE(review): the prompt starts with a space - presumably deliberate for tokenization; confirm.
concept_an = ConceptAnalysis(
    ' The dog and cat are familiar domestic animals and beloved pets; both the dog and the cat also feature in various mythological stories.',
    tr_analyzer
)

In [None]:
concept_an.analyze_concepts()

In [None]:
# Token positions to inspect and the layer to start with.
# Previously tried position sets: [3, 13], [10, 11, 12], [10, 11], [7, 15], [1, 15], [1].
# NOTE(review): later cells in this section also look at a second position
# (t_idcs[1]); a single-element list has none, so use a two-element list
# when both positions are wanted.
t_idcs = [2]
layer = 0

In [None]:
concept_an.gen_text(t_idcs, layer, rng=1)

In [None]:
concept_an.c_is

In [None]:
v0_FG = concept_an.v_FG[layer][t_idcs[0]]
vals, idcs = topK(v0_FG, v0_FG.shape[0])
vals, idcs

In [None]:
# Second requested position; falls back to the only position when t_idcs
# holds a single index, so the cell no longer raises IndexError.
v0_FG = concept_an.v_FG[layer][t_idcs[min(1, len(t_idcs) - 1)]]
vals, idcs = topK(v0_FG, v0_FG.shape[0])
vals, idcs

In [None]:
concept_an.gen_and_print(idcs, vals, layer, with_text=True, limit=100)

In [None]:
layer = 8

In [None]:
concept_an.gen_text(t_idcs, layer)

In [None]:
v8_GF = concept_an.v_FG[layer][t_idcs[0]]
vals, idcs = topK(v8_GF, v8_GF.shape[0])
vals, idcs

In [None]:
# Second requested position; falls back to the only position when t_idcs
# holds a single index, so the cell no longer raises IndexError.
v8_GF = concept_an.v_FG[layer][t_idcs[min(1, len(t_idcs) - 1)]]
vals, idcs = topK(v8_GF, v8_GF.shape[0])
vals, idcs

In [None]:
c = concept_an.gen_and_print(idcs, vals, layer, with_text=True, limit=100)

In [None]:
tr_analyzer.to_string(tr_analyzer.tokens[1281])

In [None]:
layer = 11

In [None]:
concept_an.gen_text(t_idcs, layer)

In [None]:
v11_GF = concept_an.v_FG[layer][t_idcs[0]]
vals, idcs = topK(v11_GF, v11_GF.shape[0])
vals, idcs

In [None]:
concept_an.corpus.shape

In [None]:
# Second requested position; falls back to the only position when t_idcs
# holds a single index, so the cell no longer raises IndexError.
v11_GF = concept_an.v_FG[layer][t_idcs[min(1, len(t_idcs) - 1)]]
vals, idcs = topK(v11_GF, v11_GF.shape[0])
vals, idcs

In [None]:
v_c, ids = concept_an.gen_and_print(idcs, vals, layer, with_text=True, limit=100)

In [None]:
# Add up the `.size` of every item nested two levels deep inside `ids`.
count = sum((inner.size for outer in ids for inner in outer), 0)
count

In [None]:
len(ids)

## Detect dog and cat

In [None]:
# NOTE(review): when the notebook is run top to bottom, this object is replaced
# by the next cell (prompt ' cat dog.') before it is analysed.
concept_an = ConceptAnalysis(
    ' The dog and cat are familiar domestic animals and beloved pets; both the dog and the cat also feature in various mythological stories.',
    tr_analyzer
)

In [None]:
concept_an = ConceptAnalysis(
    ' cat dog.',
    tr_analyzer
)

In [None]:
concept_an.analyze_concepts()

In [None]:
# Token positions to inspect and the layer to start with.
# Previously tried position sets: [3, 13], [10, 11, 12], [10, 11], [7, 15], [1, 15], [2].
# NOTE(review): later cells in this section also look at a second position
# (t_idcs[1]); a single-element list has none, so use a two-element list
# when both positions are wanted.
t_idcs = [1]
layer = 0

In [None]:
concept_an.gen_text(t_idcs, layer, rng=1)

In [None]:
concept_an.c_is

In [None]:
v0_FG = concept_an.v_FG[layer][t_idcs[0]]
vals, idcs = topK(v0_FG, v0_FG.shape[0])
vals, idcs

In [None]:
# Second requested position; falls back to the only position when t_idcs
# holds a single index, so the cell no longer raises IndexError.
v0_FG = concept_an.v_FG[layer][t_idcs[min(1, len(t_idcs) - 1)]]
vals, idcs = topK(v0_FG, v0_FG.shape[0])
vals, idcs

In [None]:
concept_an.gen_and_print(idcs, vals, layer, with_text=True, limit=100)

In [None]:
layer = 8

In [None]:
concept_an.gen_text(t_idcs, layer)

In [None]:
v8_GF = concept_an.v_FG[layer][t_idcs[0]]
vals, idcs = topK(v8_GF, v8_GF.shape[0])
vals, idcs

In [None]:
# Second requested position; falls back to the only position when t_idcs
# holds a single index, so the cell no longer raises IndexError.
v8_GF = concept_an.v_FG[layer][t_idcs[min(1, len(t_idcs) - 1)]]
vals, idcs = topK(v8_GF, v8_GF.shape[0])
vals, idcs

In [None]:
c = concept_an.gen_and_print(idcs, vals, layer, with_text=True, limit=100)

In [None]:
tr_analyzer.to_string(tr_analyzer.tokens[1281])

In [None]:
layer = 11

In [None]:
concept_an.gen_text(t_idcs, layer)

In [None]:
v11_GF = concept_an.v_FG[layer][t_idcs[0]]
vals, idcs = topK(v11_GF, v11_GF.shape[0])
vals, idcs

In [None]:
concept_an.corpus.shape

In [None]:
# Second requested position; falls back to the only position when t_idcs
# holds a single index, so the cell no longer raises IndexError.
v11_GF = concept_an.v_FG[layer][t_idcs[min(1, len(t_idcs) - 1)]]
vals, idcs = topK(v11_GF, v11_GF.shape[0])
vals, idcs

In [None]:
v_c, ids = concept_an.gen_and_print(idcs, vals, layer, with_text=True, limit=100)

In [None]:
# Add up the `.size` of every item nested two levels deep inside `ids`.
count = sum((inner.size for outer in ids for inner in outer), 0)
count

In [None]:
len(ids)

## Experiment with tokens

In [None]:
prompt = tr_analyzer.tokens[1197]
text = tr_analyzer.to_clean(prompt)
text

In [None]:
concept_an = ConceptAnalysis(text, tr_analyzer)

In [None]:
concept_an.analyze_concepts()

In [None]:
t_idcs = [33, 34]

In [None]:
layer = 0

In [None]:
concept_an.gen_text(t_idcs, layer)

In [None]:
v0_GF = concept_an.v_FG[layer][t_idcs[0]]
vals, idcs = topK(v0_GF, v0_GF.shape[0])
vals, idcs

In [None]:
v0_GF = concept_an.v_FG[layer][t_idcs[1]]
vals, idcs = topK(v0_GF, v0_GF.shape[0])
vals, idcs

In [None]:
concept_an.gen_and_print(idcs, vals, layer, with_text=True, limit=400)

In [None]:
concept_an.gen_and_print([8118, 10840], [23, 13], layer, with_text=True, limit=400)

In [None]:
layer = 8

In [None]:
concept_an.gen_text(t_idcs, layer)

In [None]:
v8_GF = concept_an.v_FG[layer][t_idcs[0]]
vals, idcs = topK(v8_GF, v8_GF.shape[0])
vals, idcs

In [None]:
v8_GF = concept_an.v_FG[layer][t_idcs[1]]
vals, idcs = topK(v8_GF, v8_GF.shape[0])
vals, idcs

In [None]:
concept_an.gen_and_print(idcs, vals, layer, with_text=True, limit=400)

In [None]:
concept_an.gen_and_print(idcs, vals, layer, with_text=True, limit=400)

In [None]:
concept_an.gen_and_print([2393, 1950], [15, 4], layer, with_text=True, limit=400)

In [None]:
layer = 11

In [None]:
concept_an.gen_text(t_idcs, layer)

In [None]:
v11_GF = concept_an.v_FG[layer][t_idcs[0]]
vals, idcs = topK(v11_GF, v11_GF.shape[0])
vals, idcs

In [None]:
v11_GF = concept_an.v_FG[layer][t_idcs[1]]
vals, idcs = topK(v11_GF, v11_GF.shape[0])
vals, idcs

In [None]:
concept_an.gen_and_print(idcs, vals, layer, with_text=True, limit=400)

In [None]:
concept_an.gen_and_print(idcs, vals, layer, with_text=True, limit=400)

In [None]:
concept_an.gen_and_print([22746, 3493], [15, 7], layer, with_text=True, limit=400)

## One Token Features

In [None]:
text = ' catwoman ' 
text

In [None]:
concept_an = ConceptAnalysis(text, tr_analyzer)

In [None]:
concept_an.analyze_concepts()

In [None]:
t_idcs = [1, 2]

In [None]:
layer = 0

In [None]:
concept_an.gen_text(t_idcs, layer)

In [None]:
v0_GF = concept_an.v_FG[layer][1]
vals, idcs = topK(v0_GF, v0_GF.shape[0])
vals, idcs

In [None]:
concept_an.gen_and_print(idcs, vals, layer, with_text=True, limit=400)

In [None]:
layer = 8

In [None]:
concept_an.gen_text(t_idcs, layer)

In [None]:
v8_GF = concept_an.v_FG[layer][1]
vals, idcs = topK(v8_GF, v8_GF.shape[0])
vals[:10], idcs[:10]

In [None]:
v8_GF = concept_an.v_FG[layer][2]
vals, idcs = topK(v8_GF, v8_GF.shape[0])
vals, idcs

In [None]:
concept_an.gen_and_print(idcs, vals, layer, with_text=True, limit=400)

In [None]:
# Pass one-element slices rather than scalars: every other gen_and_print call
# in this notebook hands over sequences of indices and values.
concept_an.gen_and_print(idcs[1:2], vals[1:2], layer, with_text=True, limit=400)

In [None]:
layer = 11

In [None]:
concept_an.gen_text(t_idcs, layer)

In [None]:
v11_GF = concept_an.v_FG[layer][1]
vals, idcs = topK(v11_GF, v11_GF.shape[0])
vals, idcs

In [None]:
concept_an.gen_and_print(idcs, vals, layer, with_text=True, limit=400)

In [None]:
concept_an.gen_and_print(idcs[1:3], vals[1:3], layer, with_text=True, limit=400)

## Other Text

In [None]:
text = ' A Golden Gate Bridge'
text

In [None]:
text = ' New York City'
text

In [None]:
concept_an = ConceptAnalysis(text, tr_analyzer)

In [None]:
concept_an.analyze_concepts()

In [None]:
# Token positions to inspect; the cells below read t_idcs[0], t_idcs[1] and t_idcs[2].
# Previously tried position sets: [2, 3], [1], [2].
t_idcs = [1, 2, 3]

In [None]:
layer = 0

In [None]:
concept_an.gen_text(t_idcs, layer)

In [None]:
v0_GF = concept_an.v_FG[layer][t_idcs[0]]
vals, idcs = topK(v0_GF, v0_GF.shape[0])
vals, idcs

In [None]:
concept_an.gen_and_print(idcs, vals, layer, with_text=True, limit=100)

In [None]:
v0_GF = concept_an.v_FG[layer][t_idcs[1]]
vals, idcs = topK(v0_GF, v0_GF.shape[0])
vals, idcs

In [None]:
concept_an.gen_and_print(idcs, vals, layer, with_text=True, limit=100)

In [None]:
v0_GF = concept_an.v_FG[layer][t_idcs[2]]
vals, idcs = topK(v0_GF, v0_GF.shape[0])
vals, idcs

In [None]:
concept_an.gen_and_print(idcs, vals, layer, with_text=True, limit=100)

In [None]:
# Combine the vectors at all three selected positions with `meet`,
# then run topK over the full length of the result.
layer_vectors = concept_an.v_FG[layer]
v_1 = layer_vectors[t_idcs[0]]
v_2 = layer_vectors[t_idcs[1]]
v_3 = layer_vectors[t_idcs[2]]
v_meet = meet(meet(v_1, v_2), v_3)
vals, idcs = topK(v_meet, v_meet.shape[0])
vals, idcs

In [None]:
concept_an.gen_and_print(idcs, vals, layer, with_text=True, limit=400)

In [None]:
# Pairwise meet of the vectors at t_idcs[1] and t_idcs[2] for the current layer.
# v_1 is fetched but not used by the active line; the commented lines are the other pairings.
v_1 = concept_an.v_FG[layer][t_idcs[0]]
v_2 = concept_an.v_FG[layer][t_idcs[1]]
v_3 = concept_an.v_FG[layer][t_idcs[2]]
# v_meet = meet(v_1, v_2)
# v_meet = meet(v_1, v_3)
v_meet = meet(v_2, v_3)
# v_meet = meet(v_meet, v_3)
vals, idcs = topK(v_meet, v_meet.shape[0])
vals, idcs

In [None]:
concept_an.gen_and_print(idcs, vals, layer, with_text=True, limit=100)

In [None]:
# Pairwise meet of the vectors at t_idcs[0] and t_idcs[1] for the current layer.
# v_3 is fetched but not used by the active line; the commented lines are the other pairings.
v_1 = concept_an.v_FG[layer][t_idcs[0]]
v_2 = concept_an.v_FG[layer][t_idcs[1]]
v_3 = concept_an.v_FG[layer][t_idcs[2]]
v_meet = meet(v_1, v_2)
# v_meet = meet(v_1, v_3)
# v_meet = meet(v_2, v_3)
# v_meet = meet(v_meet, v_3)
vals, idcs = topK(v_meet, v_meet.shape[0])
vals, idcs

In [None]:
concept_an.gen_and_print(idcs, vals, layer, with_text=True, limit=100)

In [None]:
# Pairwise meet of the vectors at t_idcs[0] and t_idcs[2] for the current layer.
# v_2 is fetched but not used by the active line; the commented lines are the other pairings.
v_1 = concept_an.v_FG[layer][t_idcs[0]]
v_2 = concept_an.v_FG[layer][t_idcs[1]]
v_3 = concept_an.v_FG[layer][t_idcs[2]]
# v_meet = meet(v_1, v_2)
v_meet = meet(v_1, v_3)
# v_meet = meet(v_2, v_3)
# v_meet = meet(v_meet, v_3)
vals, idcs = topK(v_meet, v_meet.shape[0])
vals, idcs

In [None]:
concept_an.gen_and_print(idcs, vals, layer, with_text=True, limit=100)

In [None]:
layer = 8

In [None]:
concept_an.gen_text(t_idcs, layer)

In [None]:
v8_GF = concept_an.v_FG[layer][t_idcs[0]]
vals, idcs = topK(v8_GF, v8_GF.shape[0])
vals[:10], idcs[:10]

In [None]:
concept_an.gen_and_print(idcs, vals, layer, with_text=True, limit=400)

In [None]:
concept_an.gen_and_print(idcs[1:], vals[1:], layer, with_text=True, limit=100)

In [None]:
v8_GF = concept_an.v_FG[layer][t_idcs[1]]
vals, idcs = topK(v8_GF, v8_GF.shape[0])
vals, idcs

In [None]:
concept_an.gen_and_print(idcs, vals, layer, with_text=True, limit=100)

In [None]:
v8_GF = concept_an.v_FG[layer][t_idcs[2]]
vals, idcs = topK(v8_GF, v8_GF.shape[0])
vals, idcs

In [None]:
concept_an.gen_and_print(idcs, vals, layer, with_text=True, limit=100)

In [None]:
# Combine the vectors at all three selected positions with `meet`,
# then run topK over the full length of the result.
layer_vectors = concept_an.v_FG[layer]
v_1 = layer_vectors[t_idcs[0]]
v_2 = layer_vectors[t_idcs[1]]
v_3 = layer_vectors[t_idcs[2]]
v_meet = meet(meet(v_1, v_2), v_3)
vals, idcs = topK(v_meet, v_meet.shape[0])
vals, idcs

In [None]:
concept_an.gen_and_print([16679, 10130, 20994], [12, 9, 9], layer, with_text=True, limit=100)

In [None]:
# Pairwise meet of the vectors at t_idcs[1] and t_idcs[2] for the current layer.
# v_1 is fetched but not used by the active line; the commented lines are the other pairings.
v_1 = concept_an.v_FG[layer][t_idcs[0]]
v_2 = concept_an.v_FG[layer][t_idcs[1]]
v_3 = concept_an.v_FG[layer][t_idcs[2]]
# v_meet = meet(v_1, v_2)
# v_meet = meet(v_1, v_3)
v_meet = meet(v_2, v_3)
# v_meet = meet(v_meet, v_3)
vals, idcs = topK(v_meet, v_meet.shape[0])
vals, idcs

In [None]:
concept_an.gen_and_print(idcs, vals, layer, with_text=True, limit=100)

In [None]:
# Pairwise meet of the vectors at t_idcs[0] and t_idcs[1] for the current layer.
# v_3 is fetched but not used by the active line; the commented lines are the other pairings.
v_1 = concept_an.v_FG[layer][t_idcs[0]]
v_2 = concept_an.v_FG[layer][t_idcs[1]]
v_3 = concept_an.v_FG[layer][t_idcs[2]]
v_meet = meet(v_1, v_2)
# v_meet = meet(v_1, v_3)
# v_meet = meet(v_2, v_3)
# v_meet = meet(v_meet, v_3)
vals, idcs = topK(v_meet, v_meet.shape[0])
vals, idcs

In [None]:
concept_an.gen_and_print(idcs, vals, layer, with_text=True, limit=100)

In [None]:
# Pairwise meet of the vectors at t_idcs[0] and t_idcs[2] for the current layer.
# v_2 is fetched but not used by the active line; the commented lines are the other pairings.
v_1 = concept_an.v_FG[layer][t_idcs[0]]
v_2 = concept_an.v_FG[layer][t_idcs[1]]
v_3 = concept_an.v_FG[layer][t_idcs[2]]
# v_meet = meet(v_1, v_2)
v_meet = meet(v_1, v_3)
# v_meet = meet(v_2, v_3)
# v_meet = meet(v_meet, v_3)
vals, idcs = topK(v_meet, v_meet.shape[0])
vals, idcs

In [None]:
concept_an.gen_and_print(idcs, vals, layer, with_text=True, limit=100)

In [None]:
layer = 11

In [None]:
concept_an.gen_text(t_idcs, layer)

In [None]:
v11_GF = concept_an.v_FG[layer][t_idcs[0]]
vals, idcs = topK(v11_GF, v11_GF.shape[0])
vals, idcs

In [None]:
concept_an.gen_and_print(idcs, vals, layer, with_text=True, limit=400)

In [None]:
v11_GF = concept_an.v_FG[layer][t_idcs[1]]
vals, idcs = topK(v11_GF, v11_GF.shape[0])
vals, idcs

In [None]:
concept_an.gen_and_print(idcs, vals, layer, with_text=True, limit=400)

In [None]:
v11_GF = concept_an.v_FG[layer][t_idcs[2]]
vals, idcs = topK(v11_GF, v11_GF.shape[0])
vals, idcs

In [None]:
concept_an.gen_and_print(idcs, vals, layer, with_text=True, limit=400)

In [None]:
concept_an.gen_and_print([2094, 10988, 10533], [20, 14, 12], layer, with_text=True, limit=400)

In [None]:
concept_an.gen_and_print(idcs[1:2], vals[1:2], layer, with_text=True, limit=400)

In [None]:
# Pairwise meet of the vectors at t_idcs[1] and t_idcs[2] for the current layer.
# v_1 is fetched but not used by the active line; the commented lines are the other pairings.
v_1 = concept_an.v_FG[layer][t_idcs[0]]
v_2 = concept_an.v_FG[layer][t_idcs[1]]
v_3 = concept_an.v_FG[layer][t_idcs[2]]
# v_meet = meet(v_1, v_2)
# v_meet = meet(v_1, v_3)
v_meet = meet(v_2, v_3)
# v_meet = meet(v_meet, v_3)
vals, idcs = topK(v_meet, v_meet.shape[0])
vals, idcs

In [None]:
concept_an.gen_and_print(idcs, vals, layer, with_text=True, limit=400)

In [None]:
# Pairwise meet of the vectors at t_idcs[0] and t_idcs[1] for the current layer.
# v_3 is fetched but not used by the active line; the commented lines are the other pairings.
v_1 = concept_an.v_FG[layer][t_idcs[0]]
v_2 = concept_an.v_FG[layer][t_idcs[1]]
v_3 = concept_an.v_FG[layer][t_idcs[2]]
v_meet = meet(v_1, v_2)
# v_meet = meet(v_1, v_3)
# v_meet = meet(v_2, v_3)
# v_meet = meet(v_meet, v_3)
vals, idcs = topK(v_meet, v_meet.shape[0])
vals, idcs

In [None]:
concept_an.gen_and_print(idcs, vals, layer, with_text=True, limit=400)

In [None]:
# Pairwise meet of the vectors at t_idcs[0] and t_idcs[2] for the current layer.
# v_2 is fetched but not used by the active line; the commented lines are the other pairings.
v_1 = concept_an.v_FG[layer][t_idcs[0]]
v_2 = concept_an.v_FG[layer][t_idcs[1]]
v_3 = concept_an.v_FG[layer][t_idcs[2]]
# v_meet = meet(v_1, v_2)
v_meet = meet(v_1, v_3)
# v_meet = meet(v_2, v_3)
# v_meet = meet(v_meet, v_3)
vals, idcs = topK(v_meet, v_meet.shape[0])
vals, idcs

In [None]:
concept_an.gen_and_print(idcs, vals, layer, with_text=True, limit=100)

## Cross Features

In [None]:
v_is = concept_an.v_is[11]
v_is

In [None]:
concept_an.detected_vs[11][24][7]

In [None]:
v_the = concept_an.detected_vs[11][24][7][92]
v_extremely = concept_an.detected_vs[11][24][7][123]

In [None]:
v_meet = meet(v_is[3], v_is[11])

In [None]:
v_meet = meet(v_the, v_extremely)

In [None]:
topK(v_is[3], 10)

In [None]:
topK(v_is[11], 10)

In [None]:
# Recompute the meet of the two v_is vectors here: when the notebook runs in
# order, v_meet has already been reassigned to meet(v_the, v_extremely),
# whose top entries are displayed a few cells further down.
topK(meet(v_is[3], v_is[11]), 10)

In [None]:
topK(v_the, 10)

In [None]:
topK(v_extremely, 10)

In [None]:
topK(v_meet, 10)

In [None]:
v_dets = concept_an.detected_vs[11][0]
v_dets

In [None]:
v1 = v_dets[3][9]
v2 = v_dets[11][6]
v12 = meet(v1, v2)

In [None]:
topK(v12, 10)

In [None]:
c12 = concept_an.G_FG(v12, 11)
c12

In [None]:
for st in concept_an.corpus[c12.A]:
    print(concept_an.to_string(st))

In [None]:
c_the_extr = concept_an.G_FG(v_meet, 11)
c_the_extr

In [None]:
for st in concept_an.corpus[c_the_extr.A]:
    print(concept_an.to_string(st))

## Experiments Pos and Negs Black

In [None]:
tok_indx = 143

In [None]:
text_detoken = tr_analyzer.to_clean(tr_analyzer.tokens[tok_indx])
text_detoken

In [None]:
concept_an = ConceptAnalysis(text_detoken, tr_analyzer)

In [None]:
concept_an.analyze_concepts()

In [None]:
# NOTE(review): i and j are not read anywhere in the visible part of the notebook - confirm they are still needed.
i, j = 27, 28
# Token positions inspected in this section; the cells below index the detections by 28 and 127.
t_idcs = [28, 127]

In [None]:
layer_0 = 0

In [None]:
# Set background_dets on the underlying transcoder to None before generating text.
# NOTE(review): this reaches into the analyzer's internals - presumably to force a recomputation; confirm.
concept_an.tr_utils.transcoder.background_dets = None

In [None]:
concept_an.gen_text(t_idcs, layer_0)

In [None]:
det_22 = concept_an.detected_vs[layer_0][22]
det_22

In [None]:
v_b1 = det_22[28][32]
v_b2 = det_22[127][32]

In [None]:
v_b3 = det_22[127][96]
v_b4 = det_22[127][119]

In [None]:
v_b = join(v_b1, v_b2)

In [None]:
topK(v_b1, 20)

In [None]:
topK(v_b3, 20)

In [None]:
topK(v_b4, 20)

In [None]:
np.all(v_b1 == v_b2)

In [None]:
layer_8 = 8

In [None]:
concept_an.gen_text(t_idcs, layer_8)

In [None]:
concept_an.detected_vs[layer_8]

In [None]:
layer_11 = 11

In [None]:
concept_an.gen_text(t_idcs, layer_11)

In [None]:
concept_an.detected_vs[layer_11][3]

In [None]:
concept_an.detected_vs[layer_11][8]

In [None]:
v_white = concept_an.detected_vs[layer_11][8][28][62]
v_black = concept_an.detected_vs[layer_11][8][127][34]

In [None]:
topK(v_white, 10)

In [None]:
topK(v_black, 10)

In [None]:
v_meet = meet(v_white, v_black)

In [None]:
vals, indcs = topK(v_meet, 10)
vals, indcs

In [None]:
c_meet = concept_an.gen_concept(indcs[1], vals[1], layer_11)
c_meet

In [None]:
concept_an.gen_and_print(indcs[1:4], vals[1:4], layer_11, with_text=True, limit=20)

In [None]:
concept_an.gen_and_print([21836], [17], layer_11, with_text=True, limit=20)