In [8]:
import pandas as pd
import torch as t

from master_thesis.core.utils.reproducibility import seed_everything
from master_thesis.core.probes import (
    CAV,
    MMProbe,
    LRProbe,
    LDAProbe,
    SVMProbe,
    save_cav,
    load_cav,
)
from master_thesis.core.activations.store import collect_acts


# --- Filesystem layout (relative to this notebook's working directory) ---
DATA_DIR = "../../../../data"
DATASETS_DIR = f"{DATA_DIR}/datasets/base_experiments/cebab/train"

# --- Experiment settings ---
MODEL = "LLAMA_2_7B_CHAT"
PROMPT_TYPE = "few_shot"
POSITIVE_TOKEN, NEGATIVE_TOKEN = "true", "false"

# Device string handed to CAV(...) when fitting probes.
DEVICE = "cuda"

In [9]:
# Call the project's seeding helper with its default arguments before any probe is fit.
# NOTE(review): presumably this fixes the RNG seeds for reproducibility; the seed
# value is whatever the helper defaults to -- confirm in
# master_thesis.core.utils.reproducibility.
seed_everything()

### Aspects

In [10]:
# Layer selection per probe class and per aspect.
# Each integer is the layer index later passed as `layer=` to collect_acts
# when the CAV for that (probe, aspect) pair is trained.
CONFIGS = {
    LRProbe: dict(food=5, ambiance=11, service=7, noise=7),
    LDAProbe: dict(food=14, ambiance=17, service=14, noise=19),
    MMProbe: dict(food=14, ambiance=26, service=14, noise=14),
    SVMProbe: dict(food=14, ambiance=29, service=16, noise=18),
}

In [6]:
# Alternative layer selection: the same layer per aspect for every probe class
# (22 for food/ambiance/service, 16 for noise).
#
# This cell used to rebind CONFIGS. Because it sits after the per-probe CONFIGS
# cell, a "Restart & Run All" would silently replace the per-probe layers with
# these uniform ones, which contradicts the recorded training output (layers
# 5, 11, 7, 7, 14, 17, ...). It now has its own name so both selections can
# coexist; iterate over UNIFORM_LAYER_CONFIGS explicitly to train with it.
UNIFORM_LAYER_CONFIGS = {
    # A fresh inner dict is built per probe so the entries never alias each other.
    probe: {"food": 22, "ambiance": 22, "service": 22, "noise": 16}
    for probe in (LRProbe, LDAProbe, MMProbe, SVMProbe)
}

In [11]:
# Fit one CAV per (probe class, aspect) pair listed in CONFIGS and save each result.
for probe, aspects in CONFIGS.items():
    for aspect, layer in aspects.items():
        # Dataset table for this aspect; handed to CAV.fit together with the activations.
        aspect_df = pd.read_csv(f"{DATASETS_DIR}/{aspect}_aspect.csv")

        # Activations for the configured layer, requested without centering or scaling.
        # The prompt type comes from PROMPT_TYPE instead of a hard-coded "few_shot"
        # so that changing the constant actually changes which activations are read.
        acts = collect_acts(
            f"{DATA_DIR}/activations/{MODEL}/base_experiments/cebab/train/{PROMPT_TYPE}_{aspect}_aspect",
            layer=layer,
            center=False,
            scale=False,
        )

        cav = CAV(probe, DEVICE)
        cav.fit(aspect_df, acts, aspect)

        # Saved per probe class and keyed by "<aspect>_<layer>".
        # NOTE(review): the 2nd/3rd arguments look like a target directory and a
        # file stem -- confirm against save_cav's signature.
        save_cav(cav.cav, f".cache/cavs/{probe.__name__}", f"{aspect}_{layer}")

Collecting activations from layer 5: 100%|██████████| 8/8 [00:00<00:00, 1048.84it/s]


Learned CAV for concept: food
	tensor([-0.4416, -0.1286], device='cuda:0')...tensor([ 0.6451, -0.2157], device='cuda:0')
	Accuracy: 99.5%



Collecting activations from layer 11: 100%|██████████| 8/8 [00:00<00:00, 1077.92it/s]


Learned CAV for concept: ambiance
	tensor([0.2089, 0.4595], device='cuda:0')...tensor([0.1462, 0.2073], device='cuda:0')
	Accuracy: 100.0%



Collecting activations from layer 7: 100%|██████████| 8/8 [00:00<00:00, 1083.73it/s]


Learned CAV for concept: service
	tensor([ 0.7790, -0.4575], device='cuda:0')...tensor([-0.1603,  0.1431], device='cuda:0')
	Accuracy: 99.0%



Collecting activations from layer 7: 100%|██████████| 8/8 [00:00<00:00, 1102.57it/s]


Learned CAV for concept: noise
	tensor([-0.6635,  0.6634], device='cuda:0')...tensor([-0.7068, -0.4436], device='cuda:0')
	Accuracy: 100.0%



Collecting activations from layer 14: 100%|██████████| 8/8 [00:00<00:00, 1143.76it/s]


Learned CAV for concept: food
	tensor([0.6138, 0.2730])...tensor([0.0174, 0.4396])
	Accuracy: 98.5%



Collecting activations from layer 17: 100%|██████████| 8/8 [00:00<00:00, 348.70it/s]


Learned CAV for concept: ambiance
	tensor([-0.2012, -0.1309])...tensor([ 0.3326, -0.0201])
	Accuracy: 98.5%



Collecting activations from layer 14: 100%|██████████| 8/8 [00:00<00:00, 80.96it/s]


Learned CAV for concept: service
	tensor([-0.0022,  0.2476])...tensor([ 0.0464, -0.1082])
	Accuracy: 98.0%



Collecting activations from layer 19: 100%|██████████| 8/8 [00:00<00:00, 339.02it/s]


Learned CAV for concept: noise
	tensor([0.0078, 0.0723])...tensor([0.1587, 0.6234])
	Accuracy: 98.5%



Collecting activations from layer 14: 100%|██████████| 8/8 [00:00<00:00, 77.51it/s]


Learned CAV for concept: food
	tensor([0.0901, 0.2081], device='cuda:0')...tensor([-0.1184,  0.2527], device='cuda:0')
	Accuracy: 94.0%



Collecting activations from layer 26: 100%|██████████| 8/8 [00:00<00:00, 752.58it/s]


Learned CAV for concept: ambiance
	tensor([ 0.7879, -0.2162], device='cuda:0')...tensor([0.3817, 0.2965], device='cuda:0')
	Accuracy: 85.5%



Collecting activations from layer 14: 100%|██████████| 8/8 [00:00<00:00, 1122.94it/s]


Learned CAV for concept: service
	tensor([0.0839, 0.1667], device='cuda:0')...tensor([-0.1263,  0.1593], device='cuda:0')
	Accuracy: 90.0%



Collecting activations from layer 14: 100%|██████████| 8/8 [00:00<00:00, 1057.07it/s]


Learned CAV for concept: noise
	tensor([ 0.0021, -0.0094], device='cuda:0')...tensor([ 0.0500, -0.0036], device='cuda:0')
	Accuracy: 93.0%



Collecting activations from layer 14: 100%|██████████| 8/8 [00:00<00:00, 1112.51it/s]


Learned CAV for concept: food
	tensor([0.0229, 0.0280])...tensor([-0.0248,  0.0008])
	Accuracy: 100.0%



Collecting activations from layer 29: 100%|██████████| 8/8 [00:00<00:00, 307.04it/s]


Learned CAV for concept: ambiance
	tensor([ 0.0079, -0.0192])...tensor([-0.0208, -0.0095])
	Accuracy: 98.0%



Collecting activations from layer 16: 100%|██████████| 8/8 [00:00<00:00, 384.34it/s]


Learned CAV for concept: service
	tensor([ 0.0026, -0.0125])...tensor([-0.0070, -0.0481])
	Accuracy: 100.0%



Collecting activations from layer 18: 100%|██████████| 8/8 [00:00<00:00, 448.80it/s]


Learned CAV for concept: noise
	tensor([0.0102, 0.0217])...tensor([-0.0082,  0.0080])
	Accuracy: 99.5%

