In [1]:
import pandas as pd
from pathlib import Path

from master_thesis.core.activations.store import collect_acts
from master_thesis.core.visualization.visualizer import Visualizer


# --- Model / prompt settings -------------------------------------------------
DEVICE = "cuda"
PROMPT_TYPE = "few_shot"
POSITIVE_TOKEN, NEGATIVE_TOKEN = "true", "false"
MODEL = "LLAMA_2_7B_CHAT"
# The plotting cells iterate layers 1..LAYERS_NUMBER inclusive.
LAYERS_NUMBER = 31

# --- Filesystem layout (relative to the notebook's working directory) --------
# Sub-path shared by the dataset and activation folders of this experiment.
_EXPERIMENT_SUBDIR = "base_experiments/car_vs_bike"

DATA_DIR = "../../../../data"
DATASETS_DIR = f"{DATA_DIR}/datasets/{_EXPERIMENT_SUBDIR}"
ACTIVATIONS_DIR = f"{DATA_DIR}/activations/{MODEL}/{_EXPERIMENT_SUBDIR}"
# NOTE: keeps its trailing slash; consumers append "/<split>/<aspect>" to it.
SAVE_DIR = ".cache/visualizations/"

ACTS_BATCH_SIZE = 25

### Train dataset - label

In [2]:
# Plot the train-set "label aspect" activations for every layer
# (dimensions=2, colored by the `label` column) and save one PNG per layer.
label_df = pd.read_csv(f"{DATASETS_DIR}/train/label_aspect.csv")

# The output directory is identical for every layer, so create it once instead
# of on each iteration. Building it with Path also drops the "//" that
# SAVE_DIR's trailing slash would otherwise leave in the path.
save_dir = Path(SAVE_DIR) / "train" / "label_aspect"
save_dir.mkdir(parents=True, exist_ok=True)

# Derive the activations folder from PROMPT_TYPE rather than repeating the
# "few_shot" literal, so the config cell stays the single source of truth.
acts_dir = f"{ACTIVATIONS_DIR}/train/{PROMPT_TYPE}_label_aspect"

for layer in range(1, LAYERS_NUMBER + 1):
    activations = collect_acts(acts_dir, layer=layer, center=True, scale=True)

    # Overwrites the column on every pass; assumes collect_acts yields exactly
    # one entry per CSV row, in row order -- TODO confirm.
    label_df["activation"] = list(activations)

    visualizer = Visualizer(label_df)
    fig = visualizer.plot(dimensions=2, color_label="label", layer=layer)

    fig.write_image(str(save_dir / f"layer_{layer}.png"))

Collecting activations from layer 1: 100%|██████████| 8/8 [00:03<00:00,  2.66it/s]
Collecting activations from layer 2: 100%|██████████| 8/8 [00:00<00:00, 975.56it/s]
Collecting activations from layer 3: 100%|██████████| 8/8 [00:00<00:00, 1062.86it/s]
Collecting activations from layer 4: 100%|██████████| 8/8 [00:00<00:00, 1118.56it/s]
Collecting activations from layer 5: 100%|██████████| 8/8 [00:00<00:00, 1115.21it/s]
Collecting activations from layer 6: 100%|██████████| 8/8 [00:00<00:00, 1109.64it/s]
Collecting activations from layer 7: 100%|██████████| 8/8 [00:00<00:00, 1095.69it/s]
Collecting activations from layer 8: 100%|██████████| 8/8 [00:00<00:00, 1134.98it/s]
Collecting activations from layer 9: 100%|██████████| 8/8 [00:00<00:00, 1105.62it/s]
Collecting activations from layer 10: 100%|██████████| 8/8 [00:00<00:00, 1110.08it/s]
Collecting activations from layer 11: 100%|██████████| 8/8 [00:00<00:00, 1129.89it/s]
Collecting activations from layer 12: 100%|██████████| 8/8 [00:00<

### Train dataset - confounding

In [3]:
# Plot the train-set "confounding aspect" activations for every layer
# (dimensions=2, colored by the `confounding` column) and save one PNG per layer.
confounding_df = pd.read_csv(f"{DATASETS_DIR}/train/confounding_aspect.csv")

# The output directory is identical for every layer, so create it once instead
# of on each iteration. Building it with Path also drops the "//" that
# SAVE_DIR's trailing slash would otherwise leave in the path.
save_dir = Path(SAVE_DIR) / "train" / "confounding_aspect"
save_dir.mkdir(parents=True, exist_ok=True)

# Derive the activations folder from PROMPT_TYPE rather than repeating the
# "few_shot" literal, so the config cell stays the single source of truth.
acts_dir = f"{ACTIVATIONS_DIR}/train/{PROMPT_TYPE}_confounding_aspect"

for layer in range(1, LAYERS_NUMBER + 1):
    activations = collect_acts(acts_dir, layer=layer, center=True, scale=True)

    # Overwrites the column on every pass; assumes collect_acts yields exactly
    # one entry per CSV row, in row order -- TODO confirm.
    confounding_df["activation"] = list(activations)

    visualizer = Visualizer(confounding_df)
    fig = visualizer.plot(dimensions=2, color_label="confounding", layer=layer)

    fig.write_image(str(save_dir / f"layer_{layer}.png"))

Collecting activations from layer 1: 100%|██████████| 8/8 [00:00<00:00, 1073.81it/s]
Collecting activations from layer 2: 100%|██████████| 8/8 [00:00<00:00, 1123.09it/s]
Collecting activations from layer 3: 100%|██████████| 8/8 [00:00<00:00, 1107.37it/s]
Collecting activations from layer 4: 100%|██████████| 8/8 [00:00<00:00, 1129.70it/s]
Collecting activations from layer 5: 100%|██████████| 8/8 [00:00<00:00, 933.70it/s]
Collecting activations from layer 6: 100%|██████████| 8/8 [00:00<00:00, 884.76it/s]
Collecting activations from layer 7: 100%|██████████| 8/8 [00:00<00:00, 1121.02it/s]
Collecting activations from layer 8: 100%|██████████| 8/8 [00:00<00:00, 1052.32it/s]
Collecting activations from layer 9: 100%|██████████| 8/8 [00:00<00:00, 1096.55it/s]
Collecting activations from layer 10: 100%|██████████| 8/8 [00:00<00:00, 1127.31it/s]
Collecting activations from layer 11: 100%|██████████| 8/8 [00:00<00:00, 920.94it/s]
Collecting activations from layer 12: 100%|██████████| 8/8 [00:00<

### Test dataset - label

In [5]:
# Plot the test-set "label aspect" activations for every layer
# (dimensions=2, colored by the `label` column) and save one PNG per layer.
test_df = pd.read_csv(f"{DATASETS_DIR}/test/test.csv")

# The output directory is identical for every layer, so create it once instead
# of on each iteration. Building it with Path also drops the "//" that
# SAVE_DIR's trailing slash would otherwise leave in the path.
save_dir = Path(SAVE_DIR) / "test" / "label_aspect"
save_dir.mkdir(parents=True, exist_ok=True)

# Derive the activations folder from PROMPT_TYPE rather than repeating the
# "few_shot" literal, so the config cell stays the single source of truth.
acts_dir = f"{ACTIVATIONS_DIR}/test/{PROMPT_TYPE}_label_aspect"

for layer in range(1, LAYERS_NUMBER + 1):
    activations = collect_acts(acts_dir, layer=layer, center=True, scale=True)

    # Overwrites the column on every pass; assumes collect_acts yields exactly
    # one entry per CSV row, in row order -- TODO confirm.
    test_df["activation"] = list(activations)

    visualizer = Visualizer(test_df)
    fig = visualizer.plot(dimensions=2, color_label="label", layer=layer)

    fig.write_image(str(save_dir / f"layer_{layer}.png"))

Collecting activations from layer 1: 100%|██████████| 16/16 [00:00<00:00, 1064.26it/s]
Collecting activations from layer 2: 100%|██████████| 16/16 [00:00<00:00, 1188.76it/s]
Collecting activations from layer 3: 100%|██████████| 16/16 [00:00<00:00, 1143.13it/s]
Collecting activations from layer 4: 100%|██████████| 16/16 [00:00<00:00, 1203.96it/s]
Collecting activations from layer 5: 100%|██████████| 16/16 [00:00<00:00, 1156.07it/s]
Collecting activations from layer 6: 100%|██████████| 16/16 [00:00<00:00, 1091.49it/s]
Collecting activations from layer 7: 100%|██████████| 16/16 [00:00<00:00, 1220.61it/s]
Collecting activations from layer 8: 100%|██████████| 16/16 [00:00<00:00, 1174.88it/s]
Collecting activations from layer 9: 100%|██████████| 16/16 [00:00<00:00, 830.50it/s]
Collecting activations from layer 10: 100%|██████████| 16/16 [00:00<00:00, 893.14it/s]
Collecting activations from layer 11: 100%|██████████| 16/16 [00:00<00:00, 715.01it/s]
Collecting activations from layer 12: 100%|█

### Test dataset - confounding

In [6]:
# Plot the test-set "confounding aspect" activations for every layer
# (dimensions=2, colored by the `confounding` column) and save one PNG per layer.
# The CSV is re-read here so this cell does not depend on the state left behind
# by the label-aspect test cell.
test_df = pd.read_csv(f"{DATASETS_DIR}/test/test.csv")

# The output directory is identical for every layer, so create it once instead
# of on each iteration. Building it with Path also drops the "//" that
# SAVE_DIR's trailing slash would otherwise leave in the path.
save_dir = Path(SAVE_DIR) / "test" / "confounding_aspect"
save_dir.mkdir(parents=True, exist_ok=True)

# Derive the activations folder from PROMPT_TYPE rather than repeating the
# "few_shot" literal, so the config cell stays the single source of truth.
acts_dir = f"{ACTIVATIONS_DIR}/test/{PROMPT_TYPE}_confounding_aspect"

for layer in range(1, LAYERS_NUMBER + 1):
    activations = collect_acts(acts_dir, layer=layer, center=True, scale=True)

    # Overwrites the column on every pass; assumes collect_acts yields exactly
    # one entry per CSV row, in row order -- TODO confirm.
    test_df["activation"] = list(activations)

    visualizer = Visualizer(test_df)
    fig = visualizer.plot(dimensions=2, color_label="confounding", layer=layer)

    fig.write_image(str(save_dir / f"layer_{layer}.png"))

Collecting activations from layer 1: 100%|██████████| 16/16 [00:00<00:00, 1142.82it/s]
Collecting activations from layer 2: 100%|██████████| 16/16 [00:00<00:00, 920.51it/s]
Collecting activations from layer 3: 100%|██████████| 16/16 [00:00<00:00, 1003.95it/s]
Collecting activations from layer 4: 100%|██████████| 16/16 [00:00<00:00, 1181.74it/s]
Collecting activations from layer 5: 100%|██████████| 16/16 [00:00<00:00, 893.82it/s]
Collecting activations from layer 6: 100%|██████████| 16/16 [00:00<00:00, 1069.48it/s]
Collecting activations from layer 7: 100%|██████████| 16/16 [00:00<00:00, 1128.96it/s]
Collecting activations from layer 8: 100%|██████████| 16/16 [00:00<00:00, 1160.73it/s]
Collecting activations from layer 9: 100%|██████████| 16/16 [00:00<00:00, 1105.33it/s]
Collecting activations from layer 10: 100%|██████████| 16/16 [00:00<00:00, 1132.51it/s]
Collecting activations from layer 11: 100%|██████████| 16/16 [00:00<00:00, 1001.97it/s]
Collecting activations from layer 12: 100%|