In [1]:
import pandas as pd

from master_thesis.core.utils.reproducibility import seed_everything
from master_thesis.core.models.llama import load_model_and_tokenizer
from master_thesis.core.utils.prompts import load_prompt
from master_thesis.core.methods.probing_with_interventions import (
    ProbingWithInterventions,
)
from master_thesis.core.probes import load_cav
from master_thesis.core.utils.reproducibility import save_results


# --- Runtime / model configuration -------------------------------------------
DEVICE = "cuda"
PROMPT_TYPE = "few_shot"
# Answer strings whose token ids are looked up later to build TOKENS.
POSITIVE_TOKEN = "true"
NEGATIVE_TOKEN = "false"
MODEL = "LLAMA_2_7B_CHAT"

# --- Probe selection ----------------------------------------------------------
# Probe type and layer identify which cached CAV is loaded for each aspect and
# are also embedded in the result file names.
LABEL_ASPECT_MODEL = "MMProbe"
LABEL_ASPECT_LAYER = 12

CONFOUNDING_ASPECT_MODEL = "MMProbe"
CONFOUNDING_ASPECT_LAYER = 12

# --- Paths and display names --------------------------------------------------
DATA_DIR = "../../../../data"
DATASETS_DIR = f"{DATA_DIR}/datasets/base_experiments/europe_vs_usa/test"
# Plain string: the original used an f-string with no placeholders.
CACHE_DIR = ".cache/results"
CLASS_NAMES = ["usa", "europe"]
LABEL_ASPECT_NAMES = ["usa", "europe"]
# Backward-compatible alias for the original (misspelled) constant name, which
# other cells still reference.
LABEL_ASCPECT_NAMES = LABEL_ASPECT_NAMES
CONFOUNDING_ASPECT_NAMES = ["nighttime", "daytime"]

In [2]:
# Project reproducibility helper, called with its default arguments.
# NOTE(review): presumably fixes the RNG seeds used by later cells — confirm in
# master_thesis.core.utils.reproducibility.
seed_everything()

In [3]:
# Load the model/tokenizer pair selected by MODEL. The recorded output below
# reports that the model is quantized and that checkpoint shards are loaded.
model, tokenizer = load_model_and_tokenizer(MODEL)

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
# Token ids of the two answer strings, ordered [negative, positive].
# Only the last id of each encoding is kept.
TOKENS = [
    tokenizer.encode(answer)[-1] for answer in (NEGATIVE_TOKEN, POSITIVE_TOKEN)
]

### Prepare datasets, prompts, and CAVs

In [5]:
# Read the two test CSVs from DATASETS_DIR.
# NOTE(review): the 50_50 / 95_5 suffixes presumably describe how strongly the
# label and the confounding aspect are correlated in each split — confirm
# against the dataset generation code.
test_50_50 = pd.read_csv(f"{DATASETS_DIR}/test_50_50.csv")
test_95_5 = pd.read_csv(f"{DATASETS_DIR}/test_95_5.csv")

In [6]:
# Both prompts share the dataset folder and prompt type; only the aspect differs.
# The generator is consumed in order, so the label prompt is loaded first and
# the confounding prompt second.
label_prompt, confounding_prompt = (
    load_prompt(
        DATA_DIR,
        dataset_path="base_experiments/europe_vs_usa",
        prompt_type=PROMPT_TYPE,
        prompt_aspect=aspect,
    )
    for aspect in ("label_aspect", "confounding_aspect")
)

In [7]:
# Load one cached CAV per aspect. The directory is keyed by probe type and the
# entry name by "<aspect>_<layer>", both taken from the config constants.
# NOTE(review): the ".cache/cavs" root is hard-coded here rather than derived
# from a config constant; it is assumed to have been populated by an earlier
# notebook — confirm.
label_cav = load_cav(f".cache/cavs/{LABEL_ASPECT_MODEL}", f"label_{LABEL_ASPECT_LAYER}")
confounding_cav = load_cav(
    f".cache/cavs/{CONFOUNDING_ASPECT_MODEL}", f"confounding_{CONFOUNDING_ASPECT_LAYER}"
)

### Estimate Probing With Interventions

### Label

#### Test 50 50

In [8]:
# Call the seeding helper again right before building the estimator
# (presumably so this run does not depend on RNG state consumed by earlier
# cells — confirm).
seed_everything()

# Label aspect: the label CAV is used, and the label prompt serves as both the
# aspect prompt and the task prompt.
probing_with_interventions = ProbingWithInterventions(
    cav=label_cav,
    model=model,
    tokenizer=tokenizer,
    tokens=TOKENS,
    aspect_prompt=label_prompt,
    prompt=label_prompt,
    # Use the same config constant that selected label_cav instead of a
    # hard-coded 12, so the CAV and the layer passed here cannot drift apart.
    layer=LABEL_ASPECT_LAYER,
    device=DEVICE,
    verbose=True,
)

In [None]:
# Evaluate the label-aspect estimator on the 50_50 split. Aspect and original
# label are both given as "label" (presumably column names of the CSV — confirm).
test_50_50_results_label = probing_with_interventions.evaluate(
    test_50_50, aspect_label_name="label", original_label_name="label"
)

In [3]:
# Persist the 50_50 label-aspect results to a .json path under CACHE_DIR; the
# file name encodes the probe type and layer from the config constants.
save_results(
    test_50_50_results_label,
    class_names=CLASS_NAMES,
    aspect_names=LABEL_ASCPECT_NAMES,
    save_path=f"{CACHE_DIR}/probing_with_interventions/test_50_50/label_aspect_{LABEL_ASPECT_MODEL}_{LABEL_ASPECT_LAYER}.json",
)

In [4]:
# Bare expression so the notebook displays the result (a nested dict, see output).
test_50_50_results_label

{0: {0: 0.94, 1: -0.94}, 1: {0: -0.95, 1: 0.95}}

#### Test 95 5

In [11]:
# Call the seeding helper again right before building the estimator
# (presumably so this run does not depend on RNG state consumed by earlier
# cells — confirm).
seed_everything()

# Label aspect, rebuilt for the 95_5 split: the label CAV is used, and the
# label prompt serves as both the aspect prompt and the task prompt.
probing_with_interventions = ProbingWithInterventions(
    cav=label_cav,
    model=model,
    tokenizer=tokenizer,
    tokens=TOKENS,
    aspect_prompt=label_prompt,
    prompt=label_prompt,
    # Use the same config constant that selected label_cav instead of a
    # hard-coded 12, so the CAV and the layer passed here cannot drift apart.
    layer=LABEL_ASPECT_LAYER,
    device=DEVICE,
    verbose=True,
)

In [None]:
# Evaluate the label-aspect estimator on the 95_5 split. Aspect and original
# label are both given as "label" (presumably column names of the CSV — confirm).
test_95_5_results_label = probing_with_interventions.evaluate(
    test_95_5, aspect_label_name="label", original_label_name="label"
)

In [6]:
# Persist the 95_5 label-aspect results to a .json path under CACHE_DIR; the
# file name encodes the probe type and layer from the config constants.
save_results(
    test_95_5_results_label,
    class_names=CLASS_NAMES,
    aspect_names=LABEL_ASCPECT_NAMES,
    save_path=f"{CACHE_DIR}/probing_with_interventions/test_95_5/label_aspect_{LABEL_ASPECT_MODEL}_{LABEL_ASPECT_LAYER}.json",
)

In [7]:
# Bare expression so the notebook displays the result (a nested dict, see output).
test_95_5_results_label

{0: {0: 0.92, 1: -0.92}, 1: {0: -0.94, 1: 0.94}}

### Confounding

#### Test 50 50

In [14]:
# Call the seeding helper again right before building the estimator
# (presumably so this run does not depend on RNG state consumed by earlier
# cells — confirm).
seed_everything()

# Confounding aspect: the confounding CAV and confounding prompt describe the
# aspect, while the task prompt remains the label prompt.
probing_with_interventions = ProbingWithInterventions(
    cav=confounding_cav,
    model=model,
    tokenizer=tokenizer,
    tokens=TOKENS,
    aspect_prompt=confounding_prompt,
    prompt=label_prompt,
    # Use the same config constant that selected confounding_cav instead of a
    # hard-coded 12, so the CAV and the layer passed here cannot drift apart.
    layer=CONFOUNDING_ASPECT_LAYER,
    device=DEVICE,
    verbose=True,
)

In [None]:
# Evaluate the confounding-aspect estimator on the 50_50 split. The aspect label
# is "confounding" and the original label is "label" (presumably column names
# of the CSV — confirm).
test_50_50_results_confounding = probing_with_interventions.evaluate(
    test_50_50, aspect_label_name="confounding", original_label_name="label"
)

In [9]:
# Persist the 50_50 confounding-aspect results to a .json path under CACHE_DIR;
# the file name encodes the probe type and layer from the config constants.
save_results(
    test_50_50_results_confounding,
    class_names=CLASS_NAMES,
    aspect_names=CONFOUNDING_ASPECT_NAMES,
    save_path=f"{CACHE_DIR}/probing_with_interventions/test_50_50/confounding_aspect_{CONFOUNDING_ASPECT_MODEL}_{CONFOUNDING_ASPECT_LAYER}.json",
)

In [11]:
# Bare expression so the notebook displays the result (a nested dict, see output).
test_50_50_results_confounding

{0: {0: 0.39, 1: -0.39}, 1: {0: -0.05, 1: 0.05}}

#### Test 95 5

In [17]:
# Call the seeding helper again right before building the estimator
# (presumably so this run does not depend on RNG state consumed by earlier
# cells — confirm).
seed_everything()

# Confounding aspect, rebuilt for the 95_5 split: the confounding CAV and
# confounding prompt describe the aspect, while the task prompt remains the
# label prompt.
probing_with_interventions = ProbingWithInterventions(
    cav=confounding_cav,
    model=model,
    tokenizer=tokenizer,
    tokens=TOKENS,
    aspect_prompt=confounding_prompt,
    prompt=label_prompt,
    # Use the same config constant that selected confounding_cav instead of a
    # hard-coded 12, so the CAV and the layer passed here cannot drift apart.
    layer=CONFOUNDING_ASPECT_LAYER,
    device=DEVICE,
    verbose=True,
)

In [None]:
# Evaluate the confounding-aspect estimator on the 95_5 split. The aspect label
# is "confounding" and the original label is "label" (presumably column names
# of the CSV — confirm).
test_95_5_results_confounding = probing_with_interventions.evaluate(
    test_95_5, aspect_label_name="confounding", original_label_name="label"
)

In [13]:
# Persist the 95_5 confounding-aspect results to a .json path under CACHE_DIR;
# the file name encodes the probe type and layer from the config constants.
save_results(
    test_95_5_results_confounding,
    class_names=CLASS_NAMES,
    aspect_names=CONFOUNDING_ASPECT_NAMES,
    save_path=f"{CACHE_DIR}/probing_with_interventions/test_95_5/confounding_aspect_{CONFOUNDING_ASPECT_MODEL}_{CONFOUNDING_ASPECT_LAYER}.json",
)

In [14]:
# Bare expression so the notebook displays the result (a nested dict, see output).
test_95_5_results_confounding

{0: {0: 0.71, 1: -0.71}, 1: {0: -0.17, 1: 0.17}}