In [1]:
import pandas as pd

from master_thesis.core.utils.reproducibility import seed_everything
from master_thesis.core.models.llama import load_model_and_tokenizer
from master_thesis.core.utils.prompts import load_prompt
from master_thesis.core.models.evaluate import evaluate_model_accuracy


# --- Model / runtime ---------------------------------------------------------
MODEL = "LLAMA_2_7B_CHAT"
DEVICE = "cuda"

# --- Prompting: prompt variant and the two answer words ----------------------
PROMPT_TYPE = "few_shot"
NEGATIVE_TOKEN = "false"
POSITIVE_TOKEN = "true"

# --- Data locations (relative paths) -----------------------------------------
DATA_DIR = "../../../data"
DATASETS_DIR = f"{DATA_DIR}/datasets/base_experiments/car_vs_bike"

In [2]:
# Call the project's reproducibility helper with its default arguments.
# NOTE(review): presumably this fixes the RNG seeds; the seed value and the
# libraries covered are defined in the helper, not here — confirm.
seed_everything()

In [3]:
# Load the model/tokenizer pair identified by MODEL.
# NOTE(review): the loader's log output reports a quantized load requested via
# the deprecated `load_in_4bit`/`load_in_8bit` arguments. The suggested
# replacement (a `BitsAndBytesConfig` passed as `quantization_config`) has to
# be made inside `master_thesis.core.models.llama`, not in this notebook.
model, tokenizer = load_model_and_tokenizer(MODEL)

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
# Token ids of the two answer words, ordered [negative, positive].
# Only the last id of each encoding is kept.
# NOTE(review): this assumes the final token alone identifies each answer word
# for this tokenizer (no multi-piece split) — confirm.
TOKENS = [
    tokenizer.encode(answer)[-1] for answer in (NEGATIVE_TOKEN, POSITIVE_TOKEN)
]

In [5]:
# Load one PROMPT_TYPE prompt per aspect. The two calls differ only in
# `prompt_aspect`, so they share a single call template; the unpacking order
# matches the aspect order (label first, confounding second).
label_prompt, confounding_prompt = (
    load_prompt(
        DATA_DIR,
        dataset_path="base_experiments/car_vs_bike",
        prompt_type=PROMPT_TYPE,
        prompt_aspect=aspect,
    )
    for aspect in ("label_aspect", "confounding_aspect")
)

### Test

In [6]:
# Read the test split (test/test.csv under DATASETS_DIR) into a DataFrame.
test = pd.read_csv(f"{DATASETS_DIR}/test/test.csv")

In [7]:
# Accuracy on the test split using the label-aspect prompt.
# NOTE(review): "label" is presumably the ground-truth column of `test` that
# predictions are compared against — confirm in evaluate_model_accuracy.
evaluate_model_accuracy(model, tokenizer, test, label_prompt, "label", TOKENS, DEVICE)

100%|██████████| 400/400 [02:45<00:00,  2.42it/s]


0.9625

In [8]:
# Accuracy on the same test split, this time using the confounding-aspect
# prompt.
# NOTE(review): "confounding" is presumably the target column of `test` for
# this aspect — confirm in evaluate_model_accuracy.
evaluate_model_accuracy(
    model, tokenizer, test, confounding_prompt, "confounding", TOKENS, DEVICE
)

100%|██████████| 400/400 [02:43<00:00,  2.45it/s]


0.995

### Train - label

In [9]:
# Read the label-aspect training file (train/label_aspect.csv under DATASETS_DIR).
label_aspect = pd.read_csv(f"{DATASETS_DIR}/train/label_aspect.csv")

In [10]:
# Accuracy on the label-aspect training file using the label-aspect prompt.
# NOTE(review): "label" is presumably the ground-truth column of
# `label_aspect` — confirm in evaluate_model_accuracy.
evaluate_model_accuracy(
    model, tokenizer, label_aspect, label_prompt, "label", TOKENS, DEVICE
)

100%|██████████| 200/200 [01:22<00:00,  2.42it/s]


0.875

### Train - confounding

In [11]:
# Read the confounding-aspect training file (train/confounding_aspect.csv
# under DATASETS_DIR).
confounding_aspect = pd.read_csv(f"{DATASETS_DIR}/train/confounding_aspect.csv")

In [12]:
# Accuracy on the confounding-aspect training file using the
# confounding-aspect prompt.
# NOTE(review): "confounding" is presumably the target column of
# `confounding_aspect` — confirm in evaluate_model_accuracy.
evaluate_model_accuracy(
    model,
    tokenizer,
    confounding_aspect,
    confounding_prompt,
    "confounding",
    TOKENS,
    DEVICE,
)

100%|██████████| 200/200 [01:21<00:00,  2.45it/s]


1.0