# Baselines

In [None]:
import pandas as pd
import os
import wandb
from VLM_base_classes import DeepSeekVLBaseClass, LlavaBaseClass
from models import load_model
from config import PATH_TO_DATASETS

In [None]:
# Name of the vision-language model the baselines are generated with; the
# same name also decides which wrapper class is used further down.
model_name = "DeepSeek_VL"

# load_model hands back two objects, unpacked here as the model and its
# processor.
loaded_components = load_model(model_name)
model, processor = loaded_components

In [None]:
# Select the wrapper class that matches the configured model name.
if model_name == "DeepSeek_VL":
    baseclass = DeepSeekVLBaseClass
elif model_name == "LlaVa":
    baseclass = LlavaBaseClass
else:
    # Fail loudly on an unknown name. Without this branch `baseclass` would
    # be undefined (NameError below) or, when the notebook is re-run with a
    # different model_name, silently keep the class chosen by a previous
    # execution of this cell.
    raise ValueError(
        f"Unsupported model_name {model_name!r}; "
        "expected 'DeepSeek_VL' or 'LlaVa'."
    )

# Wrap the loaded model and processor in the selected class; this object is
# what the generation cells call.
baseline = baseclass(model=model, processor=processor)

In [None]:
# Locations of the two advbench_mini splits inside the datasets directory.
train_csv_path = os.path.join(PATH_TO_DATASETS, "advbench_mini_train.csv")
test_csv_path = os.path.join(PATH_TO_DATASETS, "advbench_mini_test.csv")

# The first CSV column is used as the DataFrame index for both splits.
df_train = pd.read_csv(train_csv_path, index_col=0)
df_test = pd.read_csv(test_csv_path, index_col=0)

# Trailing expression so the notebook displays a preview of the test split.
df_test.head()

In [None]:
# Work on copies so the frames loaded from disk stay untouched.
df_train_copy = df_train.copy()
df_test_copy = df_test.copy()

# Add the two result columns, initialised to empty strings, to both copies.
# "baseline_automatic" is created before "baseline_manual" so the column
# order of each frame is the same as when the columns are added one by one.
for result_column in ("baseline_automatic", "baseline_manual"):
    for working_frame in (df_train_copy, df_test_copy):
        working_frame[result_column] = ""

In [None]:
# Upper bound on the number of tokens generated per answer.
max_new_tokens = 500


def _fill_baseline_columns(df_source, df_target, max_new_tokens):
    """Generate baseline answers for every row of ``df_source``.

    For each index label in ``df_source`` the "goal" cell is converted to a
    string and used as the prompt (no image) for both generation methods of
    the module-level ``baseline`` object. The answers are written into the
    "baseline_automatic" and "baseline_manual" columns of ``df_target`` at
    the same index label, so ``df_target`` is modified in place.

    Args:
        df_source: DataFrame with a "goal" column to read prompts from.
        df_target: DataFrame sharing ``df_source``'s index that receives
            the generated answers.
        max_new_tokens: Generation length limit forwarded to both methods.
    """
    for ind in df_source.index:
        prompt = str(df_source.loc[ind, "goal"])

        # Both methods return a pair; only the second element (the answer)
        # is used here.
        _, answer = baseline.generate_autoregressive(
            prompt=prompt,
            image=None,
            max_new_tokens=max_new_tokens,
            no_eos_token=False,
        )
        _, answer_manual = baseline.generate_autoregressive_manual(
            prompt=prompt,
            image=None,
            use_cache=False,
            max_new_tokens=max_new_tokens,
            no_eos_token=False,
        )

        df_target.loc[ind, "baseline_automatic"] = answer
        # The manual answer arrives as a sequence of string pieces and is
        # concatenated into a single string before being stored.
        df_target.loc[ind, "baseline_manual"] = "".join(answer_manual)


# Make sure the output directory exists before anything expensive runs, so
# the final to_csv calls cannot fail on a missing folder.
path_to_results = os.path.join(PATH_TO_DATASETS, "results")
os.makedirs(path_to_results, exist_ok=True)

# Using the run as a context manager guarantees it is finished even when
# generation or the upload raises, instead of leaving a dangling run.
with wandb.init(project="datasets", job_type="upload_dataset") as run:
    baselines_artifact = wandb.Artifact("baselines_advbench_mini", type="baselines")

    _fill_baseline_columns(df_test, df_test_copy, max_new_tokens)
    table_scores_test = wandb.Table(dataframe=df_test_copy)
    baselines_artifact.add(table_scores_test, name="baselines_test")

    _fill_baseline_columns(df_train, df_train_copy, max_new_tokens)
    table_scores_train = wandb.Table(dataframe=df_train_copy)
    baselines_artifact.add(table_scores_train, name="baselines_train")

    # Persist the results locally before uploading: a failed upload must not
    # throw away the generated answers.
    df_train_copy.to_csv(
        os.path.join(path_to_results, "advbench_mini_train_baseline.csv")
    )
    df_test_copy.to_csv(
        os.path.join(path_to_results, "advbench_mini_test_baseline.csv")
    )

    run.log_artifact(baselines_artifact)