In [17]:
import os
import pandas as pd
import pickle
import numpy as np

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set()

plt.rcParams['figure.dpi'] = 600
plt.rcParams['savefig.dpi'] = 600

In [18]:
explainer_map = {'conceptx': 'ConceptX', 'aconceptx': 'AntonymConceptX', 'conceptx_r': 'ConceptX-R', 'conceptx_a': 'ConceptX-A', 'conceptshap': 'ConceptSHAP', 'tokenshap': 'TokenSHAP', 'random': 'Random'}
explainer_order = ["Random", "TokenSHAP", "ConceptSHAP", "ConceptX", 'AntonymConceptX', "ConceptX-R", "ConceptX-A"]
MODEL_NAMES = {"gpt2": "GPT-2", "gemma-2-2b":"Gemma-2-2B", "gemma-3-4b":"Gemma-3-4B", "gpt4o-mini": "GPT-4o-mini", "llama-3-3b": "Llama-3.2-3B"}

In [3]:
save_dir = "/cluster/home/kamara/conceptx"
seed_ = 0

In [19]:
folder_path = os.path.join(save_dir, f"results/classification-antonym")  # Replace with your folder path

# Initialize a list to store the dataframes
df_list = []

# Loop through each file in the folder
for root, dirs, files in os.walk(folder_path):
    for file in files:
        full_path = os.path.join(root, file)
        parts = file.split('_')
        print("parts: ", parts)
        
        df = pd.read_csv(full_path)

        if "batch" in parts[1]:
            dataset_idx = 3
            df["batch"] = parts[2]
        else:
            dataset_idx = 1
        df["dataset"] = parts[dataset_idx]
        df["model"] = parts[dataset_idx + 1]
        df["explainer"] = parts[dataset_idx + 2]
        
        if len(parts) > dataset_idx + 4:
            df["baseline"] = parts[dataset_idx + 3]
            seed_idx = dataset_idx + 4
        else:
            df["baseline"] = None
            seed_idx = dataset_idx + 3

        df["seed"] = parts[seed_idx].split(".")[0]
        df_list.append(df)

# Concatenate all dataframes in the list
df_all = pd.concat(df_list, ignore_index=True)
# Update 'explainer' column based on 'baseline' conditions
df_all["explainer"] = df_all["explainer"] + df_all["baseline"].map({"reference": "_r", "aspect": "_a"}).fillna("")
df_all

parts:  ['classification-antonym', 'batch', '0', 'sst2', 'llama-3-3b', 'conceptshap', '0.csv']
parts:  ['classification-antonym', 'batch', '2', 'sst2', 'llama-3-3b', 'conceptshap', '0.csv']
parts:  ['classification-antonym', 'batch', '3', 'sst2', 'llama-3-3b', 'conceptshap', '0.csv']
parts:  ['classification-antonym', 'batch', '1', 'sst2', 'llama-3-3b', 'conceptshap', '0.csv']
parts:  ['classification-antonym', 'batch', '6', 'sst2', 'llama-3-3b', 'conceptshap', '0.csv']
parts:  ['classification-antonym', 'batch', '7', 'sst2', 'llama-3-3b', 'conceptshap', '0.csv']
parts:  ['classification-antonym', 'batch', '8', 'sst2', 'llama-3-3b', 'conceptshap', '0.csv']
parts:  ['classification-antonym', 'batch', '5', 'sst2', 'llama-3-3b', 'conceptshap', '0.csv']
parts:  ['classification-antonym', 'batch', '4', 'sst2', 'llama-3-3b', 'conceptshap', '0.csv']
parts:  ['classification-antonym', 'batch', '9', 'sst2', 'llama-3-3b', 'conceptshap', '0.csv']
parts:  ['classification-antonym', 'batch', '0', '

Unnamed: 0,id,input,p0,p_highest,p_label,aspect,highest_token,label,batch,dataset,model,explainer,baseline,seed
0,0,hide new secretions from the parental units,0.147157,0.104042,0.137585,negative,secretions,hide,0,sst2,llama-3-3b,conceptshap,,0
1,1,"contains no wit , only labored gags",0.704776,0.099989,0.336867,negative,gags,labored,0,sst2,llama-3-3b,conceptshap,,0
2,3,remains utterly satisfied to remain the same t...,0.203234,0.031730,0.047706,negative,satisfied,utterly,0,sst2,llama-3-3b,conceptshap,,0
3,8,a depressed fifteen-year-old 's suicidal poetry,0.861898,0.818883,0.812673,negative,year,suicidal,0,sst2,llama-3-3b,conceptshap,,0
4,14,lend some dignity to a dumb story,0.840426,0.837251,0.079819,negative,lend,dumb,0,sst2,llama-3-3b,conceptshap,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34249,6031,i dont understand the whole follow friday thing,0.590553,0.620394,0.109361,negative,follow,understand,9,sentiment,gpt4o-mini,conceptx_a,aspect,0
34250,6032,richards sorry to be the bearer of bad news,0.896045,0.200717,0.200717,negative,bad,bad,9,sentiment,gpt4o-mini,conceptx_a,aspect,0
34251,6037,my girlfriend still likes sesame st and so do I,0.685415,0.458998,0.217103,positive,sesame,likes,9,sentiment,gpt4o-mini,conceptx_a,aspect,0
34252,6041,KillBoy I know the feeling,0.593670,0.157630,0.593670,negative,KillBoy,Kill,9,sentiment,gpt4o-mini,conceptx_a,aspect,0


In [20]:
### Size of dataset for model
model_list = ["llama-3-3b", "gemma-3-4b", "gpt4o-mini"]
df = df_all[df_all["model"].isin(model_list)]
df_stats = df.groupby(["dataset", "model", "explainer"])["id"].nunique().reset_index()

In [21]:
dataset = "sentiment"
df_stats[df_stats['dataset']==dataset]

Unnamed: 0,dataset,model,explainer,id
0,sentiment,gemma-3-4b,aconceptx,927
1,sentiment,gemma-3-4b,conceptshap,948
2,sentiment,gemma-3-4b,conceptx,938
3,sentiment,gemma-3-4b,conceptx_a,938
4,sentiment,gemma-3-4b,random,996
5,sentiment,gemma-3-4b,tokenshap,996
6,sentiment,gpt4o-mini,aconceptx,919
7,sentiment,gpt4o-mini,conceptshap,937
8,sentiment,gpt4o-mini,conceptx,929
9,sentiment,gpt4o-mini,conceptx_a,928


In [22]:
# Convert relevant columns to numeric
df_all["p0"] = pd.to_numeric(df_all["p0"], errors="coerce")
df_all["p_highest"] = pd.to_numeric(df_all["p_highest"], errors="coerce")
df_all["p_label"] = pd.to_numeric(df_all["p_label"], errors="coerce")


In [23]:
# Filter only by dataset and p0 threshold (no longer filtering by model)
df = df_all[(df_all["dataset"] == dataset)]
df = df[df["p0"] > 0.5]

# Compute diffs
df["p0_p_highest_diff"] = df["p0"] - df["p_highest"]
df["p0_p_label_diff"] = df["p0"] - df["p_label"]

# Compute stats: group by explainer and model
stats = df.groupby(["explainer", "model"])[["p0_p_highest_diff"]].agg(["mean", "var"])

# Compute stats: group by explainer and model
df_label = df[['model', 'explainer', "p0_p_label_diff"]]
df_label = df_label.rename(columns={'p0_p_label_diff': 'p0_p_highest_diff'})

stats_label = df_label.groupby(["model"])[["p0_p_highest_diff"]].agg(["mean", "var"])
# Step 2: Add new level to index to create the new explainer row
stats_label["explainer"] = "gpt4o-mini_explainer"
stats_label = stats_label.set_index("explainer", append=True)
stats_label.index = stats_label.index.reorder_levels(["explainer", "model"])

# Step 3: Concatenate with df1
df_combined = pd.concat([stats, stats_label])

# Step 4: Optional: sort for clarity
df_combined = df_combined.sort_index()


# Flatten multi-index columns
df_combined.columns = [f"{model}_{stat}" for metric, stat in df_combined.columns for model in [metric.split("_diff")[0]]]

# Pivot so each model becomes a column group
df_combined = df_combined.unstack("model")

# Reorder and rename explainers
desired_order = ["random", "tokenshap", "conceptshap", "conceptx", "aconceptx", "conceptx_a", "gpt4o-mini_explainer"]
explainer_map = {
    'conceptx': 'ConceptX',
    'aconceptx': 'AntonymConceptX',
    'conceptx_r': 'ConceptX-R',
    'conceptx_a': 'ConceptX-A',
    'conceptshap': 'ConceptSHAP',
    'tokenshap': 'TokenSHAP',
    'random': 'Random',
    'gpt4o-mini_explainer': 'GPT-4o mini'
}
df_combined = df_combined.rename(index=explainer_map)
df_combined = df_combined.reindex([explainer_map.get(e, e) for e in desired_order])

df_combined.columns = df_combined.columns.swaplevel()
df_combined = df_combined.sort_index(axis=1)
df_combined

model,gemma-3-4b,gemma-3-4b,gpt4o-mini,gpt4o-mini,llama-3-3b,llama-3-3b
Unnamed: 0_level_1,p0_p_highest_mean,p0_p_highest_var,p0_p_highest_mean,p0_p_highest_var,p0_p_highest_mean,p0_p_highest_var
explainer,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Random,0.136672,0.068833,0.137902,0.070212,0.13565,0.066046
TokenSHAP,0.384658,0.139008,0.428577,0.135817,0.155412,0.07119
ConceptSHAP,0.321727,0.136756,0.367423,0.139335,0.175997,0.091633
ConceptX,0.294545,0.13021,0.308379,0.13438,0.203254,0.098645
AntonymConceptX,0.386466,0.14504,0.346844,0.135727,0.221914,0.105111
ConceptX-A,0.23669,0.112373,0.300422,0.129533,0.193819,0.096515
GPT-4o mini,0.502124,0.127368,0.500553,0.127427,0.500327,0.129132


In [24]:

MODEL_NAMES = {"gpt2": "GPT-2", "gemma-2-2b":"Gemma-2-2B", "gemma-3-4b":"Gemma-3-4B", "gpt4o-mini": "GPT-4o mini", "llama-3-3b": "Llama-3.2-3B"}
model_map = MODEL_NAMES
model_order = ["Llama-3.2-3B", "Gemma-3-4B", "GPT-4o mini"]


# Rename models in column MultiIndex
df_combined.columns = pd.MultiIndex.from_tuples([
    (model_map.get(model, model), metric) for model, metric in df_combined.columns
])

# Reorder columns according to model_order
df_combined = df_combined[model_order]
df_combined

Unnamed: 0_level_0,Llama-3.2-3B,Llama-3.2-3B,Gemma-3-4B,Gemma-3-4B,GPT-4o mini,GPT-4o mini
Unnamed: 0_level_1,p0_p_highest_mean,p0_p_highest_var,p0_p_highest_mean,p0_p_highest_var,p0_p_highest_mean,p0_p_highest_var
explainer,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Random,0.13565,0.066046,0.136672,0.068833,0.137902,0.070212
TokenSHAP,0.155412,0.07119,0.384658,0.139008,0.428577,0.135817
ConceptSHAP,0.175997,0.091633,0.321727,0.136756,0.367423,0.139335
ConceptX,0.203254,0.098645,0.294545,0.13021,0.308379,0.13438
AntonymConceptX,0.221914,0.105111,0.386466,0.14504,0.346844,0.135727
ConceptX-A,0.193819,0.096515,0.23669,0.112373,0.300422,0.129533
GPT-4o mini,0.500327,0.129132,0.502124,0.127368,0.500553,0.127427
