In [None]:
import pandas as pd
import numpy as np
from methods import data_handler
from tqdm import tqdm
import matplotlib.pyplot as plt

from torchvision import datasets
import os
from itertools import chain, product

## Read questionnaire information from picking procedure

In [None]:
# Load the pickled questionnaires and the label lookup through the project's data_handler helper.
# NOTE(review): presumably each questionnaire is a sequence of question tuples whose first
# entry is an image index (that is how the enrichment loop consumes them) — confirm.
questionnaires = data_handler.get_questionaires("data2/questionaires_shuffled.pickle")
labels = data_handler.get_labels()

In [None]:
# ImageFolder instances keyed by root path, so repeated look-ups do not rescan the dataset.
_IMG_FOLDER_CACHE = {}


def get_label_from_img_idx(img_idx, testset_path, labels):
    """Return the true label name of the image at position ``img_idx`` in a test set.

    Parameters
    ----------
    img_idx : int
        Position of the image in ``ImageFolder(root=testset_path).imgs``.
    testset_path : str
        Root directory of the image folder; the name of each image's parent
        directory is used as its class key.
    labels : mapping
        Maps the class key (directory name, a string) to a sequence whose
        element at index 1 is the label name that is returned.

    Returns
    -------
    The entry ``labels[<parent directory name>][1]`` for the selected image.
    """
    # Constructing an ImageFolder walks the whole directory tree, so build it
    # once per root path and reuse it on subsequent calls.
    img_folder = _IMG_FOLDER_CACHE.get(testset_path)
    if img_folder is None:
        img_folder = datasets.ImageFolder(root=testset_path)
        _IMG_FOLDER_CACHE[testset_path] = img_folder
    img_path = img_folder.imgs[img_idx][0]
    # The parent directory of the image file is the key of its true class.
    class_idx_true_str = os.path.basename(os.path.dirname(img_path))
    return labels[class_idx_true_str][1]

In [None]:
# Prefix every question tuple with the true label name of the image it refers to.
testset_path = "data2/imagenetv2-matched-frequency-format-val"
questionnaires_2 = [
    [
        (get_label_from_img_idx(question[0], testset_path, labels),) + question
        for question in questionnaire
    ]
    for questionnaire in tqdm(questionnaires)
]

## Load question codes used in SoSci

In [None]:
# One code file exists per questionnaire (questionaire_1.txt ... questionaire_12.txt).
num_questionnaires = 12
codes_list = []
for i in range(1, num_questionnaires + 1):
    # The files are ';'-separated without a header; the second field carries the question code.
    codes = pd.read_csv(f"questionaires_shuffle_order/questionaire_{i}.txt", sep=";", names=[0, 1])[1]
    # Raw string: "\w" and "\d" are invalid escape sequences in a normal string literal.
    # The pattern keeps one word character followed by three digits.
    codes = codes.str.extract(r"(\w\d{3})")
    codes_list.append(list(codes[0]))

## Create the question metadata DataFrame

In [None]:
# Flatten all questionnaires into one row per question, then attach the question codes.
question_rows = list(chain.from_iterable(questionnaires_2))
question_codes = list(chain.from_iterable(codes_list))
df_quest_meta = pd.DataFrame(question_rows)
df_quest_meta[5] = question_codes
df_quest_meta.columns = ["label", "img_idx", "model", "method", "is_pred_correct", "question_code"]

In [None]:
# Show the question metadata table (one row per question) as a visual sanity check.
df_quest_meta

## Load and transform questionnaire data

In [None]:
# Survey export in wide format: data_tu-helpfulness-of-xai_2022-06-23_20-20.xlsx
df = pd.read_excel("data2/data_tu-helpfulness-of-xai_2022-06-23_20-20.xlsx")
# Answer-code table (tab-separated, UTF-16), indexed by (VAR, RESPONSE) so that the MEANING
# of a coded answer can be looked up per variable.
# NOTE(review): the values file is dated 2022-06-28 while the data file is dated 2022-06-23 —
# confirm both belong to the same export.
df_answer_codes = pd.read_csv("data2/values_tu-helpfulness-of-xai_2022-06-28_11-48.csv", sep='\t', encoding='utf-16').set_index(["VAR", "RESPONSE"])

In [None]:
def map_answer_codes_to_textual(s, df_answer_codes):
    """Translate the coded answers of one survey column into their textual meaning.

    Parameters
    ----------
    s : pandas.Series
        One column of coded answers; ``s.name`` is used as the variable name
        for the look-up.
    df_answer_codes : pandas.DataFrame
        Look-up table addressed as ``df_answer_codes.loc[variable, code]["MEANING"]``.

    Returns
    -------
    pandas.Series
        ``s`` with every code that has an entry replaced by its ``MEANING``.
        Codes without an entry are kept unchanged. If the column as a whole
        cannot be mapped, a message is printed and ``s`` is returned as is.
    """
    def map_(s_name, e, df_answer_codes):
        try:
            return df_answer_codes.loc[s_name, e]["MEANING"]
        except Exception:
            # Best effort: values without a code entry (free text, numbers, NaN) stay as they are.
            # Catching Exception instead of everything lets KeyboardInterrupt/SystemExit through.
            return e
    try:
        return s.apply(lambda e: map_(s.name, e, df_answer_codes))
    except Exception:
        print("Error in mapping column", s.name)
        return s

In [None]:
def convert_q_data_from_wide_to_long(df):
    """Reshape the wide survey export into one row per (participant, question).

    Parameters
    ----------
    df : pandas.DataFrame
        Wide export whose row 0 holds the column descriptions. It must contain
        a ``CASE`` column, the question columns at positions 6..293 and the
        demographic columns from ``DE02`` through ``FB01_01``.

    Returns
    -------
    pandas.DataFrame
        Columns ``case``, the demographic columns (named by their description
        from row 0), ``question_code`` and ``response``. ``response`` is True
        where the coded answer equals 1 and False otherwise. Rows are sorted
        by ``case``.

    Notes
    -----
    Demographic codes are translated with ``map_answer_codes_to_textual`` using
    the notebook-level ``df_answer_codes`` table.
    """
    demo_cols = df.columns[df.columns.get_loc("DE02"):df.columns.get_loc("FB01_01") + 1]
    # Row 0 carries the human-readable descriptions; they become the demographic column names.
    column_names_demographic = list(df[demo_cols].loc[0])
    # Remove the description row so only participant rows remain.
    df = df.drop(0)

    # Unanswered questions are NaN in the wide table and are dropped here.
    df_long = pd.melt(df, id_vars="CASE", value_vars=df.columns.values[6:294]).dropna()
    df_long.columns = ["case", "question_code", "response"]
    df_long = df_long.sort_values("case")
    # Map response 1 (Yes) / 2 (No) to True / False.
    df_long["response"] = df_long["response"] == 1
    df_long = df_long.reset_index(drop=True)

    df_demo = df[demo_cols].apply(lambda s: map_answer_codes_to_textual(s, df_answer_codes))
    df_demo.columns = column_names_demographic
    # Align the demographics by participant id rather than by position, so the result stays
    # correct when participants answered different numbers of questions or when the export
    # is not sorted by CASE.
    df_demo.index = df["CASE"].values
    df_demo = df_demo.loc[df_long["case"].values].reset_index(drop=True)

    return pd.concat([df_long["case"], df_demo, df_long[df_long.columns[1:]]], axis=1)

In [None]:
# Reshape the wide survey export into one row per participant and question.
df_long = convert_q_data_from_wide_to_long(df)

# Analysis

In [None]:
# Attach the question metadata to every response (merge on the columns both frames share),
# then order the rows by participant.
df_merged = df_long.merge(right=df_quest_meta).sort_values("case")

In [None]:
# Show the merged response/metadata table as a visual sanity check.
df_merged

## Metrics on all images (fixed + random)

### Overall ability to guess model is correct if actually correct

In [None]:
# Responses to questions where the model was right, and how many of those were judged right.
model_was_correct = df_merged["is_pred_correct"] == True
judged_correct = df_merged["response"] == True
num_guessed_correct_if_correct = df_merged[model_was_correct & judged_correct].shape[0]
num_correct_overall = df_merged[model_was_correct].shape[0]

In [None]:
# Share of "model is correct" answers among all questions where the model was actually correct.
num_guessed_correct_if_correct / num_correct_overall

### Overall ability to guess model is wrong if actually wrong

In [None]:
# Responses to questions where the model was wrong, and how many of those were judged wrong.
model_was_wrong = df_merged["is_pred_correct"] == False
judged_wrong = df_merged["response"] == False
num_guessed_wrong_if_wrong = df_merged[model_was_wrong & judged_wrong].shape[0]
num_wrong_overall = df_merged[model_was_wrong].shape[0]

In [None]:
# Share of "model is wrong" answers among all questions where the model was actually wrong.
num_guessed_wrong_if_wrong / num_wrong_overall

### Per XAI method: ability to guess that the model is correct/wrong when it actually is correct/wrong

In [None]:
def create_individual_ratios(df_merged):
    """Compute, per XAI method, how often the response matches the model's correctness.

    Parameters
    ----------
    df_merged : pandas.DataFrame
        Needs the columns ``method``, ``is_pred_correct`` and ``response``.

    Returns
    -------
    pandas.DataFrame
        Columns ``method``, ``is_pred_correct``, ``response`` and ``ratio``.
        One row per (method, is_pred_correct) group, holding the share of
        responses in that group that equal ``is_pred_correct``. Two rows with
        method ``"Overall"`` are appended; each is the unweighted mean of the
        per-method ratios for ``is_pred_correct`` False and True respectively.
    """
    response_shares = (
        df_merged.groupby(by=["method", "is_pred_correct"])["response"]
        .value_counts(normalize=True)
        .to_frame("ratio")
        .reset_index()
    )
    # Keep only the share of responses that agree with the model's actual correctness.
    agrees_with_truth = response_shares["is_pred_correct"] == response_shares["response"]
    ratios = response_shares[agrees_with_truth].reset_index(drop=True)
    # Append the overall averages, first for wrong and then for correct predictions.
    for outcome in (False, True):
        mean_ratio = ratios[ratios["is_pred_correct"] == outcome]["ratio"].mean()
        ratios.loc[len(ratios)] = ["Overall", outcome, outcome, mean_ratio]
    return ratios

In [None]:
def create_individual_ratios_all(df_merged):
    """Compute, per XAI method, how often the response matches the model's correctness.

    The computation mirrors ``create_individual_ratios``. An earlier revision
    first built an unused scratch frame from the undefined notebook variable
    ``df_ratios_per_method_case``, which raised ``NameError`` on every call;
    that dead code has been removed.

    Parameters
    ----------
    df_merged : pandas.DataFrame
        Needs the columns ``method``, ``is_pred_correct`` and ``response``.

    Returns
    -------
    pandas.DataFrame
        Columns ``method``, ``is_pred_correct``, ``response`` and ``ratio``:
        one row per (method, is_pred_correct) group with the share of agreeing
        responses, plus two ``"Overall"`` rows holding the unweighted mean of
        the per-method ratios for wrong and for correct predictions.
    """
    df_ratios_per_method = df_merged.groupby(by=["method", "is_pred_correct"])["response"].value_counts(normalize=True).to_frame("ratio").reset_index()
    # Keep only the share of responses that agree with the model's actual correctness.
    df_ratios_per_method = df_ratios_per_method[df_ratios_per_method["is_pred_correct"] == df_ratios_per_method["response"]].reset_index(drop=True)
    # Append overall averages.
    df_ratios_per_method.loc[len(df_ratios_per_method)] = ["Overall", False, False, df_ratios_per_method[df_ratios_per_method["is_pred_correct"] == False]["ratio"].mean()]
    df_ratios_per_method.loc[len(df_ratios_per_method)] = ["Overall", True, True, df_ratios_per_method[df_ratios_per_method["is_pred_correct"] == True]["ratio"].mean()]
    return df_ratios_per_method

In [None]:
# Per-method agreement ratios over all images.
df_ratios_per_method = create_individual_ratios(df_merged)

In [None]:
# Show the per-method ratio table, including the appended "Overall" rows.
df_ratios_per_method

In [None]:
def plot_ratios(df_ratios_per_method, title_addition="", num_participants=None):
    """Scatter-plot the per-method ratios for correct and for wrong model predictions.

    Parameters
    ----------
    df_ratios_per_method : pandas.DataFrame
        Needs the columns ``method``, ``is_pred_correct`` and ``ratio``. Every
        method is expected to have one row for ``is_pred_correct`` True and one
        for False, because both scatter series share the de-duplicated method
        list as x values.
    title_addition : str, optional
        Text placed in front of the plot title.
    num_participants : int, optional
        Participant count shown in the title. When omitted it is derived from
        the notebook-level raw export ``df``.

    Returns
    -------
    None
        The figure is drawn through matplotlib.
    """
    if num_participants is None:
        # Row 0 of the raw export holds the column descriptions, not a participant.
        num_participants = len(df) - 1

    methods = df_ratios_per_method["method"]
    ratios = df_ratios_per_method["ratio"]
    unique_methods = methods.drop_duplicates()
    ratios_when_correct = df_ratios_per_method[df_ratios_per_method["is_pred_correct"] == True]["ratio"]
    ratios_when_wrong = df_ratios_per_method[df_ratios_per_method["is_pred_correct"] == False]["ratio"]

    fig, ax = plt.subplots(figsize=(14, 5))
    ax.scatter(x=unique_methods, y=ratios_when_correct, c="orange", label="Guessed Correct when Model Correct")
    ax.scatter(x=unique_methods, y=ratios_when_wrong, c="purple", label="Guessed Wrong when Model Wrong")
    # Horizontal reference line at 0.5.
    ax.plot(methods, [0.5] * len(df_ratios_per_method), color="red", linestyle='dashed', label="baseline")
    ax.set_title(f"{title_addition} Performance Ratios for chosen XAI-Methods ({num_participants} participants considered)", size=15)
    ax.set_xlabel("XAI-Method", size=13)
    ax.set_ylabel("Ratio", size=13)
    # Label every point with its ratio rounded to two decimals.
    for txt, method, ratio in zip(ratios.round(2), methods, ratios):
        ax.annotate(txt, (method, ratio))
    ax.legend(loc="best", edgecolor="black")

In [None]:
# Plot the per-method ratios computed over all images.
plot_ratios(df_ratios_per_method)

## Metrics only on fixed images

In [None]:
# Images that occur exactly 12 times in the question metadata are treated as the fixed images.
img_idx_counts = df_quest_meta["img_idx"].value_counts()
fixed_img_idxs = img_idx_counts[img_idx_counts == 12].index
is_fixed_img = df_quest_meta["img_idx"].isin(fixed_img_idxs)
df_quest_meta_fixed = df_quest_meta[is_fixed_img]

In [None]:
# Keep only the responses to fixed-image questions (merge on the columns both frames share),
# then order the rows by participant.
df_merged_fixed = df_long.merge(right=df_quest_meta_fixed).sort_values("case")

In [None]:
# Per-method agreement ratios restricted to the fixed images.
df_ratios_per_method_fixed = create_individual_ratios(df_merged_fixed)

In [None]:
# Plot the fixed-image ratios; the second argument is prepended to the plot title.
plot_ratios(df_ratios_per_method_fixed, "Fixed Imgs:")

## Convergence of ratio values as more participant results are taken into account

In [None]:
def create_individual_ratios_per_participant(df_merged):
    """Build running means of the per-participant agreement ratios.

    Parameters
    ----------
    df_merged : pandas.DataFrame
        Needs the columns ``case``, ``method``, ``is_pred_correct`` and ``response``.

    Returns
    -------
    pandas.DataFrame
        Expanding mean, taken within each (method, is_pred_correct) group, of
        the remaining columns; ``ratio`` holds the running mean of the
        per-participant share of responses that equal ``is_pred_correct``.
        The index levels are method, is_pred_correct and the row position.
    """
    key_columns = ["case", "method", "is_pred_correct", "response"]

    # Enumerate every combination of the observed key values, so that combinations which
    # never occur (and which value_counts() therefore omits) still get a row.
    combinations = product(*(df_merged[column].drop_duplicates() for column in key_columns))
    df_schema = pd.DataFrame(list(combinations)).groupby([0, 1, 2, 3]).count().reset_index()
    df_schema.columns = key_columns

    observed = (
        df_merged.groupby(by=["case", "method", "is_pred_correct"])["response"]
        .value_counts(normalize=True)
        .to_frame("ratio")
        .reset_index()
    )
    # Combinations that were not observed receive a ratio of 0.
    filled = df_schema.merge(right=observed, on=key_columns, how="left").fillna(0)

    # Keep only the agreeing cells: true positives and true negatives.
    agrees_with_truth = filled["is_pred_correct"] == filled["response"]
    per_participant = filled[agrees_with_truth].reset_index(drop=True)

    return per_participant.groupby(by=["method", "is_pred_correct"]).expanding().mean()

In [None]:
# Running per-method ratios as participants are added one at a time; displayed for inspection.
df_ratios_convergence = create_individual_ratios_per_participant(df_merged)
df_ratios_convergence

In [None]:
def plot_ratio_convergence(df_ratios_convergence):
    """Plot one line per (method, outcome) showing how its ratio develops.

    Parameters
    ----------
    df_ratios_convergence : pandas.DataFrame
        Frame with a ``ratio`` column whose first index level is the method and
        whose second level is the outcome (``is_pred_correct``), as returned by
        ``create_individual_ratios_per_participant``.

    Returns
    -------
    None
        The figure is drawn through matplotlib.
    """
    fig, ax = plt.subplots(figsize=(14, 5))
    ax.set_xlabel("Number Participants", size=13)
    ax.set_ylabel("Ratio", size=13)
    ax.set_title("Convergence of Ratios as Ratios are calculated over increasing Numbers of Participants", size=15)
    methods = df_ratios_convergence.index.get_level_values(0).drop_duplicates()
    outcomes = df_ratios_convergence.index.get_level_values(1).drop_duplicates()
    for method in methods:
        for outcome in outcomes:
            ratios = df_ratios_convergence.loc[method, outcome]["ratio"]
            # The k-th value already covers k participants, so the axis starts at 1, not 0.
            ax.plot(list(range(1, len(ratios) + 1)), ratios, label=f"{method}, {outcome}")
    ax.legend(loc="lower center", edgecolor="black")

In [None]:
# Plot how the per-method ratios develop as more participants are included.
plot_ratio_convergence(df_ratios_convergence, )