In [75]:
import os

# Root directory of the experiment's web interface data.
# Built with os.path.join so that $DATA works with or without a trailing
# separator; the final "" keeps the trailing separator the previous
# string-concatenated value had.
interface_dir = os.path.join(os.environ["DATA"], "webinterfaces", "exp02", "")

# Sub-directories of the interface used throughout the notebook
tasks_dir = os.path.join(interface_dir, "res", "tasks")
results_dir = os.path.join(interface_dir, "results")
protocols_dir = os.path.join(interface_dir, "protocols")
prolific_matching_dir = os.path.join(interface_dir, "prolific")

# Experimental group -> path of the protocol JSON file used for that group
protocol_paths_d = {
    "H": os.path.join(protocols_dir, "H_0.json"),
    "H+AI": os.path.join(protocols_dir, "AI_0.json"),
    "H+AI+CF": os.path.join(protocols_dir, "XAI_CF_0.json"),
    "H+AI+SHAP": os.path.join(protocols_dir, "XAI_SHAP_0.json"),
    "H+AI+LLM": os.path.join(protocols_dir, "XAI_LLM_0.json"),
    "H+AI+GRADCAM": os.path.join(protocols_dir, "XAI_GRADCAM_0.json"),
}

# NOTE(review): not referenced by any cell visible here; presumably the minimum
# score required on the comprehension task -- confirm before relying on it.
COMPREHENSION_THRESHOLD = 0.8

# Task names grouped by role / difficulty
COMPREHENSION_TASKS = ["xeasy1_find_pattern_rot"]
TRAINING_TASKS = ["med3_find_pattern_rot"]
EASY_TASKS = ["easy1_find_pattern_rot", "easy3_find_pattern_rot"]
DIFFICULT_TASKS = ["hard1_find_pattern_rot", "hard3_find_pattern_rot"]

# The same main tasks grouped by pressure level
MILD_PRESSURE_TASKS = ["easy1_find_pattern_rot", "hard1_find_pattern_rot"]
STRONG_PRESSURE_TASKS = ["easy3_find_pattern_rot", "hard3_find_pattern_rot"]

# Task name -> key identifying that task inside a protocol file
TASK_PROTOCOL_KEYS = {
    "easy1_find_pattern_rot": "mainexp_easy_mild_patrot_task",
    "easy3_find_pattern_rot": "mainexp_easy_strong_patrot_task",

    "hard1_find_pattern_rot": "mainexp_hard_mild_patrot_task",
    "hard3_find_pattern_rot": "mainexp_hard_strong_patrot_task",

    "xeasy1_find_pattern_rot": "intro_comprehension_task",
    "med3_find_pattern_rot": "intro_training_1_task"
}

# Bonus paid to a participant whose score is 1.0 (the score is used as a
# multiplier of this amount when the bonus files are generated).
# NOTE(review): the meaning of the factors 53 and 0.04 is not visible here --
# presumably minutes/items times a unit rate; confirm and name them.
full_bonus_pay = 53*0.04


In [76]:
import json

def data_matching(protocols_paths_d, prolific_matching_files):
    """Group participants' result filenames by experimental condition.

    Parameters
    ----------
    protocols_paths_d : dict
        Dictionary whose keys are the group names ("H", "H+AI", "H+AI+CF", ...).
        Only the keys are used; every key gets an (initially empty) list in
        the output.
    prolific_matching_files : iterable of str
        Paths of JSON files mapping a Prolific ID to a dictionary that holds
        at least a "protocol" string such as "H_0" or "XAI_SHAP_0".

    Returns
    -------
    dict
        Group name -> list of "<prolific_id>.json" filenames, in file order.
        Entries whose protocol prefix is not recognised are ignored.

    Raises
    ------
    KeyError
        If a recognised protocol maps to a group missing from
        ``protocols_paths_d``.
    """

    # Use the argument rather than the notebook-level global of a similar
    # name, so the function does not depend on hidden kernel state.
    results_filenames_d = {k: [] for k in protocols_paths_d.keys()}

    for prolific_matching_file in prolific_matching_files:

        with open(prolific_matching_file) as json_data:
            d = json.load(json_data)

        for prolific_id, prot_dict in d.items():
            condition_split = prot_dict["protocol"].split("_")
            # The ID is kept verbatim: the name must match the results file on disk
            filename = prolific_id + ".json"

            if condition_split[0] == "H":
                group_key = "H"
            elif condition_split[0] == "AI":
                group_key = "H+AI"
            elif condition_split[0] == "XAI" and condition_split[1] in ("SHAP", "CF", "LLM", "GRADCAM"):
                group_key = "H+AI+" + condition_split[1]
            else:
                # Unknown protocol: not assigned to any group
                continue

            results_filenames_d[group_key].append(filename)

    return results_filenames_d

In [78]:
# results_filenames_d = data_matching(protocol_paths_d, [os.path.join(prolific_matching_dir, "prolific.json")])

In [104]:
# Match participants to their experimental group using the "21-1" Prolific export
prolific_matching_path = os.path.join(prolific_matching_dir, "prolific_21-1.json")
results_filenames_d = data_matching(protocol_paths_d, [prolific_matching_path])


In [105]:
# Display the group -> result filenames mapping
results_filenames_d

{'H': ['66c9f7305f8a64d04cb5344c.json',
  '6105ba07b4fa0867eaed2f5b.json',
  '6658b23d488052210af6eb17.json',
  '5fe0b183d8918b2030aa99a0.json',
  '6777c88074566bdef374f9e5.json',
  '66bdde69417a86ba178f7871.json',
  '5cfbf333ac28dd00196e807b.json',
  '66dd8fd2905d609d29ccf008.json',
  '60a7af637db81b5c05648d92.json',
  '613dd5fb512aabe8d5d32393 .json',
  '5b2e41eb1d2f710001fb32db.json',
  '5b2a2d855b5c0900018b0bf5.json',
  '671c1c699ca32031a83014e8.json',
  '665dc46f6552a7a3a0039fe6.json',
  '5fb281433a31c686f0ec3ecb.json',
  '60e845f301f5a5901200baf4.json',
  '6033aebb7228c70967a3ab43.json',
  '63f77a5dde1ea03305e119e0.json',
  '631b41500bed6fddcc87c089.json'],
 'H+AI': ['5b68c9eb87af310001584803.json',
  '5c1575e62a407b0001ff2a4c.json',
  '5ea00d4b1286ee0008405450.json',
  '65e4a6bdde84e615dcac2651.json',
  '65a3ec0d50e146d8bd324e1f.json',
  '6458fe267afe71c7aa4de260.json',
  '5b017ef1293d310001023bd8.json',
  '5d7fea714477a000012cc297.json',
  '67606663e5ad15d60ffeb222.json',
  '59

In [106]:
# Number of participants matched to each experimental group
for group_key, group_filenames in results_filenames_d.items():
    group_size = len(group_filenames)
    print(f"{group_key}: {group_size}")

H: 19
H+AI: 19
H+AI+CF: 20
H+AI+SHAP: 19
H+AI+LLM: 20
H+AI+GRADCAM: 20


In [107]:
import os
import sys

# Make the local WebXAII checkout importable.
# The location can be overridden with the WEBXAII_DIR environment variable;
# the historical hard-coded path is kept as the default.
webxaii_dir = os.environ.get("WEBXAII_DIR", "/home/jleguy/Documents/postdoc/git_repos/WebXAII/")

# Guarded so that re-running the cell does not append duplicates
if webxaii_dir not in sys.path:
    sys.path.append(webxaii_dir)

In [108]:
import json
import csv
import numpy as np


def load_json(path):
    """Read the JSON document stored at ``path`` and return the parsed object."""
    with open(path) as handle:
        parsed = json.load(handle)
    return parsed


def load_task_csv_file(path):
    """Load the "target" and "pred" columns of a task CSV file.

    Returns a tuple ``(y_true, y_pred)`` of numpy arrays holding the integer
    values of the "target" and "pred" columns, in row order.
    """
    with open(path) as csv_data:
        # Convert row by row so that a malformed row fails at the same point
        pairs = [(int(row["target"]), int(row["pred"])) for row in csv.DictReader(csv_data)]

    targets = [target for target, _ in pairs]
    predictions = [pred for _, pred in pairs]

    return np.array(targets), np.array(predictions)


In [109]:
from pywebxaii.resretrieval import extract_p_task_results


def compute_scores_by_key(results_dir, results_filenames_d, protocol_paths_d, tasks_dir, tasks_names, task_protocol_keys):
    """Compute each participant's proportion of correct answers over a set of tasks.

    Parameters
    ----------
    results_dir : str
        Directory containing the participants' result JSON files.
    results_filenames_d : dict
        Group name -> list of result filenames for that group.
    protocol_paths_d : dict
        Group name -> path of the protocol JSON file of that group.
    tasks_dir : str
        Directory containing the "<task_name>_content.csv" files.
    tasks_names : list of str
        Names of the tasks to score.
    task_protocol_keys : dict
        Task name -> key of that task in the protocol.

    Returns
    -------
    dict
        Group name -> {participant id -> score}. The score is the number of
        correct answers divided by the number of answers, or ``None`` when it
        cannot be computed (answers not comparable to the ground truth, or
        no answer at all). Participants whose results file is not flagged
        "is_completed" are left out.
    """
    output_res_scores_d = {}

    # Task name -> expected answers. Each task file is read at most once,
    # and only when a completed participant actually needs it.
    expected_answers_by_task = {}

    def _expected_answers(task_name):
        """Return the (cached) expected answer vector of ``task_name``."""
        if task_name not in expected_answers_by_task:
            task_true, _ = load_task_csv_file(os.path.join(tasks_dir, task_name + "_content.csv"))
            # Answers are compared to the negated target.
            # NOTE(review): presumably answer index 0 stands for target 1 -- confirm.
            expected_answers_by_task[task_name] = np.logical_not(task_true)
        return expected_answers_by_task[task_name]

    # Iterating over all groups
    for group_key, filenames_list in results_filenames_d.items():

        output_res_scores_d[group_key] = {}

        # Iterating on all results files for the current group
        for filename in filenames_list:
            # Only the extension is removed, so the id is kept verbatim
            prolific_id = os.path.splitext(filename)[0]
            curr_res_d = load_json(os.path.join(results_dir, filename))
            if not curr_res_d["is_completed"]:
                continue

            nb_questions = 0
            nb_correct = 0
            early_break = False

            # Iterating over all tasks
            for task_name in tasks_names:

                expected_answers = _expected_answers(task_name)

                # The protocol is reloaded for every call on purpose: whether
                # extract_p_task_results mutates it cannot be told from here.
                # NOTE(review): load it once per group if it is read-only.
                answers_idx_vect, _, _, _, _ = \
                    extract_p_task_results(curr_res_d,
                                           task_protocol_keys[task_name],
                                           protocol_d=load_json(protocol_paths_d[group_key]))

                nb_questions += len(answers_idx_vect)
                try:
                    nb_correct += np.sum(answers_idx_vect == expected_answers)

                # Happens if the results file is not complete
                except ValueError:
                    print("ValueError exception")
                    output_res_scores_d[group_key][prolific_id] = None

                    early_break = True
                    break

            if not early_break:
                if nb_questions == 0:
                    # No answer to score: report "no score" instead of dividing by zero
                    output_res_scores_d[group_key][prolific_id] = None
                else:
                    output_res_scores_d[group_key][prolific_id] = nb_correct / nb_questions

    return output_res_scores_d


In [110]:
# Score every participant on the main-experiment tasks (easy and difficult)
scored_tasks = EASY_TASKS + DIFFICULT_TASKS
all_scores = compute_scores_by_key(
    results_dir,
    results_filenames_d,
    protocol_paths_d,
    tasks_dir,
    scored_tasks,
    TASK_PROTOCOL_KEYS,
)

In [111]:
# Display the per-group, per-participant scores
all_scores

{'H': {'66c9f7305f8a64d04cb5344c': 0.625,
  '6658b23d488052210af6eb17': 0.6875,
  '5fe0b183d8918b2030aa99a0': 0.8125,
  '6777c88074566bdef374f9e5': 0.7916666666666666,
  '66bdde69417a86ba178f7871': 0.7916666666666666,
  '66dd8fd2905d609d29ccf008': 0.6666666666666666,
  '60a7af637db81b5c05648d92': 0.7291666666666666,
  '613dd5fb512aabe8d5d32393 ': 0.5625,
  '5b2e41eb1d2f710001fb32db': 0.7916666666666666,
  '5b2a2d855b5c0900018b0bf5': 0.7083333333333334,
  '671c1c699ca32031a83014e8': 0.4791666666666667,
  '665dc46f6552a7a3a0039fe6': 0.6875,
  '5fb281433a31c686f0ec3ecb': 0.7083333333333334,
  '6033aebb7228c70967a3ab43': 0.75,
  '63f77a5dde1ea03305e119e0': 0.8333333333333334,
  '631b41500bed6fddcc87c089': 0.7083333333333334},
 'H+AI': {'5b68c9eb87af310001584803': 0.8125,
  '5c1575e62a407b0001ff2a4c': 0.6875,
  '5ea00d4b1286ee0008405450': 0.8958333333333334,
  '65e4a6bdde84e615dcac2651': 0.8125,
  '65a3ec0d50e146d8bd324e1f': 0.8958333333333334,
  '6458fe267afe71c7aa4de260': 0.9375,
  '5b017

In [112]:
def gen_bonus_files(scores, bonus_reward_location, accepted_location, full_bonus_pay):
    """Write the bonus-payment and accepted-participants CSV files.

    Parameters
    ----------
    scores : dict
        Group name -> {participant id -> score}. A score of ``None`` means
        that no score could be computed for that participant.
    bonus_reward_location : str
        Path of the CSV to write with one "<participant id>,<bonus>" row per
        scored participant; the bonus is ``score * full_bonus_pay`` formatted
        with two decimals.
    accepted_location : str
        Path of the CSV to write with one participant id per row.
    full_bonus_pay : float
        Bonus corresponding to a score of 1.

    Notes
    -----
    Participants without a score are left out of both files and listed on
    standard output so they can be handled manually. Surrounding whitespace
    is removed from the ids written to the files. The printed total is the
    sum of the unrounded bonuses.
    """
    csv_data_reward = []
    csv_data_accepted = []
    unscored_ids = []
    payment_sum = 0

    for group_scores in scores.values():
        for prolific_id, score in group_scores.items():
            # Ids may carry stray whitespace typed by the participant
            clean_id = prolific_id.strip()

            if score is None:
                unscored_ids.append(clean_id)
                continue

            payment = score*full_bonus_pay
            csv_data_reward.append([clean_id, f"{payment:.2f}"])
            csv_data_accepted.append([clean_id])
            payment_sum += payment

    with open(bonus_reward_location, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows(csv_data_reward)

    with open(accepted_location, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows(csv_data_accepted)

    if unscored_ids:
        print(f"Skipped {len(unscored_ids)} participants without a score: {unscored_ids}")

    participants = len(csv_data_reward)
    if participants == 0:
        print("No participant to pay")
    else:
        print(f"Total payment for {participants} participants is {payment_sum}, corresponding to a mean value of {payment_sum/participants}")


In [114]:
# Write the bonus and accepted-participants files of the "21-1" batch
rewards_csv_path = os.path.join(results_dir, "rewards_21-1.csv")
accepted_csv_path = os.path.join(results_dir, "accepted_21-1.csv")
gen_bonus_files(all_scores, rewards_csv_path, accepted_csv_path, full_bonus_pay)

Total payment for 98 participants is 164.78583333333341, corresponding to a mean value of 1.6814880952380962
