In [59]:
import csv
import os
# Root directory of the web-interface prototype, built from the DATA environment variable.
# NOTE(review): this is plain string concatenation, so the result is only a valid path
# when $DATA already ends with a path separator -- confirm, or switch to os.path.join.
interface_dir = os.environ["DATA"] + "webinterfaces/int03_prototype/"
# Directory containing the "<task name>_content.csv" files read by compute_score.
tasks_dir = os.path.join(interface_dir, "res", "tasks")
# Positions inside the "data" list of an experiment result file; compute_score pairs the
# i-th index with the i-th task name it is given (EASY_TASKS + DIFFICULT_TASKS below).
# AI_J1_* is used for the CF/AI/SHAP/LLM result files, H_J1_* for the H result files.
AI_J1_tasks_indices = [33, 35, 40, 42, 48, 50, 55, 57]
H_J1_tasks_indices = [32, 34, 38, 40, 45, 47, 51, 53]

# Task names; each one is the stem of a "<name>_content.csv" file in tasks_dir.
EASY_TASKS = ["easy_3_7_purple", "easy_4_row_triangle", "easy_5_5_yellow", "easy_6_row_square"]
DIFFICULT_TASKS = ["hard_3_yellow_circle_plus_triangle_9", "hard_4_purple_squares_times2_circles", "hard_5_purple_triangle_plus_square_7", "hard_6_blue_circles_times2_triangles"]


In [19]:
import json

def load_json_file(path):
    """Read a JSON file and return its decoded content.

    Parameters
    ----------
    path : str or os.PathLike
        Location of the JSON file.

    Returns
    -------
    object
        The decoded JSON document (a dict, list, or scalar).

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file does not contain valid JSON.
    """
    # JSON is UTF-8 by specification; without an explicit encoding open()
    # falls back to the platform locale and can mis-decode non-ASCII text.
    with open(path, encoding="utf-8") as json_data:
        return json.load(json_data)


In [20]:
def load_task_csv_file(path):
    """Read the "target" and "pred" columns of a task CSV file.

    Parameters
    ----------
    path : str or os.PathLike
        Location of a CSV file with a header row that contains at least the
        columns "target" and "pred", both holding integer literals.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        ``(y_true, y_pred)``: the integer values of the "target" and "pred"
        columns, in file order. Both arrays are empty integer arrays when the
        file has no data rows.

    Raises
    ------
    KeyError
        If a row lacks the "target" or "pred" column.
    ValueError
        If a value cannot be parsed as an integer.
    """
    y_true, y_pred = [], []
    # The csv module requires newline="" so that it handles line endings
    # (including newlines embedded in quoted fields) itself.
    with open(path, newline="") as csv_data:
        for row in csv.DictReader(csv_data):
            y_true.append(int(row["target"]))
            y_pred.append(int(row["pred"]))

    # An explicit dtype keeps the result integer-typed even when there are no
    # rows (np.array([]) would otherwise default to float64).
    return np.array(y_true, dtype=int), np.array(y_pred, dtype=int)

In [44]:
import numpy as np

def extract_p_expe_results(d_p_expe):
    """Turn one experiment-page record into per-question result vectors.

    Parameters
    ----------
    d_p_expe : dict
        Maps a key to a per-question dict. Each per-question dict is read for
        "order_index" (position of the question in the output vectors),
        "answers" (a list whose first element is a possibly empty list of
        answer values), "time" and "is_time_exceeded". An optional
        "time_on_page" entry is ignored.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray, numpy.ndarray)
        ``(answers_vect, time_vect, time_exceeded_vect)``, each of length equal
        to the number of questions and indexed by "order_index":
        the first answer value as float (NaN when no answer was given), the
        "time" value as float, and the "is_time_exceeded" flag as bool.

    Raises
    ------
    IndexError
        If an "order_index" is outside ``[-n, n)`` for ``n`` questions.
    """
    # Count questions by excluding the page-level entry explicitly, so the
    # size is right whether or not "time_on_page" is present.
    questions = [v for k, v in d_p_expe.items() if k != "time_on_page"]
    nb_quest = len(questions)

    answers_vect = np.zeros((nb_quest,), dtype=float)
    time_vect = np.zeros((nb_quest,), dtype=float)
    time_exceeded_vect = np.zeros((nb_quest,), dtype=bool)

    for v in questions:
        curr_idx = v["order_index"]
        # Only the first value of the first answer group is kept; a missing
        # or empty group means the question was left unanswered -> NaN.
        answers = v["answers"]
        first_group = answers[0] if answers else None
        answers_vect[curr_idx] = first_group[0] if first_group else np.nan
        time_vect[curr_idx] = v["time"]
        time_exceeded_vect[curr_idx] = v["is_time_exceeded"]

    return answers_vect, time_vect, time_exceeded_vect

In [56]:
from numpy import logical_not


def compute_score(exp_output_file, tasks_dir, expe_file_indices, tasks_names, verbose=True):
    """Compute the fraction of correctly answered questions over several tasks.

    Parameters
    ----------
    exp_output_file : str or os.PathLike
        JSON result file; its "data" entry is indexed by ``expe_file_indices``.
    tasks_dir : str or os.PathLike
        Directory containing one "<task name>_content.csv" file per task.
    expe_file_indices : sequence of int
        For each task, the position of its record inside ``exp_d["data"]``.
    tasks_names : sequence of str
        Task names, paired position-by-position with ``expe_file_indices``.
    verbose : bool, default True
        When true, print the per-question correctness vector of every task.

    Returns
    -------
    float
        Number of correct answers divided by the total number of questions.

    Raises
    ------
    ValueError
        If a task has a different number of answers than ground-truth rows.
    ZeroDivisionError
        If no question was scored at all (e.g. empty ``expe_file_indices``).
    """
    exp_d = load_json_file(exp_output_file)
    nb_correct = 0
    nb_total = 0

    for i, file_index in enumerate(expe_file_indices):
        task_csv = os.path.join(tasks_dir, tasks_names[i] + "_content.csv")
        y_true, _ = load_task_csv_file(task_csv)
        answers_vect, _, _ = extract_p_expe_results(exp_d["data"][file_index])

        # Fail loudly instead of letting a length mismatch collapse the
        # element-wise comparison into a single (wrong) value.
        if answers_vect.shape != y_true.shape:
            raise ValueError(
                "Task '{}': {} answers but {} ground-truth rows".format(
                    tasks_names[i], answers_vect.shape[0], y_true.shape[0]
                )
            )

        # Unanswered questions are stored as NaN. logical_not(NaN) is False,
        # which would wrongly match every target equal to 0, so they are
        # masked out and always counted as incorrect.
        answered = ~np.isnan(answers_vect)
        # The answer value is inverted before comparison: answer 0 is compared
        # as 1/True and any non-zero answer as 0/False.
        answers_vect_binary = logical_not(answers_vect)
        is_correct = (answers_vect_binary == y_true) & answered

        if verbose:
            print(is_correct)
        nb_correct += np.sum(is_correct)
        nb_total += len(is_correct)

    return nb_correct/nb_total




CF1

In [63]:
# Score the TESTCF1.json result file with the AI_J1 task indices over all easy and difficult tasks.
path = os.path.join(interface_dir, "res", "results", "TESTCF1.json")
compute_score(path, tasks_dir, AI_J1_tasks_indices, EASY_TASKS+DIFFICULT_TASKS)

[ True  True  True  True  True  True  True  True  True  True]
[ True  True  True  True  True  True  True  True  True  True]
[ True  True  True  True  True  True  True  True  True  True]
[ True  True  True  True  True  True  True  True  True  True]
[ True  True  True  True  True  True  True  True False  True]
[ True  True  True  True  True  True  True  True  True  True]
[ True  True  True  True False False False  True  True False]
[ True False False  True  True  True  True False False False]


0.875

H1

In [64]:
# Score the TESTH1.json result file; this cell uses the H_J1 task indices rather than the AI_J1 ones.
path = os.path.join(interface_dir, "res", "results", "TESTH1.json")
compute_score(path, tasks_dir, H_J1_tasks_indices, EASY_TASKS+DIFFICULT_TASKS)

[ True  True  True  True  True  True  True  True  True  True]
[ True  True  True  True  True  True  True  True  True  True]
[ True  True  True  True  True  True  True  True  True  True]
[ True  True  True  True  True  True  True False  True  True]
[ True  True  True  True  True  True  True  True False  True]
[ True  True  True  True  True  True  True  True  True  True]
[False  True False  True False  True  True  True False False]
[ True False False  True  True  True  True  True False False]


0.8625

AI1

In [65]:
# Score the TESTA1.json result file with the AI_J1 task indices over all easy and difficult tasks.
path = os.path.join(interface_dir, "res", "results", "TESTA1.json")
compute_score(path, tasks_dir, AI_J1_tasks_indices, EASY_TASKS+DIFFICULT_TASKS)

[ True  True  True  True  True  True  True  True  True  True]
[ True  True  True  True  True  True  True  True  True  True]
[False  True  True  True  True  True  True  True  True  True]
[ True  True  True  True  True  True  True  True  True  True]
[ True  True  True  True  True  True  True  True False  True]
[ True  True  True  True  True  True  True  True  True  True]
[False  True False  True False False  True  True  True False]
[False False False  True  True  True  True False False False]


0.8375

SHAP

In [66]:
# Score the TESTSH1.json result file with the AI_J1 task indices over all easy and difficult tasks.
# The path must be bound to `path` (not `Lpath`), otherwise compute_score silently
# re-scores whatever file the previously executed cell left in `path`.
path = os.path.join(interface_dir, "res", "results", "TESTSH1.json")
compute_score(path, tasks_dir, AI_J1_tasks_indices, EASY_TASKS+DIFFICULT_TASKS)

[ True  True  True  True  True False  True  True  True  True]
[ True  True  True  True  True  True  True  True  True  True]
[ True  True  True  True  True  True False  True  True  True]
[ True  True  True  True  True  True  True  True  True  True]
[ True  True  True False  True  True  True  True False  True]
[ True False  True  True  True  True  True  True False  True]
[ True  True  True  True  True  True False  True  True  True]
[ True  True  True  True  True  True  True False  True False]


0.8875

LLM1

In [67]:
# Score the TESTT1.json result file with the AI_J1 task indices over all easy and difficult tasks.
path = os.path.join(interface_dir, "res", "results", "TESTT1.json")
compute_score(path, tasks_dir, AI_J1_tasks_indices, EASY_TASKS+DIFFICULT_TASKS)

[ True  True  True  True  True  True  True  True  True  True]
[ True  True  True  True  True  True  True  True  True  True]
[ True  True  True  True  True  True  True  True  True  True]
[ True  True  True  True  True  True  True  True  True  True]
[False  True  True  True False  True  True  True False  True]
[ True False  True  True  True  True  True  True False  True]
[ True False  True  True  True  True  True  True  True  True]
[ True  True  True False  True  True  True False  True False]


0.8875

CF2

In [68]:
# Score the TESTCF2.json result file with the AI_J1 task indices over all easy and difficult tasks.
path = os.path.join(interface_dir, "res", "results", "TESTCF2.json")
compute_score(path, tasks_dir, AI_J1_tasks_indices, EASY_TASKS+DIFFICULT_TASKS)

[ True  True  True  True  True  True  True  True  True  True]
[ True  True  True  True  True  True  True  True  True False]
[ True  True  True  True  True  True  True  True  True  True]
[ True  True  True False  True  True  True  True  True  True]
[ True  True  True  True False  True  True  True False  True]
[ True  True  True  True  True  True  True  True False  True]
[False  True  True  True  True False  True False  True False]
[False  True  True  True  True  True  True False  True  True]


0.8625

H2

In [69]:
# Score the TESTH2.json result file; this cell uses the H_J1 task indices rather than the AI_J1 ones.
path = os.path.join(interface_dir, "res", "results", "TESTH2.json")
compute_score(path, tasks_dir, H_J1_tasks_indices, EASY_TASKS+DIFFICULT_TASKS)

[ True  True  True  True  True  True  True  True  True  True]
[ True  True  True  True  True  True  True  True  True  True]
[ True  True  True  True  True  True  True  True  True  True]
[ True  True  True  True  True  True  True  True  True  True]
[ True  True  True  True  True  True  True  True  True  True]
[ True  True  True  True  True  True  True  True False  True]
[False  True  True  True  True  True  True  True False False]
[ True  True  True False  True  True  True False  True  True]


0.925

AI2

In [70]:
# Score the TESTA2.json result file with the AI_J1 task indices over all easy and difficult tasks.
path = os.path.join(interface_dir, "res", "results", "TESTA2.json")
compute_score(path, tasks_dir, AI_J1_tasks_indices, EASY_TASKS+DIFFICULT_TASKS)

[ True  True  True  True  True  True  True  True  True  True]
[ True  True  True  True  True  True  True  True  True  True]
[ True  True  True  True  True  True  True  True  True  True]
[ True  True  True False  True  True  True  True  True  True]
[False  True  True  True  True  True  True  True False  True]
[ True False  True  True  True  True  True  True  True  True]
[ True False  True  True  True  True False  True  True  True]
[ True  True  True  True  True  True  True False  True False]


0.9