# Dysfunctional ball task

## Imports

In [None]:
import pandas as pd
import os
from os.path import isfile, join
import numpy as np
import ast
import matplotlib.pyplot as plt
import warnings
import csv
import datetime as dt

### Constants

In [None]:
# Global switch read by compare_features / compare_trials, the extraction loop
# and the goal plots: when True, trials are split by their "social" label.
social = False

In [None]:
# eventType values of the slider questions whose answers are collected per
# trial (one list per name is created in each participant's record).
post_trial_q = ["AttribQ", "ControlQ", "StressQ", "MotivationQ"]

In [None]:
# Answer labels for each questionnaire, keyed by questionnaire name.
# The position of a label in its list is used as the numeric answer value when
# the questionnaire figures are drawn (legend label = scales[key][answer]),
# and the list length is used when reversing a score.
scales = {
    "ius": [
        "Not at all characteristic of me",
        "A little characteristic of me",
        "Somewhat characteristic of me",
        "Very characteristic of me",
        "Entirely characteristic of me"
    ],
    "shaps": [
        "strongly disagree",
        "disagree",
        "agree",
        "strongly agree"
    ],
    "pss": [
        "Never",
        "Almost Never",
        "Sometimes",
        "Fairly Often",
        "Very Often"
    ],
    "phq": [
        "Not at all",
        "Several days",
        "More than half the days",
        "Nearly every day"
    ],
    "gad": [
        "Not at all",
        "Several days",
        "More than half the days",
        "Nearly every day"
    ],
    "drsp": [
        "Not at all",
        "Minimal",
        "Mild",
        "Moderate",
        "Severe",
        "Extreme"
    ],
    "stress": [
        "Not at all",
        "Minimal",
        "Mild",
        "Moderate",
        "Severe",
        "Extreme"
    ],
    "psst_symptoms": [
        "Not at all",
        "Mild",
        "Moderate",
        "Severe"
    ],
    "psst_interference": [
        "Not at all",
        "Mild",
        "Moderate",
        "Severe"
    ],
    "usability": [
        "strongly disagree",
        "disagree",
        "slightly disagree",
        "neutral",
        "slightly agree",
        "agree",
        "strongly agree",
    ],
}

In [None]:
# Reverse-scoring configuration, keyed by questionnaire name.
#   False -> no question of that questionnaire is reversed.
#   list  -> one flag per question position; True means the answer is flipped
#            to len(scales[key]) - 1 - value when the figures are built.
reverse = {
    "ius": False,
    "shaps": [
        False,
        True,
        False,
        True,
        True,
        False,
        True,
        False,
        True,
        False,
        False,
        False,
        False,
        False
    ],
    "pss": [
        False,
        False,
        False,
        True,
        True,
        False,
        True,
        True,
        False,
        False
    ],
    "phq": False,
    "gad": False,
    "drsp": False,
    "stress": [
        False,
        True,
        False,
    ],
    "psst_symptoms": False,
    "psst_interference": False,
    "usability": False,
}

### Auxiliary functions

In [None]:
def custom_formatwarning(message, category, filename, lineno, line=None):
    """Render a warning as a single ``file:line: Category: message`` line.

    Installed below as ``warnings.formatwarning`` so that warnings raised with
    ``warnings.warn_explicit`` are printed without the source-line echo.
    """
    record = warnings.WarningMessage(message, category, filename, lineno, None, line)
    return "{}:{}: {}: {}\n".format(
        record.filename,
        record.lineno,
        record.category.__name__,
        record.message,
    )

warnings.formatwarning = custom_formatwarning

#### Completion codes

In [None]:
def check_cycle_lengths(date_data):
    """Return the mean gap, in days, between three consecutive reported dates.

    Args:
        date_data: mapping with string entries "date1", "date2" and "date3".
            Each string is sliced at fixed positions: characters 0-3 are the
            year, 5-6 the month and 8 onwards the day.

    Returns:
        The mean of |date1 - date2| and |date2 - date3| in days.
    """
    # Read all three raw strings first so a missing key is reported before
    # any parsing error.
    raw_dates = [date_data[key] for key in ("date1", "date2", "date3")]
    first, second, third = [
        dt.date(int(text[:4]), int(text[5:7]), int(text[8:]))
        for text in raw_dates
    ]
    gaps = [
        np.abs((first - second).days),
        np.abs((second - third).days),
    ]
    return np.mean(gaps)
        


In [None]:
def check_cycling(question_id, trial_data, name, expe_data, verbose=False):
    match question_id:
        case 4:
            menstrual_cycle = ast.literal_eval(trial_data["response"])["menstrual_cycle"]
            if verbose:
                print("menstrual cycle: ", menstrual_cycle)
            return menstrual_cycle == "Yes"
        case 7:
            try:
                cycle_length = ast.literal_eval(trial_data["response"])["cycle_length"]
            except:
                cycle_length = trial_data["response"]
            if verbose:
                print("cycle length: ", cycle_length)
            try:
                cycle_length = int(cycle_length)
                if cycle_length < 10:
                    new_cycle_length = check_cycle_lengths(expe_data["dates"])
                    warnings.warn_explicit("Very small cycle length: " + str(cycle_length) + ". Calculated cycle length: " + str(new_cycle_length), Warning, name, trial_data["trial_index"])
                    cycle_length = new_cycle_length
                return cycle_length >= 21 and cycle_length <= 35
            except:
                new_cycle_length = check_cycle_lengths(expe_data["dates"])
                warnings.warn_explicit("Non int cycle length: " + cycle_length + ". Calculated cycle length: " + str(new_cycle_length), Warning, name, trial_data["trial_index"])
                cycle_length = new_cycle_length
                return cycle_length >= 21 and cycle_length <= 35
        case 8:
            cycle_fluctuation = ast.literal_eval(trial_data["response"])["cycle_fluctuation"]
            if verbose:
                print("cycle fluctuation: ", cycle_fluctuation)
            return "Yes, regular cycle lengths" in cycle_fluctuation
        case 9:
            menopause_transition = ast.literal_eval(trial_data["response"])["menopause_transition"]
            if verbose:
                print("menopause transition: ", menopause_transition)
            return "No" in menopause_transition
        case 10:
            pregnant_breastfeeding = ast.literal_eval(trial_data["response"])["pregnant_breastfeeding"]
            if verbose:
                print("pregnant breastfeeding: ", pregnant_breastfeeding)
            return "No" in pregnant_breastfeeding
        case 11:
            contraceptive_use = ast.literal_eval(trial_data["response"])["contraceptive_use"]
            if verbose:
                print("contraceptive use: ", contraceptive_use)
            return "No" in contraceptive_use
        case 13:
            iud_use = ast.literal_eval(trial_data["response"])["iud_use"]
            if verbose:
                print("iud use: ", iud_use)
            return "NO" in iud_use or "YES, COPPER IUD" in iud_use
        case 14:
            hormone_use = ast.literal_eval(trial_data["response"])["hormone_use"]
            if verbose:
                print("hormone use: ", hormone_use)
            return "NO"in hormone_use 
        case 16:
            gynecologic_conditions = ast.literal_eval(trial_data["response"])["gynecologic_conditions"]
            if verbose:
                print("gynecologic conditions: ", gynecologic_conditions)
            return "NO" in gynecologic_conditions
    return True

In [None]:
def check_attention(event_type, trial_data):
    """Return 1 when the row is a failed attention check, 0 otherwise.

    Three questionnaires embed a control question with a fixed expected
    answer; any other event type never counts as a failure.
    """
    # (questionnaire, control question, expected answer)
    control_items = (
        ("phq", "Q07", '2'),
        ("ius", "Q08", '0'),
        ("psst_symptoms", "Q09", '1'),
    )
    for questionnaire, question, expected in control_items:
        if event_type == questionnaire:
            answers = ast.literal_eval(trial_data["responses"])
            if answers[question] != expected:
                return 1
    return 0

#### Plotting

In [None]:
def plot_quartiles(x, data, label=None, color=0):
    """Draw the median of each group as a line with two shaded bands.

    Args:
        x: x positions, one per group.
        data: sequence of groups (each a sequence of numbers).
        label: legend label attached to the median line.
        color: index into matplotlib's "C<n>" colour cycle.

    The inner band spans the first to third quartile. The outer band spans
    median +/- twice the inter-quartile range, clipped to the group's
    minimum and maximum.
    """
    shade = "C{}".format(color)
    medians = [np.median(group) for group in data]
    lower_quartile = [np.quantile(group, 0.25) for group in data]
    upper_quartile = [np.quantile(group, 0.75) for group in data]
    # Twice the inter-quartile range of each group.
    reach = [(hi - lo) * 2 for lo, hi in zip(lower_quartile, upper_quartile)]
    band_low = [
        max(medians[idx] - reach[idx], np.min(data[idx]))
        for idx in range(len(data))
    ]
    band_high = [
        min(medians[idx] + reach[idx], np.max(data[idx]))
        for idx in range(len(data))
    ]
    plt.fill_between(x, band_low, band_high, alpha=0.2, color=shade)
    plt.fill_between(x, lower_quartile, upper_quartile, alpha=0.3, color=shade)
    plt.plot(x, medians, label=label, color=shade)
    

##### Social

In [None]:
def compare_features_social(feature1, features2, data, operation, showfliers=True):
    """Plot a combined feature against the values of ``feature1``, split by social label.

    For every trial of every participant, the value of ``features2[0]`` is
    combined with the remaining ``features2`` using ``operation`` ("+" or
    "/"), then grouped by that trial's ``feature1`` value. Trials labelled
    "social" and all other trials are grouped separately. Trials whose first
    feature is NaN are ignored. Shows one box plot per split, then a quartile
    plot overlaying both.
    """
    social_groups = dict()
    other_groups = dict()
    for participant in data:
        primary = participant[features2[0]]
        for j in range(len(primary)):
            group = participant[feature1][j]
            if group not in social_groups.keys():
                social_groups[group] = []
                other_groups[group] = []
            if np.isnan(primary[j]):
                continue
            if participant["social"][j] == "social":
                target = social_groups[group]
            else:
                target = other_groups[group]
            target.append(primary[j])
            for extra in features2[1:]:
                if operation == "+":
                    target[-1] += participant[extra][j]
                elif operation == "/":
                    target[-1] /= participant[extra][j]

    ordered = []
    for title, groups in (("Social trials", social_groups), ("Non social trials", other_groups)):
        # Drop groups that received no valid value, then order by group key.
        kept = dict(sorted([item for item in groups.items() if item[1] != []]))
        ordered.append(kept)
        plt.title(title)
        plt.boxplot(kept.values(), labels=kept.keys(), showfliers=showfliers)
        plt.xlabel(feature1)
        plt.ylabel(operation.join(features2))
        plt.show()

    ordered_social, ordered_other = ordered
    plot_quartiles(ordered_social.keys(), list(ordered_social.values()), "social", color=0)
    plot_quartiles(ordered_other.keys(), list(ordered_other.values()), "non social", color=1)
    plt.legend()
    plt.show()

In [None]:
def compare_trials_social(features, data, operation, showfliers=True):
    """Plot a combined feature per trial position, split by social label.

    The value of ``features[0]`` at each trial position is combined with the
    remaining ``features`` using ``operation`` ("+" or "/") and collected
    across participants, separately for trials labelled "social" and for all
    other trials. Trials whose first feature is NaN are ignored. Shows the
    non-social box plot, the social box plot, then a quartile plot overlaying
    both.
    """
    social_by_trial = []
    other_by_trial = []
    for participant in data:
        primary = participant[features[0]]
        for j in range(len(primary)):
            if len(social_by_trial) <= j:
                social_by_trial.append([])
                other_by_trial.append([])
            if np.isnan(primary[j]):
                continue
            if participant["social"][j] == "social":
                target = social_by_trial[j]
            else:
                target = other_by_trial[j]
            target.append(primary[j])
            for extra in features[1:]:
                if operation == "+":
                    target[-1] += participant[extra][j]
                elif operation == "/":
                    target[-1] /= participant[extra][j]

    # Trial positions without any valid value are removed before plotting.
    social_by_trial = [values for values in social_by_trial if values != []]
    other_by_trial = [values for values in other_by_trial if values != []]

    for title, per_trial in (("Non social trials", other_by_trial), ("Social trials", social_by_trial)):
        plt.title(title)
        plt.boxplot(per_trial, showfliers=showfliers)
        plt.xlabel("trial number")
        plt.ylabel(operation.join(features))
        plt.show()

    plot_quartiles(list(range(1, len(social_by_trial) + 1)), social_by_trial, "social", color=0)
    plot_quartiles(list(range(1, len(other_by_trial) + 1)), other_by_trial, "non social", color=1)
    plt.legend()
    plt.show()

##### Non-social

In [None]:
def compare_features_non_social(feature1, features2, data, operation, showfliers=True, plotquartiles=False):
    res = dict()
    for i in range(len(data)):
        features2_0_data = data[i][features2[0]]
        feature1_data = data[i][feature1]
        if len(features2_0_data) < len(feature1_data):
            feature1_data = feature1_data[(len(feature1_data) - len(features2_0_data)):]
        for j in range(len(features2_0_data)):
            if feature1_data[j] not in res.keys():
                res[feature1_data[j]] = []
            if not np.isnan(features2_0_data[j]):
                if features2_0_data[j] >= 0:
                    res[feature1_data[j]].append(features2_0_data[j])
                    for k in range(1, len(features2)):
                        match operation:
                            case "+":
                                res[feature1_data[j]][-1] += (data[i][features2[k]][j])
                            case "/":
                                res[feature1_data[j]][-1] /= (data[i][features2[k]][j])
    sorted_res = dict(sorted(res.items()))
    if plotquartiles:
        plot_quartiles(sorted_res.keys(), list(sorted_res.values()))
    else:
        plt.boxplot(sorted_res.values(), labels=sorted_res.keys(), showfliers=showfliers)
    plt.xlabel(feature1)
    plt.ylabel(operation.join([feature for feature in features2]))
    plt.show()

In [None]:
def compare_trials_non_social(features, data, operation, showfliers):
    """Box-plot a combined feature per trial position across participants.

    The value of ``features[0]`` at each trial position is combined with the
    remaining ``features`` using ``operation`` ("+" or "/"). Trials whose
    first feature is NaN are ignored.
    """
    per_trial = []
    for participant in data:
        primary = participant[features[0]]
        for j in range(len(primary)):
            if len(per_trial) <= j:
                per_trial.append([])
            if np.isnan(primary[j]):
                continue
            per_trial[j].append(primary[j])
            for extra in features[1:]:
                if operation == "+":
                    per_trial[j][-1] += participant[extra][j]
                elif operation == "/":
                    per_trial[j][-1] /= participant[extra][j]

    plt.boxplot(per_trial, showfliers=showfliers)
    plt.xlabel("trial number")
    plt.ylabel(operation.join(features))
    plt.show()

##### Global

In [None]:
def compare_features(feature1, features2, data, operation="+", showfliers=True, plotquartiles=False):
    """Plot combined features against ``feature1`` using the variant selected by ``social``.

    Args:
        feature1: name of the per-trial feature used to group trials.
        features2: names of the per-trial features to combine.
        data: list of participant records.
        operation: "+" or "/", how the features in ``features2`` are combined.
        showfliers: forwarded to the box plots.
        plotquartiles: draw a quartile plot instead of a box plot. Only used
            by the non-social variant; the social variant always draws both.
    """
    if social:
        # compare_features_social takes no plotquartiles argument; passing it
        # positionally raised a TypeError.
        compare_features_social(feature1, features2, data, operation, showfliers)
    else:
        compare_features_non_social(feature1, features2, data, operation, showfliers, plotquartiles)
    

In [None]:
def compare_trials(features, data, operation="+", showfliers=True):
    """Plot combined features per trial position using the variant selected by ``social``."""
    plot = compare_trials_social if social else compare_trials_non_social
    plot(features, data, operation, showfliers)

## Extract data

In [None]:
# Load every regular, non-hidden file of the data folder as a DataFrame,
# keeping its path alongside for warning messages.
path = "data"
visible_paths = [
    join(path, entry)
    for entry in os.listdir(path)
    if isfile(join(path, entry)) and not entry.startswith('.')
]
files = []
for file in visible_paths:
    table = pd.read_csv(file, quoting=csv.QUOTE_MINIMAL, engine='python')
    files.append((file, table))

In [None]:
# Build one record (dict) per participant file by walking its rows in order.
# Only participants that reached an "html-keyboard-response" row are kept in
# `data`; the two histograms below are updated for every file.
data = []
# ids of participants with at least one trial without any scored key press
no_key_presses = set()
k = 0  # number of files processed (only used by the commented-out debug limit)
# count_attention_checks[n] = number of participants with n failed attention checks
# NOTE(review): sized for at most 3 failures; more would raise IndexError.
count_attention_checks = np.zeros(4)
# all_no_presses[n] = number of participants with n trials without key presses
# NOTE(review): sized for at most 24 such trials.
all_no_presses = np.zeros(25)
for name, file in files:
    # if k>10:
    #     break
    expe_data = dict()
    expe_data["PROLIFIC_PID"] = file["prolific_id"].loc[0]
    expe_data["id"] = str(file["run_id"].loc[0])
    expe_data["attention_check"] = 0
    cycling = True
    failed_attention_checks = 0
    count_no_presses = 0
    expe_data["bonus"] = False
    # Per-trial lists, one entry appended for each "ctl-task-cpt" row.
    expe_data["browser"] = []
    expe_data["goal_selected"] = []
    expe_data["duration"] = []
    expe_data["control_level"] = []
    expe_data["real_control_level"] = []
    expe_data["questionnaires"] = dict()
    expe_data["reaction_time"] = []
    expe_data["social"] = []
    expe_data["outcome_total"] = []
    expe_data["outcome"] = []
    expe_data["finished"] = False
    for event_type in post_trial_q:
        expe_data[event_type] = []
    final_score = 0
    counts_keys = ["num_key_presses", "num_perfect", "num_errors", "num_whileblue", "num_whilered", "num_randdir", "num_good_moves"]
    for key in counts_keys:
        expe_data[key] = []
    for i in range(len(file)):
        trial_data = file.loc[i]
        # When the column is absent the previous value is kept.
        # NOTE(review): question_id is unbound if the column is missing on the
        # very first row; it is read unconditionally by check_cycling below.
        try:
            question_id = trial_data["question_id"]
        except KeyError:
            pass
        trial_type = trial_data["trial_type"]
        try:
            event_type = trial_data["eventType"]
        except KeyError:
            pass
        if trial_type == "ctl-task-cpt":
            if social:
                expe_data["social"].append(trial_data["multiplayer"])
            goal_selected = trial_data["goal_selected"]
            # A goal choice reaction time strictly between 0 and 4000 is
            # stored as an int goal; anything else is recorded as a timeout
            # variant of the goal, or the raw value with a warning.
            if trial_data["goal_choice_rt"] < 4000 and trial_data["goal_choice_rt"] > 0:
                expe_data["goal_selected"].append(int(goal_selected))
            else:
                if goal_selected == 1:
                    expe_data["goal_selected"].append("timeout1")
                elif goal_selected == 2:
                    expe_data["goal_selected"].append("timeout2")
                else:
                    expe_data["goal_selected"].append(goal_selected)
                    warnings.warn_explicit("Goal is " + str(goal_selected), Warning, name, file.loc[i]["trial_index"])
            final_score = trial_data["participant_final_score"]
            expe_data["duration"].append(trial_data["phase3_start_time"])
            expe_data["browser"].append(trial_data["browser"])
            expe_data["control_level"].append(trial_data["control_level"])
            expe_data["outcome_total"].append(trial_data["outcome_total"])
            expe_data["outcome"].append(trial_data["outcome"])
            # "counts" holds a dict literal with one entry per counts_keys name.
            # NOTE(review): on KeyError the keys before the missing one have
            # already been appended, leaving the per-trial lists misaligned.
            try:
                for key in counts_keys:
                    expe_data[key].append(ast.literal_eval(trial_data["counts"])[key])
            except ValueError:
                for key in counts_keys:
                    expe_data[key].append(np.nan)
                warnings.warn_explicit("NaN value in counts ignored", Warning, name, trial_data["trial_index"])
            except KeyError:
                warnings.warn_explicit("Key error: " + key, Warning, name, trial_data["trial_index"])
            # Mean reaction time of the trial: every scored label in
            # key_presses is preceded by the value converted to float here.
            sum_rt = 0
            num_rt = 0
            try:
                key_presses = ast.literal_eval(trial_data["key_presses"])
                for j in range(len(key_presses)):
                    if key_presses[j] in ["smooth", "error", "oops", "while_blue", "while_red"]:
                        num_rt += 1
                        sum_rt += float(key_presses[j - 1])
                if num_rt != 0:
                    avg_rt = sum_rt / num_rt
                    expe_data["reaction_time"].append(avg_rt)
                else:
                    expe_data["reaction_time"].append(np.nan)
                    no_key_presses.add(expe_data["id"])
                    count_no_presses += 1
                    # warnings.warn_explicit("No key presses during the trial", Warning, name, trial_data["trial_index"])
            except ValueError:
                expe_data["reaction_time"].append(np.nan)
                warnings.warn_explicit("NaN value in reaction time ignored", Warning, name, trial_data["trial_index"])
            # Observed control level = num_perfect / (num_perfect + num_randdir),
            # rounded to one decimal; -0.1 marks trials where the sum is zero.
            if expe_data["num_perfect"][-1] + expe_data["num_randdir"][-1] != 0:
                expe_data["real_control_level"].append(round(expe_data["num_perfect"][-1] / (expe_data["num_perfect"][-1] + expe_data["num_randdir"][-1]), 1))
            else:
                expe_data["real_control_level"].append(-0.1)
        if trial_type == "survey-template":
            # Whole questionnaire answers, stored under the questionnaire name.
            event_type = file.loc[i]["eventType"]
            expe_data["questionnaires"][event_type] = ast.literal_eval(file.loc[i]["responses"])
        if trial_type == "html-slider-response":
            event_type = trial_data["eventType"]
            if event_type in post_trial_q:
                try:
                    expe_data[event_type].append(int(file.loc[i]["response"]))
                except ValueError:
                    expe_data[event_type].append(np.nan)
                    warnings.warn_explicit("NaN value in " + event_type + " ignored", Warning, name, file.loc[i]["trial_index"])
        if trial_type == "html-keyboard-response":
            expe_data["finished"] = True
        # Once a screening answer fails, the participant stays non-cycling.
        if cycling:
            cycling = check_cycling(question_id, trial_data, name, expe_data)
        if trial_type == "survey-text":
            # NOTE(review): assumes "response" is a string on survey-text rows.
            if "date" in trial_data["response"]:
                expe_data["dates"] = ast.literal_eval(trial_data["response"])

        failed_attention_checks += check_attention(event_type, trial_data)
    expe_data["final_score"] = final_score
    # Bonus threshold: final score strictly above 6 * 11.
    if final_score > (6 * 11):
        expe_data["bonus"] = True
    expe_data["failed_attention_checks"] = failed_attention_checks
    expe_data["cycling"] = cycling
    expe_data["no_press_trials"] = count_no_presses
    count_attention_checks[failed_attention_checks] += 1
    all_no_presses[count_no_presses] += 1
    k += 1
    if expe_data["finished"]:
        data.append(expe_data)
print(count_attention_checks)
print(all_no_presses)


## Figures

### Attention checks

In [None]:
# Histogram: how many participants failed 0, 1, 2 or 3 attention checks.
plt.bar(range(4), count_attention_checks)
plt.xlabel("Number of failed attention checks")
plt.ylabel("Number of participants")
plt.show()

In [None]:
# Histogram: how many participants had n trials without any scored key press.
plt.bar(range(25), all_no_presses)
plt.xlabel("Number of trials with no key presses")
plt.ylabel("Number of participants")
plt.show()

### Key presses

In [None]:
# One line per participant: number of key presses at each trial position.
feature = 'num_key_presses'
for expe_data in data:
    plt.plot([i for i in range(len(expe_data[feature]))], expe_data[feature])
    plt.xlabel("trial number")
    plt.ylabel("number of key presses")

In [None]:
# Key-press counts per trial position across participants (outliers hidden).
compare_trials(["num_key_presses"], data, showfliers=False)

In [None]:
# Key-press counts grouped by the trial's control level (outliers hidden).
compare_features("control_level", ["num_key_presses"], data, showfliers=False)

In [None]:
# Per trial position: share of num_randdir among num_randdir + num_perfect,
# collected across participants. Trials with NaN counts or a zero total are
# left out.
res = []
for expe_data in data:
    randdir_counts = expe_data["num_randdir"]
    for j, randdir in enumerate(randdir_counts):
        if len(res) <= j:
            res.append([])
        if np.isnan(randdir):
            continue
        total_whilegreen = randdir + expe_data["num_perfect"][j]
        if total_whilegreen != 0:
            res[j].append(randdir / total_whilegreen)

plt.boxplot(res)
plt.xlabel("trial number")
plt.ylabel("num_oops / num_whilegreen")
plt.show()

In [None]:
# Per control level: share of num_perfect among num_randdir + num_perfect,
# collected across participants and trials. Trials with NaN counts or a zero
# total are left out.
res = dict()
for expe_data in data:
    randdir_counts = expe_data["num_randdir"]
    perfect_counts = expe_data["num_perfect"]
    for j in range(len(randdir_counts)):
        level = expe_data["control_level"][j]
        if level not in res.keys():
            res[level] = []
        if np.isnan(randdir_counts[j]):
            continue
        total_whilegreen = randdir_counts[j] + perfect_counts[j]
        if total_whilegreen != 0:
            res[level].append(perfect_counts[j] / total_whilegreen)
sorted_res = dict(sorted(res.items()))
plt.boxplot(sorted_res.values(), labels=sorted_res.keys())
plt.xlabel("control_level")
plt.ylabel("num_perfect / num_whilegreen")
plt.show()

### Errors

In [None]:
# Stacked bars of the two error counts per trial, for the first participants
# only (at most 4, laid out on a 2 x 2 grid).
feature1 = 'num_whileblue'
feature2 = 'num_errors'
num_images = min(len(data), 4)
for i in range(num_images):
    plt.subplot(2, 2, i + 1)
    expe_data = data[i]
    plt.bar(
        [i for i in range(len(expe_data[feature1]))], 
        expe_data[feature1], 
        label=feature1,
    )
    # feature2 is stacked on top of feature1
    plt.bar(
        [i for i in range(len(expe_data[feature2]))], 
        expe_data[feature2],
        label=feature2,
        bottom=expe_data[feature1]
    )
    plt.xlabel("trial number")
    plt.ylabel("number of errors")
    plt.legend()
    plt.tight_layout()


In [None]:
# Sum of num_errors and num_whileblue per trial position (default operation "+").
compare_trials(["num_errors", "num_whileblue"], data)

In [None]:
# num_whileblue grouped by the trial's control level.
compare_features("control_level", ["num_whileblue"], data)

### Duration

In [None]:
# One line per participant: duration value recorded at each trial position.
feature = 'duration'
for expe_data in data:
    plt.plot([i for i in range(len(expe_data[feature]))], expe_data[feature])
    plt.xlabel("trial number")
    # The label previously read "number of key presses" (copied from the
    # key-press figure) although the plotted feature is the duration.
    plt.ylabel("duration")

In [None]:
# Duration per trial position across participants (outliers hidden).
compare_trials(["duration"], data, showfliers=False)

In [None]:
# Duration grouped by the trial's control level (outliers hidden).
compare_features("control_level", ["duration"], data, showfliers=False)

### Reaction times

In [None]:
# Mean reaction time per trial position across participants (outliers hidden).
compare_trials(["reaction_time"], data, showfliers=False)

In [None]:
# Mean reaction time grouped by the browser recorded for the trial.
compare_features("browser", ["reaction_time"], data, showfliers=False)

In [None]:
# Mean reaction time grouped by the trial's control level.
compare_features("control_level", ["reaction_time"], data, showfliers=False)

In [None]:
# Mean reaction time grouped by the trial's num_whileblue count.
compare_features("num_whileblue", ["reaction_time"], data, showfliers=False)

### Outcome

In [None]:
# outcome_total per trial position across participants (outliers hidden).
compare_trials(["outcome_total"], data, showfliers=False)

In [None]:
# outcome_total grouped by the trial's control level (outliers hidden).
compare_features("control_level", ["outcome_total"], data, showfliers=False)

### General questionnaires

In [None]:
# Count, for every questionnaire and question position, how many participants
# gave each (possibly shifted and reversed) answer value:
#   res_social[questionnaire][question position][answer value] -> count
feature = "questionnaires"
res_social = dict()
for expe_data in data:  # one record per participant
    for key, answers in expe_data[feature].items():  # key is the questionnaire name
        per_question = res_social.setdefault(key, dict())
        for j, question in enumerate(answers):  # j is the question position
            real_value = int(answers[question])
            # These questionnaires are shifted down by one so that values
            # line up with the label positions in `scales`.
            if key in ["psst_symptoms", "psst_interference", "phq", "gad"]:
                real_value -= 1
            if reverse[key] and reverse[key][j]:
                real_value = len(scales[key]) - 1 - real_value
            counts = per_question.setdefault(j, dict())
            counts[real_value] = counts.get(real_value, 0) + 1

# One stacked bar chart per questionnaire: one bar per question, one colour
# per answer label.
for key in res_social.keys():
    bottom = [0 for i in res_social[key].keys()]
    for ans in range(len(scales[key])):  # ans is the answer value
        # Answers nobody selected count as zero (explicit default instead of
        # a bare except around the lookup).
        results_social = [
            res_social[key][question].get(ans, 0)
            for question in res_social[key].keys()
        ]
        plt.bar(
            res_social[key].keys(),
            results_social,
            label=scales[key][ans],
            bottom=bottom,
            color=plt.cm.jet((ans + .5) / len(scales[key]))
        )
        bottom = [results_social[i] + bottom[i] for i in range(len(results_social))]
    plt.legend()
    plt.title(key)
    plt.tight_layout()
    plt.show()


### Post-trial questionnaires

In [None]:
# Each post-trial slider answer, then the trial outcome, per trial position.
for qtype in post_trial_q:
    compare_trials([qtype], data)
compare_trials(["outcome"], data)

In [None]:
# Each post-trial slider answer grouped by the observed control level,
# drawn as a quartile plot.
for qtype in post_trial_q:
    compare_features("real_control_level", [qtype], data, plotquartiles=True)

### Goals

In [None]:
# Stacked bars: for each trial position, how many participants selected goal
# 1, goal 2, or timed out on one of them. With `social` set, the counts are
# kept separately for trials labelled "social" and for the others.
if(social):
    results_social = {'1': [], '2': [], 'timeout1': [], 'timeout2': []}
    results_non_social = {'1': [], '2': [], 'timeout1': [], 'timeout2': []}
    for expe_data in data:
        for trial_index in range(len(expe_data["social"])):
            # The position inside each split is the trial index minus the
            # current length of the other split's lists.
            # NOTE(review): this offset depends on what earlier participants
            # added to the other split - confirm it is the intended indexing.
            if (expe_data["social"][trial_index] == "social"):
                if trial_index - len(results_non_social["1"]) >= len(results_social['1']):
                    results_social["1"].append(0)
                    results_social["2"].append(0)
                    results_social["timeout1"].append(0)
                    results_social["timeout2"].append(0)
                results_social[str(expe_data["goal_selected"][trial_index])][trial_index - len(results_non_social["1"])] += 1
            else:
                if trial_index - len(results_social["1"]) >= len(results_non_social['1']):
                    results_non_social["1"].append(0)
                    results_non_social["2"].append(0)
                    results_non_social["timeout1"].append(0)
                    results_non_social["timeout2"].append(0)
                results_non_social[str(expe_data["goal_selected"][trial_index])][trial_index - len(results_social["1"])] += 1
    bottom = np.zeros(len(results_social["1"]))
    for key in results_social.keys():
        plt.bar(range(len(results_social[key])), results_social[key], bottom=bottom, label=key)
        bottom = bottom + results_social[key]
    plt.xlabel("trial")
    plt.legend()
    plt.ylabel("goal selected")
    plt.show()
    bottom = np.zeros(len(results_non_social["1"]))
    for key in results_non_social.keys():
        plt.bar(range(len(results_non_social[key])), results_non_social[key], bottom=bottom, label=key)
        bottom = bottom + results_non_social[key]
    plt.xlabel("trial")
    plt.legend()
    plt.ylabel("goal selected")
    plt.show()
else:
    # Without the social split a single set of counters is used (the
    # variable keeps the name results_social).
    results_social = {'1': [], '2': [], 'timeout1': [], "timeout2": []}
    for expe_data in data:
        for trial_index in range(len(expe_data["goal_selected"])):
            if trial_index >= len(results_social['1']):
                results_social["1"].append(0)
                results_social["2"].append(0)
                results_social["timeout1"].append(0)
                results_social["timeout2"].append(0)
            # Goal values outside the four known labels are not counted.
            if str(expe_data["goal_selected"][trial_index]) in results_social.keys():
                results_social[str(expe_data["goal_selected"][trial_index])][trial_index] += 1
    bottom = np.zeros(len(results_social["1"]))
    for key in results_social.keys():
        plt.bar(range(len(results_social[key])), results_social[key], bottom=bottom, label=key)
        bottom = bottom + results_social[key]
    plt.xlabel("trial")
    plt.legend()
    plt.ylabel("goal selected")
    plt.show()

In [None]:
# Per participant: number of trials ending in each goal outcome. The four
# arrays are reused by the "proportion won / lost" cells.
num_goal1 = np.zeros(len(data))
num_goal2 = np.zeros(len(data))
num_timeout1 = np.zeros(len(data))
num_timeout2 = np.zeros(len(data))
goal_counters = (
    (1, num_goal1),
    (2, num_goal2),
    ("timeout1", num_timeout1),
    ("timeout2", num_timeout2),
)
for i, expe_data in enumerate(data):
    for goal in expe_data["goal_selected"]:
        # Only the first matching label is incremented; other goal values
        # are not counted.
        for goal_label, counter in goal_counters:
            if goal == goal_label:
                counter[i] += 1
                break
plt.boxplot([num_goal1, num_goal2, num_timeout1, num_timeout2], labels=["goal 1", "goal 2", "timeout1", "timeout2"])
plt.xlabel("goal selected")
plt.ylabel("number of trials (for each participant)")
plt.show()

In [None]:
# Per participant: proportion of trials with a non-negative outcome, for each
# goal outcome and overall. Relies on num_goal1 / num_goal2 / num_timeout1 /
# num_timeout2 computed in the previous cell.
num_goal1_won = np.zeros(len(data))
num_goal2_won = np.zeros(len(data))
num_timeout1_won = np.zeros(len(data))
num_timeout2_won = np.zeros(len(data))
num_won = np.zeros(len(data))
num = np.zeros(len(data))
for i in range(len(data)):
    goals = data[i]["goal_selected"]
    outcomes = data[i]["outcome"]
    for j in range(len(goals)):
        goal = goals[j]
        num[i] += 1
        # An outcome of exactly 0 is counted as won.
        if outcomes[j] >= 0:
            num_won[i] += 1
            if goal == 1:
                num_goal1_won[i] += 1
            elif goal == 2:
                num_goal2_won[i] += 1
            elif goal == "timeout1":
                num_timeout1_won[i] += 1
            elif goal == "timeout2":
                num_timeout2_won[i] += 1

# Division is only performed where the denominator is non-zero; the other
# entries keep the 0 from `out` and are filtered out before plotting.
goal1_prop = np.divide(num_goal1_won, num_goal1, where=num_goal1!=0, out=np.zeros(len(num_goal1), dtype=float))
goal2_prop = np.divide(num_goal2_won, num_goal2, where=num_goal2!=0, out=np.zeros(len(num_goal2), dtype=float))
timeout1_prop = np.divide(num_timeout1_won, num_timeout1, where=num_timeout1!=0, out=np.zeros(len(num_timeout1), dtype=float))
timeout2_prop = np.divide(num_timeout2_won, num_timeout2, where=num_timeout2!=0, out=np.zeros(len(num_timeout2), dtype=float))
won_prop = np.divide(num_won, num, where=num!=0, out=np.zeros(len(num), dtype=float))

plt.boxplot(
    [
        goal1_prop[num_goal1 != 0], 
        goal2_prop[num_goal2 != 0], 
        timeout1_prop[num_timeout1 != 0],
        timeout2_prop[num_timeout2 != 0],
        won_prop[num != 0]
    ], 
    labels=["goal 1", "goal 2", "timeout1", "timeout2", "total"],
)
plt.xlabel("goal selected")
plt.ylabel("proportion won")
plt.show()

In [None]:
# Per participant: proportion of trials with a negative outcome, for each
# goal outcome. Relies on num_goal1 / num_goal2 / num_timeout1 / num_timeout2
# computed in an earlier cell.
num_goal1_lost = np.zeros(len(data))
num_goal2_lost = np.zeros(len(data))
num_timeout1_lost = np.zeros(len(data))
num_timeout2_lost = np.zeros(len(data))
for i in range(len(data)):
    goals = data[i]["goal_selected"]
    outcomes = data[i]["outcome"]
    for j in range(len(goals)):
        goal = goals[j]
        if outcomes[j] < 0:
            if goal == 1:
                num_goal1_lost[i] += 1
            elif goal == 2:
                num_goal2_lost[i] += 1
            elif goal == "timeout1":
                num_timeout1_lost[i] += 1
            elif goal == "timeout2":
                num_timeout2_lost[i] += 1

# Division is only performed where the denominator is non-zero; the other
# entries keep the 0 from `out` and are filtered out before plotting.
goal1_prop = np.divide(num_goal1_lost, num_goal1, where=num_goal1!=0, out=np.zeros(len(num_goal1), dtype=float))
goal2_prop = np.divide(num_goal2_lost, num_goal2, where=num_goal2!=0, out=np.zeros(len(num_goal2), dtype=float))
timeout1_prop = np.divide(num_timeout1_lost, num_timeout1, where=num_timeout1!=0, out=np.zeros(len(num_timeout1), dtype=float))
timeout2_prop = np.divide(num_timeout2_lost, num_timeout2, where=num_timeout2!=0, out=np.zeros(len(num_timeout2), dtype=float))

plt.boxplot(
    [
        goal1_prop[num_goal1 != 0], 
        goal2_prop[num_goal2 != 0], 
        timeout1_prop[num_timeout1 != 0],
        timeout2_prop[num_timeout2 != 0],
    ], 
    labels=["goal 1", "goal 2", "timeout1", "timeout2"],
)
plt.xlabel("goal selected")
plt.ylabel("proportion lost")
plt.show()

In [None]:
# Per browser: number of trials whose recorded goal is none of the four
# expected values (1, 2, "timeout1", "timeout2").
res = dict()
for expe_data in data:
    # A participant without any recorded trial has no browser entry (and no
    # goals to count). Skipping it avoids reusing the previous participant's
    # browser, or hitting an undefined name on the first participant.
    if len(expe_data["browser"]) == 0:
        continue
    browser = expe_data["browser"][0]
    if browser not in res:
        res[browser] = 0
    for goal in expe_data["goal_selected"]:
        if goal not in [1, 2, "timeout1", "timeout2"]:
            res[browser] += 1
plt.bar(res.keys(), res.values())
plt.show()

### Groups

In [None]:
# Summary table with one row per kept participant, restricted to the fields
# used to form the groups below; rows containing missing values are dropped.
keys = ["PROLIFIC_PID", 'id', "cycling", "failed_attention_checks", "no_press_trials", "bonus", "finished"]
export = pd.DataFrame(columns=keys)
for expe_data in data:
    # append the participant as a new last row
    export.loc[len(export)] = [expe_data[key] for key in keys]
export = export.dropna()

In [None]:
# Rows where `finished`, `bonus` and `cycling` are all true and
# `failed_attention_checks` is below 2.
# The conditions are combined with `&` into one mask aligned with `export`.
# Chaining `[...]` filters applied masks built from the full frame to an
# already-filtered frame, which relies on implicit reindexing (pandas warns
# about it) and copies the frame once per filter.
bonus_cycle = export[
    export["finished"]
    & export["bonus"]
    & export["cycling"]
    & (export["failed_attention_checks"] < 2)
]

In [None]:
# Rows where `finished` and `bonus` are true, `cycling` is False and
# `failed_attention_checks` is below 2.
# The conditions are combined with `&` into one mask aligned with `export`.
# Chaining `[...]` filters applied masks built from the full frame to an
# already-filtered frame, which relies on implicit reindexing (pandas warns
# about it) and copies the frame once per filter.
# `== False` is kept as an element-wise comparison on purpose: the columns may
# be object dtype (the frame is filled row by row), where `~` is not a safe
# logical negation.
bonus_no_cycle = export[
    export["finished"]
    & export["bonus"]
    & (export["cycling"] == False)
    & (export["failed_attention_checks"] < 2)
]

In [None]:
# Rows where `finished` and `cycling` are true, `bonus` is False and
# `failed_attention_checks` is below 2.
# The conditions are combined with `&` into one mask aligned with `export`.
# Chaining `[...]` filters applied masks built from the full frame to an
# already-filtered frame, which relies on implicit reindexing (pandas warns
# about it) and copies the frame once per filter.
# `== False` is kept as an element-wise comparison on purpose: the columns may
# be object dtype (the frame is filled row by row), where `~` is not a safe
# logical negation.
no_bonus_cycle = export[
    export["finished"]
    & (export["bonus"] == False)
    & export["cycling"]
    & (export["failed_attention_checks"] < 2)
]

In [None]:
# Rows where `finished` is true, `bonus` and `cycling` are both False and
# `failed_attention_checks` is below 2.
# The conditions are combined with `&` into one mask aligned with `export`.
# Chaining `[...]` filters applied masks built from the full frame to an
# already-filtered frame, which relies on implicit reindexing (pandas warns
# about it) and copies the frame once per filter.
# `== False` is kept as an element-wise comparison on purpose: the columns may
# be object dtype (the frame is filled row by row), where `~` is not a safe
# logical negation.
no_bonus_no_cycle = export[
    export["finished"]
    & (export["bonus"] == False)
    & (export["cycling"] == False)
    & (export["failed_attention_checks"] < 2)
]

In [None]:
# Rows where `finished` is true and exactly one attention check was failed.
# Both conditions are combined into one mask aligned with `export`, instead of
# applying a full-frame mask to an already-filtered frame (implicit reindexing,
# which pandas warns about).
# NOTE(review): the condition-group selections in this notebook use
# `failed_attention_checks < 2`, so these rows can also belong to one of those
# groups -- confirm that the overlap is intended.
attention_fail = export[
    export["finished"] & (export["failed_attention_checks"] == 1)
]

In [None]:
# Combined size of the two cycling groups.
print(len(bonus_cycle) + len(no_bonus_cycle))

# Bar height = number of participants in each group, in display order.
group_sizes = {
    "cycle bonus": len(bonus_cycle),
    "cycle no bonus": len(no_bonus_cycle),
    "no cycle no bonus": len(no_bonus_no_cycle),
    "no cycle bonus": len(bonus_no_cycle),
}
plt.bar(list(group_sizes.keys()), list(group_sizes.values()))
plt.tight_layout()
plt.show()

## Export

### Completion codes

In [None]:
# Cross-check the demographic export against the `no_bonus_no_cycle` group:
# list participant ids that submitted this completion code but are missing from
# the group, then group members that are missing from the file.
file = pd.read_csv("../../data/data_tests/demographic.csv", quoting=csv.QUOTE_MINIMAL, engine='python')
completion_code = "CUYYH5VH"

# Filter the file once and use sets for membership tests. This avoids
# re-filtering the whole frame for every participant and no longer rebinds the
# builtin `id` at notebook scope.
file_pids = file.loc[file["Completion code"] == completion_code, "Participant id"]
submitted_pids = set(file_pids)
known_pids = set(no_bonus_no_cycle["PROLIFIC_PID"])

print("In the file but not in my data:")
for participant_id in file_pids:  # file order, duplicates included
    if participant_id not in known_pids:
        print(participant_id)
print("in my data but not in the file: ")
for pid in no_bonus_no_cycle["PROLIFIC_PID"]:
    if pid not in submitted_pids:
        print(pid)

In [None]:
# Cross-check the demographic export (`file`, loaded in an earlier cell) against
# the `bonus_no_cycle` group: list participant ids that submitted this
# completion code but are missing from the group, then group members that are
# missing from the file.
completion_code = "CHSMEF3Q"

# Filter the file once and use sets for membership tests. This avoids
# re-filtering the whole frame for every participant and no longer rebinds the
# builtin `id` at notebook scope.
file_pids = file.loc[file["Completion code"] == completion_code, "Participant id"]
submitted_pids = set(file_pids)
known_pids = set(bonus_no_cycle["PROLIFIC_PID"])

print("In the file but not in my data:")
for participant_id in file_pids:  # file order, duplicates included
    if participant_id not in known_pids:
        print(participant_id)
print("in my data but not in the file: ")
for pid in bonus_no_cycle["PROLIFIC_PID"]:
    if pid not in submitted_pids:
        print(pid)

In [None]:
# Cross-check the demographic export (`file`, loaded in an earlier cell) against
# the `no_bonus_cycle` group: list participant ids that submitted this
# completion code but are missing from the group, then group members that are
# missing from the file.
completion_code = "C88V7W7C"

# Filter the file once and use sets for membership tests. This avoids
# re-filtering the whole frame for every participant and no longer rebinds the
# builtin `id` at notebook scope.
file_pids = file.loc[file["Completion code"] == completion_code, "Participant id"]
submitted_pids = set(file_pids)
known_pids = set(no_bonus_cycle["PROLIFIC_PID"])

print("In the file but not in my data:")
for participant_id in file_pids:  # file order, duplicates included
    if participant_id not in known_pids:
        print(participant_id)
print("in my data but not in the file: ")
for pid in no_bonus_cycle["PROLIFIC_PID"]:
    if pid not in submitted_pids:
        print(pid)

In [None]:
# Cross-check the demographic export (`file`, loaded in an earlier cell) against
# the `bonus_cycle` group: list participant ids that submitted this completion
# code but are missing from the group, then group members that are missing from
# the file.
completion_code = "C1N4G4YB"

# Filter the file once and use sets for membership tests. This avoids
# re-filtering the whole frame for every participant and no longer rebinds the
# builtin `id` at notebook scope.
file_pids = file.loc[file["Completion code"] == completion_code, "Participant id"]
submitted_pids = set(file_pids)
known_pids = set(bonus_cycle["PROLIFIC_PID"])

print("In the file but not in my data:")
for participant_id in file_pids:  # file order, duplicates included
    if participant_id not in known_pids:
        print(participant_id)
print("in my data but not in the file: ")
for pid in bonus_cycle["PROLIFIC_PID"]:
    if pid not in submitted_pids:
        print(pid)

In [None]:
# Cross-check the demographic export (`file`, loaded in an earlier cell) against
# the `attention_fail` group: list participant ids that submitted this
# completion code but are missing from the group, then group members that are
# missing from the file.
completion_code = "C10I36ND"

# Filter the file once and use sets for membership tests. This avoids
# re-filtering the whole frame for every participant and no longer rebinds the
# builtin `id` at notebook scope.
file_pids = file.loc[file["Completion code"] == completion_code, "Participant id"]
submitted_pids = set(file_pids)
known_pids = set(attention_fail["PROLIFIC_PID"])

print("In the file but not in my data:")
for participant_id in file_pids:  # file order, duplicates included
    if participant_id not in known_pids:
        print(participant_id)
print("in my data but not in the file: ")
for pid in attention_fail["PROLIFIC_PID"]:
    if pid not in submitted_pids:
        print(pid)


In [None]:
# export.to_csv("completion_codes.csv")