In [1]:

import json
import pandas as pd
import numpy as np
from toolz import memoize
import datetime
import math

from tqdm import tqdm
from src.utils.mouselab_jas import MouselabJas
from src.utils.distributions import Normal, expectation
from src.utils.env_creation import create_tree, create_init
from src.utils.env_export import create_json
from src.utils.data_classes import MouselabConfig, Action
from simulation import run_simulation
from src.policy.jas_voc_policy import JAS_voc_policy
from src.policy.jas_policy import RandomPolicy, ExhaustivePolicy, RandomNPolicy
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from src.utils.utils import sigma_to_tau
import numpy as np
from src.utils.env_export import format_payoff


# Configure seaborn in a single call. sns.set() is an alias of sns.set_theme(),
# and every call re-applies the defaults for the arguments it is not given, so
# the previous trailing bare sns.set_theme() reset font_scale back to 1 and the
# requested 1.5 scaling never took effect.
sns.set_theme(font_scale=1.5, rc={'figure.figsize': (10, 6)})

%load_ext autoreload
%autoreload 2

In [2]:
# Load the raw export. The context manager closes the file handle
# deterministically instead of leaving it to the garbage collector.
with open("./data/dataclips_3.json") as data_file:
    data = json.load(data_file)

In [3]:
# Positions of the columns we need inside each row of data["values"],
# looked up by name in data["fields"].
language_index, response_data_index, begin_index, end_index = (
    data["fields"].index(field_name)
    for field_name in ("language", "datastring", "beginhit", "endhit")
)

# strptime pattern used to parse the begin/end timestamps.
f = '%Y-%m-%d %H:%M:%S.%f'

In [4]:
# Environment parameters, fixed from the paper.
num_projects = 5
num_criteria = 6

# Six values handed to MouselabConfig as criteria_scale.
weights = [
    0.0206795,
    0.0672084,
    0.2227102,
    0.1067428,
    0.4665054,
    0.1161537,
]

# Six standard deviations (presumably one per expert -- confirm), converted
# with sigma_to_tau.
expert_stds = [
    1.5616618964384956,
    1.449172525995787,
    1.5205992970609392,
    1.5469422429523034,
    1.511270787760881,
    1.455189251463794,
]
expert_taus = list(sigma_to_tau(np.array(expert_stds)))

# Six-element mean / standard-deviation lists used to build the initial state.
mu = [
    3.6,
    3.1666666666666665,
    3.6,
    3.1333333333333333,
    3.6666666666666665,
    2.3,
]
sigma = [
    1.3544307876819288,
    1.2617266038997932,
    1.3796551293211172,
    1.2521246311585852,
    1.5161960871578068,
    0.9523111632886272,
]

# A single leading 0 entry followed by the mu / sigma lists repeated once per project.
init = create_init(
    [0] + mu * num_projects,
    [0] + sigma * num_projects,
)
expert_costs = [0.002] * 6

config = MouselabConfig(
    num_projects,
    num_criteria,
    expert_costs,
    expert_taus,
    init,
    criteria_scale=weights,
    term_belief=False,
    max_actions=5,
)
env = MouselabJas(config=config)

In [5]:
def convert_action(project: int, criteria: int, expert: int, config: MouselabConfig) -> Action:
    """Translate a (project, criteria, expert) click into an environment Action.

    The query index is ``(1 + criteria) + project * config.num_criterias``:
    queries are numbered from 1 and each project occupies a consecutive run
    of ``config.num_criterias`` indices.

    Args:
        project: Index of the clicked project.
        criteria: Index of the clicked criterion within that project.
        expert: Expert index, passed through unchanged to the Action.
        config: Configuration providing ``num_criterias``.

    Returns:
        The Action built from ``expert`` and the computed query index.
    """
    project_offset = project * config.num_criterias
    return Action(expert=expert, query=(1 + criteria) + project_offset)

In [13]:
df_index = ["Participant", "TrialId", "Score", "ExpectedScore", "NumClicks", "Actions", "Selection",
        "Seed"]
df_data = []

# NOTE(review): nothing in this cell fills the next three names besides
# known_workers; they are kept defined because later cells may rely on them.
bonus_data = {}
known_workers = []
good_responses = 0
demographics = []

# Inclusion criteria: fewer than MAX_QUIZ_FAILURES failed quizzes and exactly
# NUM_TEST_TRIALS recorded test trials.
MAX_QUIZ_FAILURES = 3
NUM_TEST_TRIALS = 10


def _trial_has_prefix(trialdata, prefix):
    """Return True when the trial record has a 'trial_id' starting with `prefix`."""
    return 'trial_id' in trialdata and trialdata['trial_id'].startswith(prefix)


# Parse raw mturk data into dataframe.
# enumerate(tqdm(...)) rather than tqdm(enumerate(...)) lets tqdm see the
# length of the list and report a total.
for p_index, p_data in enumerate(tqdm(data["values"])):
    language = p_data[language_index]
    response_data = p_data[response_data_index]
    if p_data[begin_index] and p_data[end_index]:
        begin = datetime.datetime.strptime(p_data[begin_index], f)
        end = datetime.datetime.strptime(p_data[end_index], f)
        duration = (end - begin).total_seconds()
    else:
        duration = None

    # Filter out empty responses
    if response_data is None:
        continue
    p_res_obj = json.loads(response_data)
    # Skip debug sessions
    if p_res_obj["workerId"].startswith("debug"):
        continue

    condition = p_res_obj["condition"]
    # The row position is used as the participant id instead of
    # p_res_obj["workerId"].
    # NOTE(review): row positions are unique, so the duplicate check below
    # cannot fire as written.
    worker = p_index
    if worker in known_workers:
        print("Duplicate worker", worker)
    else:
        known_workers.append(worker)

    questiondata = p_res_obj["questiondata"]
    quiz_failures = questiondata.get("quiz_failures", 0)
    bonus = questiondata.get("final_bonus", 0)
    # NOTE(review): this record is built but never stored (e.g. in
    # `demographics`); kept so the cell's behavior is unchanged.
    participant_survey = {"Participant": worker, "Condition": condition, "Language": language, "QuizAttempts": 0, "QuizFailures": quiz_failures, "Bonus": bonus, "Duration": duration}

    trials = [entry['trialdata'] for entry in p_res_obj["data"]]
    completed_test_trials = sum(_trial_has_prefix(trial, "test") for trial in trials)
    if not ((quiz_failures < MAX_QUIZ_FAILURES) and (completed_test_trials == NUM_TEST_TRIALS)):
        continue

    for trialdata in trials:
        if _trial_has_prefix(trialdata, "test"):
            trial_id = trialdata['trial_id']
            seed = int(trialdata['seed'])
            ground_truth = trialdata['ground_truth']
            # Each click is (project, criteria, expert)
            clicks = trialdata["clicks"]
            num_clicks = len(clicks)
            selected_project = trialdata["selected_project"]
            term_reward = trialdata["reward"]
            expected_reward = trialdata["expected_reward"]

            # Replay the clicks in the local environment and accumulate the
            # rewards returned by each step.
            env.reset(seed=seed)
            actions = [convert_action(*click, config) for click in clicks]
            cost = 0
            for action in actions:
                _, reward, _, _ = env.step(action)
                cost += reward
            # Query indices belonging to the selected project.
            path = np.array(range(1, config.num_criterias+1))+(selected_project*config.num_criterias)
            env_expected_reward = cost + env.expected_path_value(path, env.state)
            env_term_reward = cost + env.path_value(path)

            # The recorded trial must agree with the replayed environment.
            assert np.all(np.isclose(ground_truth, env.ground_truth.tolist()))
            assert np.all(np.isclose(np.array(format_payoff(config.num_projects, config.num_criterias, env.expert_truths.tolist())), trialdata['payoff_matrix']))
            assert np.isclose(term_reward, env_term_reward)
            assert np.isclose(expected_reward, env_expected_reward)

            # Column order matches df_index.
            df_data.append([worker, trial_id, term_reward, expected_reward, num_clicks, clicks, selected_project, seed])
        elif _trial_has_prefix(trialdata, "train"):
            # Training trials are not analysed.
            continue
        elif trialdata.get("trial_type") == "survey-text":
            # NOTE(review): this prints participants' free-text survey answers
            # (age, gender, feedback) into the notebook output.
            print(trialdata["response"])


df = pd.DataFrame(df_data, columns=df_index)
print("Responses:", len(df["Participant"].unique()))

19it [00:00, 88.81it/s]

{'Age': '59', 'Gender': 'Female', 'Issues': 'None', 'Feedback': ''}
{'Age': '21', 'Gender': 'Male', 'Issues': '', 'Feedback': ''}
{'Age': '38', 'Gender': 'Male', 'Issues': 'None', 'Feedback': 'None'}
{'Age': '29', 'Gender': 'Male', 'Issues': 'No', 'Feedback': 'None'}
{'Age': '21', 'Gender': 'Male', 'Issues': 'No issues', 'Feedback': '-'}
{'Age': '23', 'Gender': 'male', 'Issues': '', 'Feedback': ''}
{'Age': '47', 'Gender': 'male', 'Issues': 'no', 'Feedback': ''}
{'Age': '20', 'Gender': 'Male', 'Issues': 'No it was smoooth sailing for me', 'Feedback': 'It was cool. The thrill of uncertity made it more fun like roullete'}
{'Age': '20', 'Gender': 'female', 'Issues': '', 'Feedback': ''}
{'Age': '25', 'Gender': 'FEMALE', 'Issues': 'NO', 'Feedback': 'IT WAS AMAZING'}
{'Age': '32', 'Gender': 'Female', 'Issues': 'None', 'Feedback': 'None'}
{'Age': '36', 'Gender': 'female', 'Issues': 'no', 'Feedback': ''}
{'Age': '25', 'Gender': 'female', 'Issues': 'no', 'Feedback': 'none'}
{'Age': '20', 'Gender

45it [00:00, 94.68it/s] 

{'Age': '45', 'Gender': 'Male', 'Issues': 'no', 'Feedback': ''}
{'Age': '22', 'Gender': 'male', 'Issues': 'no', 'Feedback': ''}
{'Age': '40', 'Gender': 'female', 'Issues': 'no', 'Feedback': ''}
{'Age': '27', 'Gender': 'Male', 'Issues': 'No', 'Feedback': 'None'}
{'Age': '26', 'Gender': 'FEMALE', 'Issues': 'NO', 'Feedback': 'NO'}
{'Age': '24', 'Gender': 'female', 'Issues': 'yes', 'Feedback': 'improve your imstriclk\n'}
{'Age': '25', 'Gender': 'Man', 'Issues': 'No issues.', 'Feedback': ''}
{'Age': '37', 'Gender': 'MAle', 'Issues': 'No issue', 'Feedback': 'Good luck with the experiment'}
{'Age': '57', 'Gender': 'Male', 'Issues': 'No.', 'Feedback': 'I noticed the identical nature of the scores and chose from the experts who appeared to have the highest star ratings. I sought their advice on the one factor with the largest scaleability.'}
{'Age': '30', 'Gender': 'Male', 'Issues': 'No. ', 'Feedback': ''}
{'Age': '23', 'Gender': 'Male', 'Issues': 'None', 'Feedback': 'The variables were too ran

72it [00:00, 66.14it/s]

{'Age': '22', 'Gender': 'Male', 'Issues': 'No issues but I think that instruction could be a little easier to understand ', 'Feedback': ''}
{'Age': '20', 'Gender': 'Female', 'Issues': 'no', 'Feedback': 'no'}
{'Age': '22', 'Gender': 'Female', 'Issues': 'No', 'Feedback': 'No'}
{'Age': '23', 'Gender': 'female', 'Issues': 'no', 'Feedback': 'no'}
{'Age': '29', 'Gender': 'Male', 'Issues': 'no', 'Feedback': 'no'}
{'Age': '23', 'Gender': 'male', 'Issues': 'no', 'Feedback': 'no'}
{'Age': '30', 'Gender': 'female', 'Issues': 'n/a', 'Feedback': 'n/a'}
{'Age': '25', 'Gender': 'Female', 'Issues': 'No, everything was clear ', 'Feedback': ':)'}
{'Age': '24', 'Gender': 'female', 'Issues': 'no', 'Feedback': 'suggesting you use "they" instead of "he" when referring to the expert'}
{'Age': '24', 'Gender': 'female', 'Issues': 'no', 'Feedback': 'thank you for the detailed explanation'}
{'Age': '43', 'Gender': 'male', 'Issues': 'no', 'Feedback': 'Thank you!'}
{'Age': '34', 'Gender': 'Male', 'Issues': 'No', '

99it [00:01, 77.23it/s]

{'Age': '23', 'Gender': 'm', 'Issues': '', 'Feedback': ''}
{'Age': '23', 'Gender': 'female', 'Issues': 'no', 'Feedback': 'no'}
{'Age': '28', 'Gender': 'Male', 'Issues': 'N/A', 'Feedback': 'N/A'}
{'Age': '36', 'Gender': 'Male', 'Issues': 'No', 'Feedback': ''}
{'Age': '26', 'Gender': 'female', 'Issues': '', 'Feedback': 'the study was a bit confusing'}
{'Age': '25', 'Gender': 'Female', 'Issues': 'no', 'Feedback': 'n/a'}
{'Age': '26', 'Gender': 'Female', 'Issues': 'No', 'Feedback': 'No'}
{'Age': '34', 'Gender': 'm', 'Issues': 'no', 'Feedback': 'interesting maths ))'}
{'Age': '22', 'Gender': 'Male', 'Issues': '', 'Feedback': ''}
{'Age': '48', 'Gender': 'Male', 'Issues': 'No', 'Feedback': ''}
{'Age': '23', 'Gender': 'feminine', 'Issues': 'no', 'Feedback': 'no thanks'}
{'Age': '27', 'Gender': 'female', 'Issues': 'no', 'Feedback': 'none'}
{'Age': '34', 'Gender': 'Female', 'Issues': 'no', 'Feedback': 'It was an interesting survey'}
{'Age': '34', 'Gender': 'Male', 'Issues': 'No', 'Feedback': 'Ve

103it [00:01, 77.63it/s]

{'Age': '20', 'Gender': 'Female', 'Issues': 'No', 'Feedback': ''}
{'Age': '24', 'Gender': 'Female', 'Issues': 'Took time to load', 'Feedback': 'None'}
{'Age': '27', 'Gender': 'Male', 'Issues': '', 'Feedback': ''}
Responses: 80





In [14]:
# Aggregate the numeric columns only. "TrialId" (strings) and "Actions"
# (click lists) have no mean/std; older pandas dropped them with a warning
# and pandas >= 2.0 raises instead.
df.select_dtypes(include="number").agg(["mean", "std"])

  df.agg(["mean", "std"])


Unnamed: 0,Participant,Score,ExpectedScore,NumClicks,Selection,Seed
mean,53.225,3.681972,3.530881,3.2975,1.93,15.5
std,28.902744,0.7189,0.187646,2.02401,1.376579,2.874078


In [15]:
# Per-participant mean/std of the numeric trial columns. "TrialId" (strings)
# and "Actions" (click lists) are dropped explicitly: older pandas discarded
# them with a warning and pandas >= 2.0 raises instead.
participant_scores = (
    df.drop(columns=["TrialId", "Actions"])
    .groupby("Participant")
    .agg(["mean", "std"])
)

  participant_scores = df.groupby("Participant").agg(["mean", "std"])


In [16]:
# Display the per-participant mean/std table produced by the groupby aggregation.
participant_scores

Unnamed: 0_level_0,Score,Score,ExpectedScore,ExpectedScore,NumClicks,NumClicks,Selection,Selection,Seed,Seed
Unnamed: 0_level_1,mean,std,mean,std,mean,std,mean,std,mean,std
Participant,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
0,3.935127,0.444445,3.690650,0.076789,5.0,0.000000,2.1,1.370320,15.5,3.02765
1,3.830818,0.600608,3.641995,0.189835,3.4,1.173788,1.4,0.843274,15.5,3.02765
2,3.717115,0.653587,3.598912,0.131987,2.7,0.823273,1.0,1.247219,15.5,3.02765
3,3.459099,0.538862,3.401164,0.000000,0.0,0.000000,2.2,1.398412,15.5,3.02765
4,3.968477,0.634093,3.656017,0.106005,2.8,1.316561,1.7,1.159502,15.5,3.02765
...,...,...,...,...,...,...,...,...,...,...
97,3.706930,0.909851,3.532166,0.320513,5.0,0.000000,1.8,1.398412,15.5,3.02765
98,3.435311,1.038284,3.401164,0.000000,0.0,0.000000,2.5,1.433721,15.5,3.02765
99,3.922275,0.470476,3.715833,0.000621,5.0,0.000000,2.4,1.264911,15.5,3.02765
100,3.709821,0.584997,3.445381,0.146048,4.0,2.108185,2.1,1.728840,15.5,3.02765


In [17]:
# Order participants from highest to lowest mean Score, then display the table.
participant_scores = participant_scores.sort_values(
    by=("Score", "mean"),
    ascending=False,
)
participant_scores

Unnamed: 0_level_0,Score,Score,ExpectedScore,ExpectedScore,NumClicks,NumClicks,Selection,Selection,Seed,Seed
Unnamed: 0_level_1,mean,std,mean,std,mean,std,mean,std,mean,std
Participant,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
80,4.154553,0.541421,3.521751,0.142100,5.0,0.000000,2.6,1.264911,15.5,3.02765
81,4.142450,0.426249,3.691450,0.077093,4.6,0.966092,2.5,1.433721,15.5,3.02765
91,4.079858,0.542910,3.720976,0.002800,2.3,1.159502,1.9,1.370320,15.5,3.02765
90,4.078694,0.668906,3.543544,0.121341,5.0,0.000000,2.3,1.702939,15.5,3.02765
59,4.042940,0.312845,3.749503,0.052321,5.0,0.000000,2.2,1.229273,15.5,3.02765
...,...,...,...,...,...,...,...,...,...,...
41,3.209696,0.900599,3.247743,0.442606,5.0,0.000000,0.0,0.000000,15.5,3.02765
39,3.191295,0.976806,3.435621,0.081904,0.3,0.674949,2.6,1.173788,15.5,3.02765
82,3.169525,0.941334,3.525364,0.140156,4.8,0.632456,2.1,1.595131,15.5,3.02765
66,3.049475,0.783129,3.474709,0.139943,3.8,1.316561,1.2,1.549193,15.5,3.02765


In [18]:
# Bonus amounts for the higher- and lower-scoring halves of participants.
HIGH_BONUS = 0.75
LOW_BONUS = 0.25

# participant_scores is already sorted by mean Score (descending), so the
# index order is the ranking. With an odd number of participants, math.ceil
# gives the extra place to the high-bonus group.
sorted_participants = participant_scores.index.tolist()
half_participants = math.ceil(len(sorted_participants) / 2)
high_bonus = sorted_participants[:half_participants]
low_bonus = sorted_participants[half_participants:]

# Emit "participant,bonus" rows. The loop variable is not called `id`: at
# notebook scope that would rebind the builtin id() for every later cell.
for participant_id in high_bonus:
    print(f"{participant_id},{HIGH_BONUS}")
for participant_id in low_bonus:
    print(f"{participant_id},{LOW_BONUS}")

80,0.75
81,0.75
91,0.75
90,0.75
59,0.75
11,0.75
65,0.75
18,0.75
89,0.75
52,0.75
9,0.75
4,0.75
50,0.75
67,0.75
101,0.75
0,0.75
99,0.75
54,0.75
92,0.75
55,0.75
86,0.75
58,0.75
35,0.75
71,0.75
72,0.75
1,0.75
56,0.75
43,0.75
78,0.75
49,0.75
69,0.75
74,0.75
63,0.75
88,0.75
68,0.75
77,0.75
70,0.75
31,0.75
21,0.75
2,0.75
53,0.25
61,0.25
100,0.25
97,0.25
46,0.25
96,0.25
17,0.25
47,0.25
48,0.25
36,0.25
64,0.25
5,0.25
60,0.25
29,0.25
40,0.25
85,0.25
15,0.25
22,0.25
87,0.25
12,0.25
62,0.25
16,0.25
73,0.25
3,0.25
42,0.25
98,0.25
19,0.25
34,0.25
95,0.25
38,0.25
45,0.25
13,0.25
79,0.25
51,0.25
30,0.25
41,0.25
39,0.25
82,0.25
66,0.25
33,0.25
