In [1]:

import json
import pandas as pd
import numpy as np
from toolz import memoize
import datetime
import math

from tqdm import tqdm
from src.utils.mouselab_jas import MouselabJas
from src.utils.distributions import Normal, expectation
from src.utils.env_creation import create_tree, create_init
from src.utils.env_export import create_json
from src.utils.data_classes import MouselabConfig, Action
from simulation import run_simulation
from src.policy.jas_voc_policy import JAS_voc_policy
from src.policy.jas_policy import RandomPolicy, ExhaustivePolicy, RandomNPolicy
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from src.utils.utils import sigma_to_tau
import numpy as np
from src.utils.env_export import format_payoff


# Configure seaborn in a single call. Every `sns.set` / `sns.set_theme` call
# re-applies the defaults for the arguments it is not given, so the previous
# trailing bare `sns.set_theme()` reset `font_scale` back to 1.
sns.set_theme(font_scale=1.5, rc={'figure.figsize': (10, 6)})

# Reload edited modules automatically before each cell is executed.
%load_ext autoreload
%autoreload 2

In [2]:
# Load the raw data export. The context manager closes the file handle as
# soon as parsing is done instead of leaving it open for the kernel's lifetime.
with open("./data/dataclips_2.json") as data_file:
    data = json.load(data_file)

In [3]:
# Column positions of the fields read from each row of data["values"].
field_names = data["fields"]
language_index, response_data_index, begin_index, end_index = (
    field_names.index(field_name)
    for field_name in ("language", "datastring", "beginhit", "endhit")
)

# strptime pattern for the beginhit / endhit timestamps.
f = '%Y-%m-%d %H:%M:%S.%f'

In [4]:
# Environment parameters, fixed from the paper.
num_projects = 5
num_criteria = 6

# Passed to the config as `criteria_scale` (six entries, matching num_criteria).
weights = [
    0.0206795, 0.0672084, 0.2227102,
    0.1067428, 0.4665054, 0.1161537,
]

# Expert standard deviations, converted with sigma_to_tau for the config.
expert_stds = [
    1.5616618964384956, 1.449172525995787, 1.5205992970609392,
    1.5469422429523034, 1.511270787760881, 1.455189251463794,
]
expert_std_array = np.array(expert_stds)
expert_taus = list(sigma_to_tau(expert_std_array))

# Six mean / std values, repeated once per project below.
mu = [
    3.6, 3.1666666666666665, 3.6,
    3.1333333333333333, 3.6666666666666665, 2.3,
]
sigma = [
    1.3544307876819288, 1.2617266038997932, 1.3796551293211172,
    1.2521246311585852, 1.5161960871578068, 0.9523111632886272,
]

# A single leading 0 entry precedes the repeated values
# (presumably the root node of the tree -- TODO confirm against create_init).
initial_means = [0] + mu * num_projects
initial_stds = [0] + sigma * num_projects
init = create_init(initial_means, initial_stds)

expert_costs = [0.002] * 6

config = MouselabConfig(
    num_projects,
    num_criteria,
    expert_costs,
    expert_taus,
    init,
    criteria_scale=weights,
    term_belief=False,
    max_actions=5,
)
env = MouselabJas(config=config)

In [5]:
def convert_action(project: int, criteria: int, expert: int, config: MouselabConfig) -> Action:
    """Build the environment Action for a recorded (project, criteria, expert) click.

    The query index is ``1 + criteria + project * config.num_criterias``: each
    project occupies a contiguous run of ``config.num_criterias`` indices and
    numbering starts at 1.
    """
    project_offset = project * config.num_criterias
    return Action(expert=expert, query=(1 + criteria) + project_offset)

In [26]:
df_index = ["Participant", "TrialId", "Score", "ExpectedScore", "NumClicks", "Actions", "Selection",
        "Seed"]
df_data = []

bonus_data = {}
known_workers = []
good_responses = 0
demographics = []

# Inclusion criteria: a participant is kept only with fewer than
# MAX_QUIZ_FAILURES quiz failures and exactly NUM_TEST_TRIALS test trials.
MAX_QUIZ_FAILURES = 3
NUM_TEST_TRIALS = 10

# Parse raw mturk data into dataframe.
# tqdm wraps the list itself (not the enumerate object) so it knows the total.
for p_index, p_data in enumerate(tqdm(data["values"])):
    language = p_data[language_index]
    response_data = p_data[response_data_index]

    # Session duration in seconds, when both timestamps were recorded.
    if p_data[begin_index] and p_data[end_index]:
        begin = datetime.datetime.strptime(p_data[begin_index], f)
        end = datetime.datetime.strptime(p_data[end_index], f)
        duration = (end - begin).total_seconds()
    else:
        duration = None

    # Filter out empty responses.
    if response_data is None:
        continue
    p_res_obj = json.loads(response_data)

    # Filter out debug sessions.
    if p_res_obj["workerId"].startswith("debug"):
        continue

    condition = p_res_obj["condition"]
    # The row index is used as participant id instead of the raw workerId.
    worker = p_index
    if worker in known_workers:
        print("Duplicate worker", worker)
    else:
        known_workers.append(worker)

    p_res = p_res_obj["data"]
    question_data = p_res_obj["questiondata"]
    quiz_failures = question_data.get("quiz_failures", 0)
    bonus = question_data.get("final_bonus", 0)
    # NOTE(review): this summary is built but not stored anywhere in this cell.
    participant_survey = {"Participant": worker, "Condition": condition, "Language": language, "QuizAttempts": 0, "QuizFailures": quiz_failures, "Bonus": bonus, "Duration": duration}

    trials = [entry["trialdata"] for entry in p_res]
    completed_test_trials = sum(
        1 for trial in trials
        if "trial_id" in trial and trial["trial_id"].startswith("test")
    )
    if not (quiz_failures < MAX_QUIZ_FAILURES and completed_test_trials == NUM_TEST_TRIALS):
        continue

    for trial in trials:
        trial_id = trial.get("trial_id", "")
        if trial_id.startswith("test"):
            seed = int(trial["seed"])
            ground_truth = trial["ground_truth"]
            # Each click is (project, criteria, expert).
            clicks = trial["clicks"]
            num_clicks = len(clicks)
            selected_project = trial["selected_project"]
            term_reward = trial["reward"]
            expected_reward = trial["expected_reward"]

            # Replay the clicks in the real environment to recompute the rewards.
            env.reset(seed=seed)
            actions = [convert_action(*click, config) for click in clicks]
            cost = 0  # running sum of the rewards returned by env.step
            for action in actions:
                _, reward, _, _ = env.step(action)
                cost += reward
            path = np.array(range(1, config.num_criterias+1))+(selected_project*config.num_criterias)
            env_expected_reward = cost + env.expected_path_value(path, env.state)
            env_term_reward = cost + env.path_value(path)

            # The recorded trial must match the replayed environment.
            trial_label = f"participant {worker}, trial {trial_id}"
            assert np.all(np.isclose(ground_truth, env.ground_truth.tolist())), \
                f"ground truth mismatch ({trial_label})"
            assert np.all(np.isclose(np.array(format_payoff(config.num_projects, config.num_criterias, env.expert_truths.tolist())), trial['payoff_matrix'])), \
                f"payoff matrix mismatch ({trial_label})"
            assert np.isclose(term_reward, env_term_reward), \
                f"reward mismatch ({trial_label})"
            assert np.isclose(expected_reward, env_expected_reward), \
                f"expected reward mismatch ({trial_label})"

            # Same column order as df_index.
            df_data.append([worker, trial_id, term_reward, expected_reward, num_clicks, clicks, selected_project, seed])
        elif trial_id.startswith("train"):
            # Training trials are not analysed.
            continue
        elif trial.get("trial_type") == "survey-text":
            # NOTE(review): this prints participants' survey answers into the
            # notebook output; clear the output before sharing the notebook.
            print(trial["response"])


df = pd.DataFrame(df_data, columns=df_index)
print("Responses:", len(df["Participant"].unique()))

7it [00:00, 101.49it/s]

{'Age': '20', 'Gender': 'man', 'Issues': 'not at all', 'Feedback': 'i think there was too little experts for 5 plans to have some fun from checking them '}
{'Age': '23', 'Gender': 'male', 'Issues': 'no', 'Feedback': ''}
{'Age': '29', 'Gender': 'Female', 'Issues': 'not at all', 'Feedback': ''}
{'Age': '66', 'Gender': 'Male', 'Issues': 'none', 'Feedback': ''}
{'Age': '21', 'Gender': 'female', 'Issues': 'no', 'Feedback': ''}
Responses: 5





In [27]:
# Preview the first rows only instead of dumping the whole frame.
df.head(10)

Unnamed: 0,Participant,TrialId,Score,ExpectedScore,NumClicks,Actions,Selection,Seed
0,1,test_0,3.916634,3.868225,5,"[[2, 4, 1], [3, 4, 1], [3, 4, 5], [3, 2, 1], [...",3,20
1,1,test_1,4.974508,3.96436,5,"[[3, 4, 1], [3, 4, 5], [3, 2, 1], [3, 0, 4], [...",3,19
2,1,test_2,3.878941,3.61436,5,"[[0, 4, 1], [1, 4, 1], [1, 4, 5], [1, 1, 1], [...",1,18
3,1,test_3,4.136771,3.961448,4,"[[3, 4, 1], [2, 4, 2], [2, 2, 1], [2, 4, 5]]",2,17
4,1,test_4,3.518098,3.716219,5,"[[1, 4, 1], [2, 4, 1], [0, 4, 1], [3, 4, 1], [...",4,16
5,1,test_5,3.008549,3.496444,5,"[[1, 4, 1], [2, 4, 1], [0, 4, 5], [1, 4, 5], [...",1,15
6,1,test_6,4.10266,3.354563,5,"[[1, 4, 1], [2, 4, 5], [2, 4, 1], [2, 1, 1], [...",2,14
7,1,test_7,4.492135,3.582355,5,"[[0, 4, 1], [0, 4, 5], [0, 0, 0], [0, 2, 1], [...",0,13
8,1,test_8,4.330025,3.716219,5,"[[1, 4, 1], [1, 4, 5], [0, 4, 1], [0, 4, 5], [...",2,12
9,1,test_9,2.873171,3.66034,4,"[[1, 4, 1], [1, 4, 5], [0, 4, 5], [0, 4, 1]]",0,11


In [28]:
# Aggregate only the numeric columns. `TrialId` and `Actions` are non-numeric;
# relying on `agg` to drop them implicitly is deprecated and newer pandas
# versions raise instead.
numeric_columns = ["Score", "ExpectedScore", "NumClicks", "Selection", "Seed"]
participant_scores = df.groupby("Participant")[numeric_columns].agg(["mean", "std"])

  participant_scores = df.groupby("Participant").agg(["mean", "std"])


In [29]:
# Per-participant mean and standard deviation of each aggregated column.
participant_scores

Unnamed: 0_level_0,Score,Score,ExpectedScore,ExpectedScore,NumClicks,NumClicks,Selection,Selection,Seed,Seed
Unnamed: 0_level_1,mean,std,mean,std,mean,std,mean,std,mean,std
Participant,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
1,3.923149,0.647861,3.693453,0.197244,4.8,0.421637,1.8,1.316561,15.5,3.02765
2,3.995218,0.529958,3.676332,0.144489,4.3,0.948683,1.9,1.100505,15.5,3.02765
4,3.387435,0.539269,3.384668,0.102706,1.8,1.873796,2.1,1.595131,15.5,3.02765
5,3.631679,0.98338,3.549106,0.116594,3.4,0.966092,1.9,1.197219,15.5,3.02765
6,3.99489,0.634546,3.559445,0.143454,4.3,1.636392,2.8,1.229273,15.5,3.02765


In [30]:
# Order participants from highest to lowest mean score, then display the table.
ranking_column = ("Score", "mean")
participant_scores = participant_scores.sort_values(by=ranking_column, ascending=False)
participant_scores

Unnamed: 0_level_0,Score,Score,ExpectedScore,ExpectedScore,NumClicks,NumClicks,Selection,Selection,Seed,Seed
Unnamed: 0_level_1,mean,std,mean,std,mean,std,mean,std,mean,std
Participant,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
2,3.995218,0.529958,3.676332,0.144489,4.3,0.948683,1.9,1.100505,15.5,3.02765
6,3.99489,0.634546,3.559445,0.143454,4.3,1.636392,2.8,1.229273,15.5,3.02765
1,3.923149,0.647861,3.693453,0.197244,4.8,0.421637,1.8,1.316561,15.5,3.02765
5,3.631679,0.98338,3.549106,0.116594,3.4,0.966092,1.9,1.197219,15.5,3.02765
4,3.387435,0.539269,3.384668,0.102706,1.8,1.873796,2.1,1.595131,15.5,3.02765


In [31]:
# Split the participants, in the current row order of `participant_scores`,
# into a high-bonus first half and a low-bonus second half. With an odd
# number of participants the extra one goes to the high-bonus group.
HIGH_BONUS_AMOUNT = 0.75
LOW_BONUS_AMOUNT = 0.25

sorted_participants = participant_scores.index.tolist()
half_participants = math.ceil(len(sorted_participants) / 2)
high_bonus = sorted_participants[:half_participants]
low_bonus = sorted_participants[half_participants:]

# Emit one "<participant>,<bonus>" line per participant. The loop variable is
# not named `id`, so the builtin `id` is no longer shadowed in the notebook
# namespace.
for participant in high_bonus:
    print(f"{participant},{HIGH_BONUS_AMOUNT}")
for participant in low_bonus:
    print(f"{participant},{LOW_BONUS_AMOUNT}")

2,0.75
6,0.75
1,0.75
5,0.25
4,0.25
