# POMDP APPROACH

In [12]:
from dai_pomdp.ModelLearning.utils import *
from dai_pomdp.Data import *
from dai_pomdp.Ballots import *
from dai_pomdp.helpers import *

import random
import time
import subprocess
#from random import random
from os import mkdir, rmdir
from copy import deepcopy
from math import floor
from functools import reduce
import numpy as np
import math

# ACTIONS
# Sentinel kept in the answers list while an item has not been classified yet.
CONST_NO_ANSWER = -1
# Action ids compared against the policy's best action inside solve().
CONST_REQUEST_VOTE = 0  # collect one more worker vote for the item
CONST_SUBMIT_ZERO = 1  # stop and record answer 0 for the item
CONST_SUBMIT_ONE = 2  # stop and record answer 1 for the item

def round_to_3(value):
    """Return ``value`` rounded to three decimal places."""
    return round(value, ndigits=3)

def are_items_unresolved(answers):
    """Tell whether at least one entry of ``answers`` still equals ``CONST_NO_ANSWER``."""
    for answer in answers:
        if answer == CONST_NO_ANSWER:
            return True
    return False

def get_unresolved_items(answers):
    """Return the positions of ``answers`` whose value still equals ``CONST_NO_ANSWER``."""
    pending = []
    for position, answer in enumerate(answers):
        if answer == CONST_NO_ANSWER:
            pending.append(position)
    return pending

def get_random_worker(workers_error_rates, item_id, votes):
    """Pick, uniformly at random, a worker that has not voted on ``item_id`` yet.

    Args:
        workers_error_rates: sequence of error rates; a worker's id is its
            position in this sequence.
        item_id: key of the item inside ``votes``.
        votes: mapping ``item id -> {worker id: vote}``.

    Returns:
        Tuple ``(worker_id, error_rate)`` for the selected worker.

    Raises:
        ValueError: every worker has already voted on the item.
    """
    # Membership tests only read the dict, so no defensive copy is needed.
    already_voted = votes[item_id]
    total_workers = len(workers_error_rates)
    available = [worker_id for worker_id in range(total_workers)
                 if worker_id not in already_voted]

    if not available:
        # Carry the diagnostic counts in the exception instead of printing them.
        raise ValueError(
            f"Unused empty!? used: {len(already_voted)}, "
            f"workers: {total_workers}, unused: 0"
        )

    selected_worker_id = np.random.choice(available)
    return selected_worker_id, workers_error_rates[selected_worker_id]

def get_accuracy(item_diff, worker_error_rate):
    """Return the probability that a worker votes correctly on an item.

    Computed as ``0.5 * (1 + (1 - item_diff) ** worker_error_rate)``.
    """
    skill_term = (1.0 - item_diff) ** worker_error_rate
    return 0.5 * (1.0 + skill_term)

def get_worker_vote(item_id, items_votes, items_difficulties, items_gt, workers_error_rates):
    """Simulate one vote on ``item_id`` from a randomly chosen unused worker.

    The worker answers with the ground-truth label with probability
    ``get_accuracy(item difficulty, worker error rate)`` and with the flipped
    label otherwise.

    Returns:
        Tuple ``(worker_id, vote)``.
    """
    worker_id, error_rate = get_random_worker(workers_error_rates, item_id, items_votes)
    accuracy = get_accuracy(items_difficulties[item_id], error_rate)

    # Single Bernoulli draw: truthy means the worker answered correctly.
    answered_correctly = np.random.binomial(1, accuracy)
    truth = items_gt[item_id]
    vote = truth if answered_correctly else 1 - truth
    return worker_id, vote
    
def get_worker_error_rate_estimation(items_votes):
    """Estimate per-worker error rates from all votes collected so far.

    Estimation only runs once every item has at least two votes; otherwise an
    empty dict is returned.

    Args:
        items_votes: mapping ``item id -> {worker id: vote}``.

    Returns:
        Mapping ``worker id -> estimated gamma``; ``{}`` when some item has
        fewer than two votes.
    """
    # min 2 votes per item
    if not all(len(item_votes) >= 2 for item_votes in items_votes.values()):
        return {}

    # writeToEMFormat returns a worker id -> integer index mapping
    # (presumably it also writes the votes for the EM step - confirm).
    workers_to_int = writeToEMFormat(items_votes)

    # Only the gammas are used; position i belongs to the worker with index i.
    gammas, _difficulties, _posteriors = get_results()

    # Invert the mapping once instead of scanning a list for every gamma.
    # setdefault keeps the first worker seen for an index, like list.index did.
    int_to_worker = {}
    for worker_id, em_index in workers_to_int.items():
        int_to_worker.setdefault(em_index, worker_id)

    return {int_to_worker[em_index]: gamma for em_index, gamma in enumerate(gammas)}


def get_error_rate(worker_id, estimated_error_rates, avg_error_rate):
    """Return the error rate to assume for ``worker_id``.

    Preference order: the worker's own estimate, then the mean of all known
    estimates, then ``avg_error_rate`` when nothing has been estimated.
    """
    if worker_id in estimated_error_rates:
        return estimated_error_rates[worker_id]

    if not estimated_error_rates:
        return avg_error_rate

    # AVG over known workers
    known_rates = estimated_error_rates.values()
    return sum(known_rates) / len(estimated_error_rates)


def get_worker_error_rate(worker_id, estimated_error_rates, avg_error_rate, estimate_after, have_submitted):
    """Return the error rate used when updating a belief with this worker's vote.

    When ``estimate_after`` is set and nothing has been submitted yet
    (``have_submitted`` is false), ``avg_error_rate`` is returned as is. In
    every other case the decision is delegated to ``get_error_rate``.
    """
    if estimate_after and not have_submitted:
        return avg_error_rate
    return get_error_rate(worker_id, estimated_error_rates, avg_error_rate)

#data utils
def generate_gold_data(items_num, possitive_percentage):
    """Build a shuffled list of ground-truth labels.

    items_num - number of items
    possitive_percentage - [0,1] fraction of positive items

    Returns a list of ``items_num`` labels (1 = positive, 0 = negative) in
    random order.
    """
    positives = int(round(((possitive_percentage * 100) * items_num) / 100))
    negatives = items_num - positives

    labels = [1] * positives
    labels.extend([0] * negatives)
    random.shuffle(labels)
    return labels

def solve(num_states, states_difficulties, avg_error_rate, policy, workers_error_rates, items_difficulties, items_gt, estimate_after=True):
    """Simulate labelling every item by following a POMDP policy.

    Each round, the best action for every unanswered item is looked up from
    its belief: either request one more vote or submit answer 0 / 1. Items
    that requested a vote receive one simulated vote from a random worker that
    has not voted on them yet. Worker error rates are then re-estimated from
    all votes and the beliefs of the voted items are updated.

    Args:
        num_states: number of belief states; the last one is the terminating
            state and starts with belief 0.
        states_difficulties: forwarded to ``updateBelief``.
        avg_error_rate: fallback worker error rate.
        policy: forwarded to ``findBestAction``.
        workers_error_rates: true error rates used to simulate votes, indexed
            by worker id.
        items_difficulties: true difficulty of each item.
        items_gt: ground-truth label of each item.
        estimate_after: when true, estimated error rates are used only once
            ``have_submitted`` is true; before that ``avg_error_rate`` is used.

    Returns:
        Tuple ``(answers, items_votes)``: the submitted label per item and the
        mapping ``item id -> {worker id: vote}``.
    """
    num_items = len(items_gt)
    actions = range(0, 3)  # 0,1,2

    items_votes = {item_id: {} for item_id in range(num_items)}

    # init beliefs: uniform over every state except the terminating one
    initial_belief = [1] * num_states
    initial_belief[num_states - 1] = 0
    initial_belief = normalize(initial_belief)
    beliefs = [deepcopy(initial_belief) for _ in range(num_items)]

    answers = [CONST_NO_ANSWER] * num_items

    while are_items_unresolved(answers):
        pending = get_unresolved_items(answers)

        items_to_vote = []
        for item_id in pending:
            best_action = int(findBestAction(actions, policy, beliefs[item_id]))
            if best_action == CONST_REQUEST_VOTE:
                items_to_vote.append(item_id)
            elif best_action == CONST_SUBMIT_ZERO:
                answers[item_id] = 0
            elif best_action == CONST_SUBMIT_ONE:
                answers[item_id] = 1

        # True once at least one item had been answered before this round began.
        have_submitted = len(pending) != num_items

        for item_id in items_to_vote:
            worker_id, vote = get_worker_vote(item_id, items_votes, items_difficulties, items_gt, workers_error_rates)
            items_votes[item_id][worker_id] = vote

        estimated_error_rates = get_worker_error_rate_estimation(items_votes)

        for item_id in items_to_vote:
            # Dicts keep insertion order, so the last entry is this round's vote.
            last_worker_id, last_vote = list(items_votes[item_id].items())[-1]
            error_rate = get_worker_error_rate(last_worker_id, estimated_error_rates, avg_error_rate,
                                               estimate_after, have_submitted)
            beliefs[item_id] = updateBelief(beliefs[item_id], last_vote, states_difficulties, error_rate)

    return answers, items_votes

In [2]:
def genPOMDP(filename, fnc, fpc, cost_vote, gammas, numberOfWorkerPools, difficulties=None):
    """Write the POMDP model for the labelling task to ``filename``.

    The file has ``discount`` / ``values`` / ``states`` / ``actions`` /
    ``observations`` headers followed by ``T:``, ``O:`` and ``R:`` lines (the
    layout matches Cassandra's .POMDP text format).

    State layout, with ``n = len(difficulties)``: states ``0..n-1`` have true
    answer 0, states ``n..2n-1`` have true answer 1 (one state per difficulty
    level), and state ``2n`` is the absorbing state reached after submitting.

    Actions ``0..numberOfWorkerPools-1`` request a vote from that worker pool;
    the next two actions submit answer 0 and answer 1.

    Args:
        filename: path of the file to create (overwritten if it exists).
        fnc: reward written for submitting 0 when the true answer is 1.
        fpc: reward written for submitting 1 when the true answer is 0.
        cost_vote: reward written for every vote-request action.
        gammas: per-pool value passed to ``calcAccuracy``.
        numberOfWorkerPools: number of vote-request actions.
        difficulties: difficulty levels; defaults to ``getDifficulties(0.1)``.
    """
    if difficulties is None:
        difficulties = getDifficulties(0.1)

    numDiffs = len(difficulties)
    reward_correct_answer = 0

    # Add one absorbing state
    numberOfStates = (numDiffs * 2) + 1
    absorbing = numberOfStates - 1
    numberOfActions = numberOfWorkerPools + 2
    SUBMITZERO = numberOfWorkerPools
    SUBMITONE = numberOfWorkerPools + 1

    # The context manager closes the file even if a write or calcAccuracy raises.
    with open(filename, 'w') as file:
        file.write('discount: 0.9999\n')
        file.write('values: reward\n')
        file.write('states: %d\n' % numberOfStates)
        file.write('actions: %d\n' % numberOfActions)
        file.write('observations: Zero One None\n')

        # Requesting a vote never changes the state.
        for i in range(numberOfStates):
            for k in range(numberOfWorkerPools):
                file.write('T: %d : %d : %d %f\n' % (k, i, i, 1.0))

        # Add transitions to absorbing state
        file.write('T: %d : * : %d %f\n' % (SUBMITZERO, absorbing, 1.0))
        file.write('T: %d : * : %d %f\n' % (SUBMITONE, absorbing, 1.0))

        # Add observations in absorbing state
        file.write('O: * : %d : None %f\n' % (absorbing, 1.0))

        for v in range(2):
            for diffState in range(numDiffs):
                state = v * numDiffs + diffState
                file.write('O: %d: %d : None %f\n' % (SUBMITZERO, state, 1.0))
                file.write('O: %d: %d : None %f\n' % (SUBMITONE, state, 1.0))
                for k in range(numberOfWorkerPools):
                    # calcAccuracy is assumed to be pure, so it is evaluated once.
                    accuracy = calcAccuracy(gammas[k], difficulties[diffState])
                    if v == 0:  # if the true answer is 0
                        prob_zero, prob_one = accuracy, 1.0 - accuracy
                    else:  # if the answer is 1
                        prob_zero, prob_one = 1.0 - accuracy, accuracy
                    file.write('O: %d : %d : Zero %f\n' % (k, state, prob_zero))
                    file.write('O: %d : %d : One %f\n' % (k, state, prob_one))

        for v in range(numberOfWorkerPools):
            file.write('R: %d : * : * : * %f\n' % (v, cost_vote))  # reward request more vote

        for i in range(absorbing):
            if i < numDiffs:  # true label = 0
                file.write('R: %d : %d : %d : * %f\n' % (SUBMITZERO, i, absorbing, reward_correct_answer))
                file.write('R: %d : %d : %d : * %f\n' % (SUBMITONE, i, absorbing, fpc))
            else:  # true label = 1
                file.write('R: %d : %d : %d : * %f\n' % (SUBMITONE, i, absorbing, reward_correct_answer))
                file.write('R: %d : %d : %d : * %f\n' % (SUBMITZERO, i, absorbing, fnc))

        # Add rewards in absorbing state
        file.write('R: * : %d : %d : * %f\n' % (absorbing, absorbing, 0))

### Base config with different wrong-answer costs

In [3]:
import algorithms_utils as alg_utils
import pandas as pd


# Experiment: base configuration, one pre-computed policy per wrong-answer cost.
# Column layout of the result rows appended to pomdp_results.csv below.
columns = ['name','num_workers','workers_distribution','policy_name','num_items','data_bal','items_diff','num_states','wrong_cost','cost','cost_std', 'recall','recall_std', 'precision', 'precision_std', 'loss', 'loss_std', 'f1', 'f1_std', 'fbeta', 'fbeta_std', 'estimate_after', 'avg_error_rate']

# Simulated dataset: every item has the same difficulty, labels are shuffled.
items_num = 1000
possitive_percentage = 0.5
item_difficulty = 0.5
items_difficulties = [item_difficulty] * items_num
items_ground_truth = generate_gold_data(items_num, possitive_percentage)
loss_ratio = 5  # third argument of compute_metrics below


workers_num = 100

# Fallback error rate handed to solve() for workers without an estimate.
avg_error_rate = 1

dist_name = "Normal"
normal_mean = 1
normal_std = 0.2

# True worker error rates, one draw per worker.
workers_error_rates = np.random.normal(normal_mean, normal_std, workers_num)

states_num = 23
policy_path = "/Users/pmaglione/Repos/adaptive-pomdp-solutions/dai_pomdp/ModelLearning/Policies/"

state_diff = getDifficulties(0.1)

#wrong_answer_costs = [10,20,50,500,1000]
wrong_answer_costs = [50]

# moment_error_estimation is passed to solve() as its estimate_after flag.
for moment_error_estimation in [False, True]:
    for wrong_cost in wrong_answer_costs:
        total_results = []

        #policy_name = f"unclassified{unclassify_val}.policy"
        policy_name = f"23states_base-c{wrong_cost}.policy"
        policy = readPolicy(policy_path + policy_name, states_num)

        losses = []
        recalls = []
        precisions = []
        costs = []
        f_ones = []
        f_betas = []

        # 10 independent simulation runs per configuration.
        for _ in range(10):
            answers, items_votes = solve(states_num, state_diff, avg_error_rate, policy, workers_error_rates, items_difficulties, items_ground_truth, moment_error_estimation)

            # Cost of a run = mean number of votes collected per item.
            costs.append(np.mean([len(v) for k,v in items_votes.items()]))    
            loss, recall, precision, f1, beta, f_beta = alg_utils.Metrics.compute_metrics(answers, items_ground_truth, loss_ratio)
            losses.append(loss)
            recalls.append(recall)
            precisions.append(precision)
            f_ones.append(f1)
            f_betas.append(f_beta)
            # end for iterations

        # One row per configuration: mean and std of each metric over the runs.
        result = [f"base-c{wrong_cost}", workers_num, dist_name+f"({normal_mean},{normal_std})", policy_name, items_num, possitive_percentage, 
                  item_difficulty, states_num, wrong_cost, round_to_3(np.mean(costs)), round_to_3(np.std(costs)), round_to_3(np.mean(recalls)), round_to_3(np.std(recalls)),
                 round_to_3(np.mean(precisions)), round_to_3(np.std(precisions)), round_to_3(np.mean(losses)), round_to_3(np.std(losses)),
                 round_to_3(np.mean(f_ones)), round_to_3(np.std(f_ones)), round_to_3(np.mean(f_betas)), round_to_3(np.std(f_betas)), moment_error_estimation, avg_error_rate]

        total_results.append(result)
        # Appended without a header row; the CSV is shared with other cells.
        pd.DataFrame(total_results, columns=columns).to_csv(f"/Users/pmaglione/Repos/adaptive-pomdp-solutions/dai_pomdp/results/pomdp_results.csv", mode='a', index=False, header=False)

    #end for

    #end for



### Base config with different FNC (false negative cost) and FPC (false positive cost)

In [None]:
import algorithms_utils as alg_utils
import pandas as pd


# Experiment: base configuration, one pre-computed policy per (fnc, fpc) pair.
# Column layout of the result rows appended to pomdp_results_diff.csv below.
columns = ['name','num_workers','workers_distribution','policy_name','num_items','data_bal','items_diff','num_states','cost','cost_std', 'recall','recall_std', 'precision', 'precision_std', 'loss', 'loss_std', 'f1', 'f1_std', 'fbeta', 'fbeta_std', 'estimate_after', 'avg_error_rate', 'wce','wce_std','fnc','fpc']

# Simulated dataset: every item has the same difficulty, labels are shuffled.
items_num = 1000
possitive_percentage = 0.5
item_difficulty = 0.5
items_difficulties = [item_difficulty] * items_num
items_ground_truth = generate_gold_data(items_num, possitive_percentage)
loss_ratio = 5  # not referenced again in this cell


workers_num = 100

# Fallback error rate handed to solve() for workers without an estimate.
avg_error_rate = 1

dist_name = "Normal"
normal_mean = 1
normal_std = 0.2

# True worker error rates, one draw per worker.
workers_error_rates = np.random.normal(normal_mean, normal_std, workers_num)

states_num = 23
policy_path = "/Users/pmaglione/Repos/adaptive-pomdp-solutions/dai_pomdp/ModelLearning/Policies/"

state_diff = getDifficulties(0.1)

# Costs are stored as negative values; they are negated before compute_metrics.
fncs = [-1,-5,-10,-500]
fpcs = [-1,-5,-10,-500]

# moment_error_estimation is passed to solve() as its estimate_after flag.
for moment_error_estimation in [False, True]:
    for fnc in fncs:
        for fpc in fpcs:
            total_results = []

            policy_name = f'wrong-cost-fnc{fnc}-fpc{fpc}.policy'
            policy = readPolicy(policy_path + policy_name, states_num)

            losses = []
            recalls = []
            precisions = []
            costs = []
            f_ones = []
            f_betas = []
            wces = []

            # 10 independent simulation runs per configuration.
            for _ in range(10):
                answers, items_votes = solve(states_num, state_diff, avg_error_rate, policy, workers_error_rates, items_difficulties, items_ground_truth, moment_error_estimation)

                # Cost of a run = mean number of votes collected per item.
                costs.append(np.mean([len(v) for k,v in items_votes.items()]))    
                
                # NOTE(review): other cells call compute_metrics with 3 arguments
                # and unpack 6 values; here it takes 4 and returns 7 - confirm
                # which signature alg_utils currently exposes.
                loss, recall, precision, f1, beta, f_beta, wce = alg_utils.Metrics.compute_metrics(answers, items_ground_truth, -1*fnc, -1*fpc)
                losses.append(loss)
                recalls.append(recall)
                precisions.append(precision)
                f_ones.append(f1)
                f_betas.append(f_beta)
                wces.append(wce)
                # end for iterations

            # One row per configuration: mean and std of each metric over the runs.
            result = [f"wrong-cost-fnc{fnc}-fpc{fpc}", workers_num, dist_name+f"({normal_mean},{normal_std})", policy_name, items_num, possitive_percentage, 
                      item_difficulty, states_num, round_to_3(np.mean(costs)), round_to_3(np.std(costs)), round_to_3(np.mean(recalls)), round_to_3(np.std(recalls)),
                     round_to_3(np.mean(precisions)), round_to_3(np.std(precisions)), round_to_3(np.mean(losses)), round_to_3(np.std(losses)),
                     round_to_3(np.mean(f_ones)), round_to_3(np.std(f_ones)), round_to_3(np.mean(f_betas)), round_to_3(np.std(f_betas)), moment_error_estimation, avg_error_rate, 
                      round_to_3(np.mean(wces)), round_to_3(np.std(wces)), fnc, fpc]

            total_results.append(result)
            # Appended without a header row; the CSV is shared with other cells.
            pd.DataFrame(total_results, columns=columns).to_csv(f"/Users/pmaglione/Repos/adaptive-pomdp-solutions/dai_pomdp/results/pomdp_results_diff.csv", mode='a', index=False, header=False)

        #end for

        #end for


### Different state numbers

#### Varying the number of states

In [9]:
# The original import line was garbled and commented out, although this cell
# calls alg_utils.Metrics.compute_metrics; restored so the cell runs on its own.
import algorithms_utils as alg_utils
import pandas as pd


# Experiment: vary the number of POMDP states, one policy per wrong-answer cost.
# Column layout of the result rows appended to pomdp_results.csv below.
columns = ['name','num_workers','workers_distribution','policy_name','num_items','data_bal','items_diff','num_states','wrong_cost','cost','cost_std', 'recall','recall_std', 'precision', 'precision_std', 'loss', 'loss_std', 'f1', 'f1_std', 'fbeta', 'fbeta_std', 'estimate_after', 'avg_error_rate']

# Simulated dataset: every item has the same difficulty, labels are shuffled.
items_num = 1000
possitive_percentage = 0.5
item_difficulty = 0.5
items_difficulties = [item_difficulty] * items_num
items_ground_truth = generate_gold_data(items_num, possitive_percentage)
loss_ratio = 5  # third argument of compute_metrics below


workers_num = 100

# Fallback error rate handed to solve() for workers without an estimate.
avg_error_rate = 1

dist_name = "Normal"
normal_mean = 1
normal_std = 0.2

# True worker error rates, one draw per worker.
workers_error_rates = np.random.normal(normal_mean, normal_std, workers_num)


policy_path = "/Users/pmaglione/Repos/adaptive-pomdp-solutions/dai_pomdp/ModelLearning/Policies/"

# states_nums[i] pairs with states_diffs[i]: 2 * (number of difficulty levels) + 1.
states_nums = [5,9,13,17,43,203]
states_diffs = [[0,1], #5 states
         [0, 0.33, 0.66, 1], #9 states
         [0, 0.2, 0.4, 0.6, 0.8, 1], # 13 states
         [0, 0.15, 0.3, 0.45, 0.6, 0.75, 0.9, 1], #17 states
         np.arange(0, 1.05, .05), #43
         np.arange(0, 1.01, .01) # 203
        ]

wrong_answer_costs = [10,20,50,500,1000]

for ind, states_num in enumerate(states_nums):
    # moment_error_estimation is passed to solve() as its estimate_after flag.
    for moment_error_estimation in [False, True]:
        for wrong_cost in wrong_answer_costs:
            total_results = []
            
            state_diff = states_diffs[ind]

            #policy_name = f"unclassified{unclassify_val}.policy"
            policy_name = f"{states_num}states-c{wrong_cost}.policy"
            policy = readPolicy(policy_path + policy_name, states_num)

            losses = []
            recalls = []
            precisions = []
            costs = []
            f_ones = []
            f_betas = []

            # 10 independent simulation runs per configuration.
            for _ in range(10):
                answers, items_votes = solve(states_num, state_diff, avg_error_rate, policy, workers_error_rates, items_difficulties, items_ground_truth, moment_error_estimation)

                # Cost of a run = mean number of votes collected per item.
                costs.append(np.mean([len(v) for k,v in items_votes.items()]))    
                loss, recall, precision, f1, beta, f_beta = alg_utils.Metrics.compute_metrics(answers, items_ground_truth, loss_ratio)
                losses.append(loss)
                recalls.append(recall)
                precisions.append(precision)
                f_ones.append(f1)
                f_betas.append(f_beta)
                # end for iterations

            # One row per configuration: mean and std of each metric over the runs.
            result = [f"vary_num_states-c{wrong_cost}", workers_num, dist_name+f"({normal_mean},{normal_std})", policy_name, items_num, possitive_percentage, 
                      item_difficulty, states_num, wrong_cost, round_to_3(np.mean(costs)), round_to_3(np.std(costs)), round_to_3(np.mean(recalls)), round_to_3(np.std(recalls)),
                     round_to_3(np.mean(precisions)), round_to_3(np.std(precisions)), round_to_3(np.mean(losses)), round_to_3(np.std(losses)),
                     round_to_3(np.mean(f_ones)), round_to_3(np.std(f_ones)), round_to_3(np.mean(f_betas)), round_to_3(np.std(f_betas)), moment_error_estimation, avg_error_rate]

            total_results.append(result)
            # Appended without a header row; the CSV is shared with other cells.
            pd.DataFrame(total_results, columns=columns).to_csv(f"/Users/pmaglione/Repos/adaptive-pomdp-solutions/dai_pomdp/results/pomdp_results.csv", mode='a', index=False, header=False)

        #end for

        #end for

#### Varying the number of states with different FNC and FPC

In [16]:
import algorithms_utils as alg_utils
import pandas as pd


# Experiment: vary the number of POMDP states, one policy per (fnc, fpc) pair.
# Column layout of the result rows appended to pomdp_results_diff.csv below.
columns = ['name','num_workers','workers_distribution','policy_name','num_items','data_bal','items_diff','num_states','cost','cost_std', 'recall','recall_std', 'precision', 'precision_std', 'loss', 'loss_std', 'f1', 'f1_std', 'fbeta', 'fbeta_std', 'estimate_after', 'avg_error_rate', 'wce','wce_std','fnc','fpc']

# Simulated dataset: every item has the same difficulty, labels are shuffled.
items_num = 1000
possitive_percentage = 0.5
item_difficulty = 0.5
items_difficulties = [item_difficulty] * items_num
items_ground_truth = generate_gold_data(items_num, possitive_percentage)
loss_ratio = 5  # not referenced again in this cell


workers_num = 100

# Fallback error rate handed to solve() for workers without an estimate.
avg_error_rate = 1

dist_name = "Normal"
normal_mean = 1
normal_std = 0.2

# True worker error rates, one draw per worker.
workers_error_rates = np.random.normal(normal_mean, normal_std, workers_num)

# states_nums[i] pairs with states_diffs[i]: 2 * (number of difficulty levels) + 1.
states_nums = [5,9,13,17,43,203]
states_diffs = [[0,1], #5 states
         [0, 0.33, 0.66, 1], #9 states
         [0, 0.2, 0.4, 0.6, 0.8, 1], # 13 states
         [0, 0.15, 0.3, 0.45, 0.6, 0.75, 0.9, 1], #17 states
         np.arange(0, 1.05, .05), #43
         np.arange(0, 1.01, .01) # 203
        ]


policy_path = "/Users/pmaglione/Repos/adaptive-pomdp-solutions/dai_pomdp/ModelLearning/Policies/"

# num_pools, gamma_media and cost_request_vote are not referenced again in this
# cell (presumably leftovers from generating the policies - confirm).
num_pools = 1
gamma_media = [1]
cost_request_vote = -1
# Costs are stored as negative values; they are negated before compute_metrics.
fncs = [-1,-5,-10,-500]
fpcs = [-1,-5,-10,-500]

for ind, states_num in enumerate(states_nums):
    # moment_error_estimation is passed to solve() as its estimate_after flag.
    for moment_error_estimation in [False, True]:
        for fnc in fncs:
            for fpc in fpcs:
                total_results = []
                
                state_diff = states_diffs[ind]

                name = f'vc-{states_num}states-fnc{fnc}-fpc{fpc}'
                policy_name = f'{name}.policy'
                policy = readPolicy(policy_path + policy_name, states_num)

                losses = []
                recalls = []
                precisions = []
                costs = []
                f_ones = []
                f_betas = []
                wces = []

                # 10 independent simulation runs per configuration.
                for _ in range(10):
                    answers, items_votes = solve(states_num, state_diff, avg_error_rate, policy, workers_error_rates, items_difficulties, items_ground_truth, moment_error_estimation)

                    # Cost of a run = mean number of votes collected per item.
                    costs.append(np.mean([len(v) for k,v in items_votes.items()]))    

                    loss, recall, precision, f1, beta, f_beta, wce = alg_utils.Metrics.compute_metrics(answers, items_ground_truth, -1*fnc, -1*fpc)
                    losses.append(loss)
                    recalls.append(recall)
                    precisions.append(precision)
                    f_ones.append(f1)
                    f_betas.append(f_beta)
                    wces.append(wce)
                    # end for iterations

                # One row per configuration: mean and std of each metric over the runs.
                result = [f"diff-states-fnc{fnc}-fpc{fpc}", workers_num, dist_name+f"({normal_mean},{normal_std})", policy_name, items_num, possitive_percentage, 
                          item_difficulty, states_num, round_to_3(np.mean(costs)), round_to_3(np.std(costs)), round_to_3(np.mean(recalls)), round_to_3(np.std(recalls)),
                         round_to_3(np.mean(precisions)), round_to_3(np.std(precisions)), round_to_3(np.mean(losses)), round_to_3(np.std(losses)),
                         round_to_3(np.mean(f_ones)), round_to_3(np.std(f_ones)), round_to_3(np.mean(f_betas)), round_to_3(np.std(f_betas)), moment_error_estimation, avg_error_rate, 
                          round_to_3(np.mean(wces)), round_to_3(np.std(wces)), fnc, fpc]

                total_results.append(result)
                # Appended without a header row; the CSV is shared with other cells.
                pd.DataFrame(total_results, columns=columns).to_csv(f"/Users/pmaglione/Repos/adaptive-pomdp-solutions/dai_pomdp/results/pomdp_results_diff.csv", mode='a', index=False, header=False)

            #end for

            #end for


### Bimodal 23 states

In [19]:
import algorithms_utils as alg_utils
import pandas as pd


# Experiment: bimodal worker population, one policy per wrong-answer cost.
# Column layout of the result rows appended to pomdp_results.csv below.
columns = ['name','num_workers','workers_distribution','policy_name','num_items','data_bal','items_diff','num_states','wrong_cost','cost','cost_std', 'recall','recall_std', 'precision', 'precision_std', 'loss', 'loss_std', 'f1', 'f1_std', 'fbeta', 'fbeta_std', 'estimate_after', 'avg_error_rate']

# Simulated dataset: every item has the same difficulty, labels are shuffled.
items_num = 1000
possitive_percentage = 0.5
item_difficulty = 0.5
items_difficulties = [item_difficulty] * items_num
items_ground_truth = generate_gold_data(items_num, possitive_percentage)
loss_ratio = 5  # third argument of compute_metrics below


workers_num = 100

# Fallback error rate handed to solve() for workers without an estimate.
avg_error_rate = 1

#bimodal
# Half of the workers are drawn around 0.2, the other half around 4.
workers_error_rates1 = np.random.normal(.2, .01, int(workers_num/2))
workers_error_rates2 = np.random.normal(4, .2, int(workers_num/2))
workers_error_rates = np.concatenate((workers_error_rates1, workers_error_rates2), axis=0)
dist_name = "BiModal"
# NOTE(review): normal_mean / normal_std only feed the recorded label
# "BiModal(1,0.2)"; they do not match the two distributions sampled above - confirm.
normal_mean = 1
normal_std = 0.2


states_num = 23
policy_path = "/Users/pmaglione/Repos/adaptive-pomdp-solutions/dai_pomdp/ModelLearning/Policies/"


wrong_answer_costs = [10,20,50,500,1000]
state_diff = getDifficulties(0.1)

# moment_error_estimation is passed to solve() as its estimate_after flag.
for moment_error_estimation in [False, True]:
    for wrong_cost in wrong_answer_costs:
        total_results = []

        #policy_name = f"unclassified{unclassify_val}.policy"
        policy_name = f"23states_base-c{wrong_cost}.policy"
        policy = readPolicy(policy_path + policy_name, states_num)

        losses = []
        recalls = []
        precisions = []
        costs = []
        f_ones = []
        f_betas = []

        # 10 independent simulation runs per configuration.
        for _ in range(10):
            answers, items_votes = solve(states_num, state_diff, avg_error_rate, policy, workers_error_rates, items_difficulties, items_ground_truth, moment_error_estimation)

            # Cost of a run = mean number of votes collected per item.
            costs.append(np.mean([len(v) for k,v in items_votes.items()]))    
            loss, recall, precision, f1, beta, f_beta = alg_utils.Metrics.compute_metrics(answers, items_ground_truth, loss_ratio)
            losses.append(loss)
            recalls.append(recall)
            precisions.append(precision)
            f_ones.append(f1)
            f_betas.append(f_beta)
            # end for iterations

        # One row per configuration: mean and std of each metric over the runs.
        result = [f"bimodal-c{wrong_cost}", workers_num, dist_name+f"({normal_mean},{normal_std})", policy_name, items_num, possitive_percentage, 
                  item_difficulty, states_num, wrong_cost, round_to_3(np.mean(costs)), round_to_3(np.std(costs)), round_to_3(np.mean(recalls)), round_to_3(np.std(recalls)),
                 round_to_3(np.mean(precisions)), round_to_3(np.std(precisions)), round_to_3(np.mean(losses)), round_to_3(np.std(losses)),
                 round_to_3(np.mean(f_ones)), round_to_3(np.std(f_ones)), round_to_3(np.mean(f_betas)), round_to_3(np.std(f_betas)), moment_error_estimation, avg_error_rate]

        total_results.append(result)
        # Appended without a header row; the CSV is shared with other cells.
        pd.DataFrame(total_results, columns=columns).to_csv(f"/Users/pmaglione/Repos/adaptive-pomdp-solutions/dai_pomdp/results/pomdp_results.csv", mode='a', index=False, header=False)

    #end for

    #end for



### Bimodal with different FNC and FPC

In [9]:
import algorithms_utils as alg_utils
import pandas as pd


# Experiment: bimodal worker population, one policy per (fnc, fpc) pair.
# Column layout of the result rows appended to pomdp_results_diff.csv below.
columns = ['name','num_workers','workers_distribution','policy_name','num_items','data_bal','items_diff','num_states','cost','cost_std', 'recall','recall_std', 'precision', 'precision_std', 'loss', 'loss_std', 'f1', 'f1_std', 'fbeta', 'fbeta_std', 'estimate_after', 'avg_error_rate', 'wce','wce_std','fnc','fpc']

# Simulated dataset: every item has the same difficulty, labels are shuffled.
items_num = 1000
possitive_percentage = 0.5
item_difficulty = 0.5
items_difficulties = [item_difficulty] * items_num
items_ground_truth = generate_gold_data(items_num, possitive_percentage)
loss_ratio = 5  # not referenced again in this cell


workers_num = 100

# Fallback error rate handed to solve() for workers without an estimate.
avg_error_rate = 1

#bimodal
# Half of the workers are drawn around 0.2, the other half around 4.
workers_error_rates1 = np.random.normal(.2, .01, int(workers_num/2))
workers_error_rates2 = np.random.normal(4, .2, int(workers_num/2))
workers_error_rates = np.concatenate((workers_error_rates1, workers_error_rates2), axis=0)
dist_name = "BiModal"
# NOTE(review): normal_mean / normal_std only feed the recorded label
# "BiModal(1,0.2)"; they do not match the two distributions sampled above - confirm.
normal_mean = 1
normal_std = 0.2


states_num = 23
policy_path = "/Users/pmaglione/Repos/adaptive-pomdp-solutions/dai_pomdp/ModelLearning/Policies/"


# Costs are stored as negative values; they are negated before compute_metrics.
fncs = [-1,-5,-10,-500]
fpcs = [-1,-5,-10,-500]

state_diff = getDifficulties(0.1)

# moment_error_estimation is passed to solve() as its estimate_after flag.
for moment_error_estimation in [False, True]:
    for fnc in fncs:
            for fpc in fpcs:
                total_results = []

    
                policy_name = f'wrong-cost-fnc{fnc}-fpc{fpc}.policy'
                policy = readPolicy(policy_path + policy_name, states_num)

                losses = []
                recalls = []
                precisions = []
                costs = []
                f_ones = []
                f_betas = []
                wces = []

                # 10 independent simulation runs per configuration.
                for _ in range(10):
                    answers, items_votes = solve(states_num, state_diff, avg_error_rate, policy, workers_error_rates, items_difficulties, items_ground_truth, moment_error_estimation)

                    # Cost of a run = mean number of votes collected per item.
                    costs.append(np.mean([len(v) for k,v in items_votes.items()]))    

                    loss, recall, precision, f1, beta, f_beta, wce = alg_utils.Metrics.compute_metrics(answers, items_ground_truth, -1*fnc, -1*fpc)
                    losses.append(loss)
                    recalls.append(recall)
                    precisions.append(precision)
                    f_ones.append(f1)
                    f_betas.append(f_beta)
                    wces.append(wce)

                # One row per configuration: mean and std of each metric over the runs.
                result = [f"bimodal-fnc{fnc}-fpc{fpc}", workers_num, dist_name+f"({normal_mean},{normal_std})", policy_name, items_num, possitive_percentage, 
                          item_difficulty, states_num, round_to_3(np.mean(costs)), round_to_3(np.std(costs)), round_to_3(np.mean(recalls)), round_to_3(np.std(recalls)),
                         round_to_3(np.mean(precisions)), round_to_3(np.std(precisions)), round_to_3(np.mean(losses)), round_to_3(np.std(losses)),
                         round_to_3(np.mean(f_ones)), round_to_3(np.std(f_ones)), round_to_3(np.mean(f_betas)), round_to_3(np.std(f_betas)), moment_error_estimation, avg_error_rate, 
                          round_to_3(np.mean(wces)), round_to_3(np.std(wces)), fnc, fpc]

                total_results.append(result)
                # Appended without a header row; the CSV is shared with other cells.
                pd.DataFrame(total_results, columns=columns).to_csv(f"/Users/pmaglione/Repos/adaptive-pomdp-solutions/dai_pomdp/results/pomdp_results_diff.csv", mode='a', index=False, header=False)

            #end for

            #end for

### Good and bad workers

In [5]:
import algorithms_utils as alg_utils
import pandas as pd


# Experiment: worker population drawn around error rate 3, rows named "bad-c<cost>".
# Column layout of the result rows appended to pomdp_results.csv below.
columns = ['name','num_workers','workers_distribution','policy_name','num_items','data_bal','items_diff','num_states','wrong_cost','cost','cost_std', 'recall','recall_std', 'precision', 'precision_std', 'loss', 'loss_std', 'f1', 'f1_std', 'fbeta', 'fbeta_std', 'estimate_after', 'avg_error_rate']

# Simulated dataset: every item has the same difficulty, labels are shuffled.
items_num = 1000
possitive_percentage = 0.5
item_difficulty = 0.5
items_difficulties = [item_difficulty] * items_num
items_ground_truth = generate_gold_data(items_num, possitive_percentage)
loss_ratio = 5  # third argument of compute_metrics below


workers_num = 1000

# Fallback error rate handed to solve() for workers without an estimate.
avg_error_rate = 1

#bimodal
#workers_error_rates1 = np.random.normal(.2, .01, int(workers_num/2))
#workers_error_rates2 = np.random.normal(4, .2, int(workers_num/2))
#workers_error_rates = np.concatenate((workers_error_rates1, workers_error_rates2), axis=0)
dist_name = "Normal"
# NOTE(review): the recorded label is "Normal(1,0.2)" but the error rates below
# are drawn from normal(3, .2) - confirm which one is intended.
normal_mean = 1
normal_std = 0.2

workers_error_rates = np.random.normal(3, .2, workers_num)

states_num = 23
policy_path = "/Users/pmaglione/Repos/adaptive-pomdp-solutions/dai_pomdp/ModelLearning/Policies/"

#policy = readPolicy("/Users/pmaglione/Repos/adaptive-pomdp-solutions/dai_pomdp/ModelLearning/Policies/s9-500.policy", states_num)

wrong_answer_costs = [10,20,50] #[10,20,50,500,1000]
state_diff = getDifficulties(0.1)

# moment_error_estimation is passed to solve() as its estimate_after flag;
# only the True setting is run in this cell.
for moment_error_estimation in [True]:#[False, True]:
    for wrong_cost in wrong_answer_costs:
        total_results = []

        #policy_name = f"unclassified{unclassify_val}.policy"
        policy_name = f"23states_base-c{wrong_cost}.policy"
        policy = readPolicy(policy_path + policy_name, states_num)

        losses = []
        recalls = []
        precisions = []
        costs = []
        f_ones = []
        f_betas = []

        # 10 independent simulation runs per configuration.
        for _ in range(10):
            answers, items_votes = solve(states_num, state_diff, avg_error_rate, policy, workers_error_rates, items_difficulties, items_ground_truth, moment_error_estimation)

            # Cost of a run = mean number of votes collected per item.
            costs.append(np.mean([len(v) for k,v in items_votes.items()]))    
            loss, recall, precision, f1, beta, f_beta = alg_utils.Metrics.compute_metrics(answers, items_ground_truth, loss_ratio)
            losses.append(loss)
            recalls.append(recall)
            precisions.append(precision)
            f_ones.append(f1)
            f_betas.append(f_beta)
            # end for iterations

        # One row per configuration: mean and std of each metric over the runs.
        result = [f"bad-c{wrong_cost}", workers_num, dist_name+f"({normal_mean},{normal_std})", policy_name, items_num, possitive_percentage, 
                  item_difficulty, states_num, wrong_cost, round_to_3(np.mean(costs)), round_to_3(np.std(costs)), round_to_3(np.mean(recalls)), round_to_3(np.std(recalls)),
                 round_to_3(np.mean(precisions)), round_to_3(np.std(precisions)), round_to_3(np.mean(losses)), round_to_3(np.std(losses)),
                 round_to_3(np.mean(f_ones)), round_to_3(np.std(f_ones)), round_to_3(np.mean(f_betas)), round_to_3(np.std(f_betas)), moment_error_estimation, avg_error_rate]

        total_results.append(result)
        # Appended without a header row; the CSV is shared with other cells.
        pd.DataFrame(total_results, columns=columns).to_csv(f"/Users/pmaglione/Repos/adaptive-pomdp-solutions/dai_pomdp/results/pomdp_results.csv", mode='a', index=False, header=False)

    #end for

    #end for



### Good and bad workers with different FNC and FPC

In [12]:
import algorithms_utils as alg_utils
import pandas as pd


# Column layout of every row appended to pomdp_results_diff.csv.
columns = [
    'name', 'num_workers', 'workers_distribution', 'policy_name', 'num_items', 'data_bal', 'items_diff',
    'num_states', 'cost', 'cost_std', 'recall', 'recall_std', 'precision', 'precision_std',
    'loss', 'loss_std', 'f1', 'f1_std', 'fbeta', 'fbeta_std', 'estimate_after', 'avg_error_rate',
    'wce', 'wce_std', 'fnc', 'fpc',
]

items_num = 1000
possitive_percentage = 0.5
item_difficulty = 0.5
items_difficulties = [item_difficulty] * items_num
items_ground_truth = generate_gold_data(items_num, possitive_percentage)
loss_ratio = 5

workers_num = 10000

avg_error_rate = 1

# Worker population. The distribution parameters are declared once and used both
# for sampling and for the 'workers_distribution' label written to the CSV, so
# the recorded label always matches the workers that were actually simulated.
# "bad" workers: mean 4, std .2 ("good" workers would be mean .2, std .01).
dist_name = "Normal"
normal_mean = 4
normal_std = 0.2
workers_error_rates = np.random.normal(normal_mean, normal_std, workers_num)

states_num = 23
policy_path = "/Users/pmaglione/Repos/adaptive-pomdp-solutions/dai_pomdp/ModelLearning/Policies/"

# Rewards for a false negative / false positive; each (fnc, fpc) pair selects
# a policy file by name.
fncs = [-1,-5,-10,-500]
fpcs = [-1,-5,-10,-500]

state_diff = getDifficulties(0.1)

for moment_error_estimation in [False, True]:
    for fnc in fncs:
        for fpc in fpcs:
            policy_name = f'wrong-cost-fnc{fnc}-fpc{fpc}.policy'
            policy = readPolicy(policy_path + policy_name, states_num)

            # Per-run samples, one entry per repetition.
            losses = []
            recalls = []
            precisions = []
            costs = []
            f_ones = []
            f_betas = []
            wces = []

            for _ in range(10):
                answers, items_votes = solve(states_num, state_diff, avg_error_rate, policy, workers_error_rates,
                                             items_difficulties, items_ground_truth, moment_error_estimation)

                # Cost of a run: average number of votes stored per item.
                costs.append(np.mean([len(item_votes) for item_votes in items_votes.values()]))

                # The metric helper receives the costs as positive magnitudes.
                loss, recall, precision, f1, beta, f_beta, wce = alg_utils.Metrics.compute_metrics(
                    answers, items_ground_truth, -1*fnc, -1*fpc)
                losses.append(loss)
                recalls.append(recall)
                precisions.append(precision)
                f_ones.append(f1)
                f_betas.append(f_beta)
                wces.append(wce)

            result = [f"bad-fnc{fnc}-fpc{fpc}", workers_num, dist_name+f"({normal_mean},{normal_std})", policy_name,
                      items_num, possitive_percentage, item_difficulty, states_num,
                      round_to_3(np.mean(costs)), round_to_3(np.std(costs)),
                      round_to_3(np.mean(recalls)), round_to_3(np.std(recalls)),
                      round_to_3(np.mean(precisions)), round_to_3(np.std(precisions)),
                      round_to_3(np.mean(losses)), round_to_3(np.std(losses)),
                      round_to_3(np.mean(f_ones)), round_to_3(np.std(f_ones)),
                      round_to_3(np.mean(f_betas)), round_to_3(np.std(f_betas)),
                      moment_error_estimation, avg_error_rate,
                      round_to_3(np.mean(wces)), round_to_3(np.std(wces)), fnc, fpc]

            # One-row frame appended to the CSV (no header, no index).
            total_results = [result]
            pd.DataFrame(total_results, columns=columns).to_csv(
                "/Users/pmaglione/Repos/adaptive-pomdp-solutions/dai_pomdp/results/pomdp_results_diff.csv",
                mode='a', index=False, header=False)

            #end for

### Change dataset balance

In [6]:
import algorithms_utils as alg_utils
import pandas as pd


# Column layout of every row appended to pomdp_results.csv.
columns = [
    'name', 'num_workers', 'workers_distribution', 'policy_name', 'num_items', 'data_bal', 'items_diff',
    'num_states', 'wrong_cost', 'cost', 'cost_std', 'recall', 'recall_std', 'precision', 'precision_std',
    'loss', 'loss_std', 'f1', 'f1_std', 'fbeta', 'fbeta_std', 'estimate_after', 'avg_error_rate',
]

items_num = 1000

item_difficulty = 0.5
items_difficulties = [item_difficulty] * items_num
loss_ratio = 5

workers_num = 100
avg_error_rate = 1

# Worker error rates are sampled from the distribution named in the CSV label.
dist_name = "Normal"
normal_mean = 1
normal_std = 0.2
workers_error_rates = np.random.normal(normal_mean, normal_std, workers_num)

states_num = 23
policy_path = "/Users/pmaglione/Repos/adaptive-pomdp-solutions/dai_pomdp/ModelLearning/Policies/"

state_diff = getDifficulties(0.1)

wrong_answer_costs = [10,20,50,500,1000]

# Fractions passed to generate_gold_data as the positive percentage.
balances = [.01, .1, .3]

for balance in balances:
    possitive_percentage = balance
    # A fresh gold set is generated for every balance value.
    items_ground_truth = generate_gold_data(items_num, possitive_percentage)

    for moment_error_estimation in [False, True]:
        for wrong_cost in wrong_answer_costs:
            policy_name = f"23states_base-c{wrong_cost}.policy"
            policy = readPolicy(policy_path + policy_name, states_num)

            # Per-run samples, one entry per repetition.
            losses, recalls, precisions = [], [], []
            costs, f_ones, f_betas = [], [], []

            for _ in range(10):
                answers, items_votes = solve(
                    states_num, state_diff, avg_error_rate, policy, workers_error_rates,
                    items_difficulties, items_ground_truth, moment_error_estimation)

                # Cost of a run: average number of votes stored per item.
                costs.append(np.mean([len(item_votes) for item_votes in items_votes.values()]))

                loss, recall, precision, f1, beta, f_beta = alg_utils.Metrics.compute_metrics(
                    answers, items_ground_truth, loss_ratio)
                losses.append(loss)
                recalls.append(recall)
                precisions.append(precision)
                f_ones.append(f1)
                f_betas.append(f_beta)

            # (mean, std) pairs in column order: cost, recall, precision, loss, f1, fbeta.
            summary = []
            for samples in (costs, recalls, precisions, losses, f_ones, f_betas):
                summary.append(round_to_3(np.mean(samples)))
                summary.append(round_to_3(np.std(samples)))

            result = [
                f"unbalanced-c{wrong_cost}", workers_num, f"{dist_name}({normal_mean},{normal_std})", policy_name,
                items_num, possitive_percentage, item_difficulty, states_num, wrong_cost,
                *summary, moment_error_estimation, avg_error_rate,
            ]

            # One-row frame appended to the CSV (no header, no index).
            total_results = [result]
            pd.DataFrame(total_results, columns=columns).to_csv(
                "/Users/pmaglione/Repos/adaptive-pomdp-solutions/dai_pomdp/results/pomdp_results.csv",
                mode='a', index=False, header=False)

        #end for



### unbalance with different fnc and fpc

In [17]:
import algorithms_utils as alg_utils
import pandas as pd


# Column layout of every row appended to pomdp_results_diff.csv.
columns = [
    'name', 'num_workers', 'workers_distribution', 'policy_name', 'num_items', 'data_bal', 'items_diff',
    'num_states', 'cost', 'cost_std', 'recall', 'recall_std', 'precision', 'precision_std',
    'loss', 'loss_std', 'f1', 'f1_std', 'fbeta', 'fbeta_std', 'estimate_after', 'avg_error_rate',
    'wce', 'wce_std', 'fnc', 'fpc',
]

items_num = 1000

item_difficulty = 0.5
items_difficulties = [item_difficulty] * items_num
loss_ratio = 5

workers_num = 100
avg_error_rate = 1

# Worker error rates are sampled from the distribution named in the CSV label.
dist_name = "Normal"
normal_mean = 1
normal_std = 0.2
workers_error_rates = np.random.normal(normal_mean, normal_std, workers_num)

states_num = 23
policy_path = "/Users/pmaglione/Repos/adaptive-pomdp-solutions/dai_pomdp/ModelLearning/Policies/"

state_diff = getDifficulties(0.1)

wrong_answer_costs = [10,20,50,500,1000]

# Fractions passed to generate_gold_data as the positive percentage.
balances = [.01, .1, .3]

# Each (fnc, fpc) pair selects a policy file by name.
fncs = [-1,-5,-10,-500]
fpcs = [-1,-5,-10,-500]

for balance in balances:
    possitive_percentage = balance
    # A fresh gold set is generated for every balance value.
    items_ground_truth = generate_gold_data(items_num, possitive_percentage)

    for moment_error_estimation in [False, True]:
        for fnc in fncs:
            for fpc in fpcs:
                policy_name = f'wrong-cost-fnc{fnc}-fpc{fpc}.policy'
                policy = readPolicy(policy_path + policy_name, states_num)

                # Per-run samples, one entry per repetition.
                losses, recalls, precisions = [], [], []
                costs, f_ones, f_betas, wces = [], [], [], []

                for _ in range(10):
                    answers, items_votes = solve(
                        states_num, state_diff, avg_error_rate, policy, workers_error_rates,
                        items_difficulties, items_ground_truth, moment_error_estimation)

                    # Cost of a run: average number of votes stored per item.
                    costs.append(np.mean([len(item_votes) for item_votes in items_votes.values()]))

                    # The metric helper receives the costs with their sign flipped.
                    loss, recall, precision, f1, beta, f_beta, wce = alg_utils.Metrics.compute_metrics(
                        answers, items_ground_truth, -1*fnc, -1*fpc)
                    losses.append(loss)
                    recalls.append(recall)
                    precisions.append(precision)
                    f_ones.append(f1)
                    f_betas.append(f_beta)
                    wces.append(wce)

                # (mean, std) pairs in column order: cost, recall, precision, loss, f1, fbeta.
                summary = []
                for samples in (costs, recalls, precisions, losses, f_ones, f_betas):
                    summary.append(round_to_3(np.mean(samples)))
                    summary.append(round_to_3(np.std(samples)))

                result = [
                    f"unbalanced-fnc{fnc}-fpc{fpc}", workers_num, f"{dist_name}({normal_mean},{normal_std})",
                    policy_name, items_num, possitive_percentage, item_difficulty, states_num,
                    *summary, moment_error_estimation, avg_error_rate,
                    round_to_3(np.mean(wces)), round_to_3(np.std(wces)), fnc, fpc,
                ]

                # One-row frame appended to the CSV (no header, no index).
                total_results = [result]
                pd.DataFrame(total_results, columns=columns).to_csv(
                    "/Users/pmaglione/Repos/adaptive-pomdp-solutions/dai_pomdp/results/pomdp_results_diff.csv",
                    mode='a', index=False, header=False)

            #end for



### Unclassify action

In [8]:
# Extra action (besides request-vote / submit-0 / submit-1): hand the item to an expert.
CONST_UNCLASSIFIED = 3

def solve_unclassify(num_states, states_difficulties, avg_error_rate, policy, workers_error_rates, items_difficulties, items_gt , estimate_after = True, expert_cost = 20):
    """Simulate policy-driven labelling where the policy may defer an item to an expert.

    Every round, each unresolved item is given the policy's best action for
    its current belief: request one more crowd vote, submit 0, submit 1, or
    "unclassified". An unclassified item is resolved with its ground-truth
    label and ``expert_cost`` synthetic votes are recorded for it, so the
    length of its vote dict reflects the price of the expert.

    Args:
        num_states: Number of belief states; the last one is the terminating
            state and starts with zero probability.
        states_difficulties: Difficulty grid forwarded to ``updateBelief``.
        avg_error_rate: Forwarded to ``get_worker_error_rate``.
        policy: Policy object consumed by ``findBestAction``.
        workers_error_rates: Per-worker error rates, indexed by worker id.
        items_difficulties: Per-item difficulty, indexed by item id.
        items_gt: Per-item ground-truth label (0 or 1).
        estimate_after: Forwarded to ``get_worker_error_rate``.
        expert_cost: Number of synthetic votes recorded when an item is
            handed to the expert.

    Returns:
        ``(answers, items_votes)``: the final label of every item and, per
        item, a dict mapping worker id to the vote that worker cast.
    """
    num_items = len(items_gt)

    actions = range(0, 4) # 0,1,2,3

    items_votes = {item_id: {} for item_id in range(num_items)}

    # Uniform initial belief over all states except the terminating one.
    belief = [1 for _ in range(num_states)]
    belief[num_states-1] = 0  # last state = 0, terminating state
    belief = normalize(belief)
    beliefs = [deepcopy(belief) for _ in range(num_items)]

    answers = [CONST_NO_ANSWER for _ in range(num_items)]

    # Synthetic expert ids start above every real worker id so that expert
    # votes can never overwrite a crowd vote stored for the same item.
    first_expert_id = max(1100, len(workers_error_rates))

    while are_items_unresolved(answers):
        items_to_vote = []
        unresolved_items = get_unresolved_items(answers)
        unresolved_items_num = len(unresolved_items)

        for item_id in unresolved_items:
            bestAction = int(findBestAction(actions, policy, beliefs[item_id]))

            if bestAction == CONST_REQUEST_VOTE:
                items_to_vote.append(item_id)
            elif bestAction == CONST_UNCLASSIFIED:
                # One distinct id per expert vote: reusing a single id would
                # overwrite the same dict entry and record just one vote.
                for fake_w_id in range(first_expert_id, first_expert_id + expert_cost):
                    items_votes[item_id][fake_w_id] = items_gt[item_id]
                answers[item_id] = items_gt[item_id]
            elif bestAction == CONST_SUBMIT_ZERO:
                answers[item_id] = 0
            elif bestAction == CONST_SUBMIT_ONE:
                answers[item_id] = 1
        #end for

        # True when at least one item had already been resolved before this round.
        have_submitted = unresolved_items_num != num_items

        for item_to_vote in items_to_vote:
            worker_id, vote = get_worker_vote(item_to_vote, items_votes, items_difficulties, items_gt, workers_error_rates)
            items_votes[item_to_vote][worker_id] = vote

        estimated_error_rates = get_worker_error_rate_estimation(items_votes)

        for item_id in items_to_vote:
            # Dicts keep insertion order, so the last entry is the vote just collected.
            last_vote = list(items_votes[item_id].values())[-1]
            last_worker_id = list(items_votes[item_id])[-1]
            beliefs[item_id] = updateBelief(beliefs[item_id], last_vote, states_difficulties,
                                     get_worker_error_rate(last_worker_id, estimated_error_rates, avg_error_rate, estimate_after, have_submitted))
    #end while

    return answers, items_votes

In [26]:
import algorithms_utils as alg_utils
import pandas as pd


# Column layout of every row appended to pomdp_results.csv.
columns = [
    'name', 'num_workers', 'workers_distribution', 'policy_name', 'num_items', 'data_bal', 'items_diff',
    'num_states', 'wrong_cost', 'cost', 'cost_std', 'recall', 'recall_std', 'precision', 'precision_std',
    'loss', 'loss_std', 'f1', 'f1_std', 'fbeta', 'fbeta_std', 'estimate_after', 'avg_error_rate',
]

items_num = 1000
possitive_percentage = 0.5
item_difficulty = 0.5
items_difficulties = [item_difficulty] * items_num
items_ground_truth = generate_gold_data(items_num, possitive_percentage)
loss_ratio = 5

workers_num = 1000

avg_error_rate = 1

# Worker population. The distribution parameters are declared once and used both
# for sampling and for the 'workers_distribution' label written to the CSV, so
# the recorded label always matches the workers that were actually simulated.
# (A bimodal alternative would concatenate Normal(.2, .01) and Normal(4, .2) halves.)
dist_name = "Normal"
normal_mean = 3
normal_std = 0.2
workers_error_rates = np.random.normal(normal_mean, normal_std, workers_num)

states_num = 23
policy_path = "/Users/pmaglione/Repos/adaptive-pomdp-solutions/dai_pomdp/ModelLearning/Policies/"

# Paired lists: the i-th wrong-answer cost goes with the i-th unclassify cost;
# together they select a policy file by name.
wrong_answer_costs = [10,20,50,500,1000]
unclassify_costs = [5,10,20,200,500]

state_diff = getDifficulties(0.1)

for moment_error_estimation in [False, True]:
    for wrong_cost, unclassify_cost in zip(wrong_answer_costs, unclassify_costs):
        policy_name = f'unclassified-w{wrong_cost}-u{unclassify_cost}.policy'
        policy = readPolicy(policy_path + policy_name, states_num)

        # Per-run samples, one entry per repetition.
        losses = []
        recalls = []
        precisions = []
        costs = []
        f_ones = []
        f_betas = []

        for _ in range(10):
            answers, items_votes = solve_unclassify(states_num, state_diff, avg_error_rate, policy, workers_error_rates,
                                                    items_difficulties, items_ground_truth, moment_error_estimation)

            # Cost of a run: average number of votes stored per item.
            costs.append(np.mean([len(item_votes) for item_votes in items_votes.values()]))
            loss, recall, precision, f1, beta, f_beta = alg_utils.Metrics.compute_metrics(
                answers, items_ground_truth, loss_ratio)
            losses.append(loss)
            recalls.append(recall)
            precisions.append(precision)
            f_ones.append(f1)
            f_betas.append(f_beta)
        # end for iterations

        result = [f"unclassify-c{wrong_cost}-u{unclassify_cost}", workers_num, dist_name+f"({normal_mean},{normal_std})",
                  policy_name, items_num, possitive_percentage, item_difficulty, states_num, wrong_cost,
                  round_to_3(np.mean(costs)), round_to_3(np.std(costs)),
                  round_to_3(np.mean(recalls)), round_to_3(np.std(recalls)),
                  round_to_3(np.mean(precisions)), round_to_3(np.std(precisions)),
                  round_to_3(np.mean(losses)), round_to_3(np.std(losses)),
                  round_to_3(np.mean(f_ones)), round_to_3(np.std(f_ones)),
                  round_to_3(np.mean(f_betas)), round_to_3(np.std(f_betas)),
                  moment_error_estimation, avg_error_rate]

        # One-row frame appended to the CSV (no header, no index).
        total_results = [result]
        pd.DataFrame(total_results, columns=columns).to_csv(
            "/Users/pmaglione/Repos/adaptive-pomdp-solutions/dai_pomdp/results/pomdp_results.csv",
            mode='a', index=False, header=False)

    #end for



### Unclassify action with different fnc and fpc

In [9]:
import algorithms_utils as alg_utils
import pandas as pd


# Column layout of every row appended to pomdp_results_diff.csv.
columns = [
    'name', 'num_workers', 'workers_distribution', 'policy_name', 'num_items', 'data_bal', 'items_diff',
    'num_states', 'cost', 'cost_std', 'recall', 'recall_std', 'precision', 'precision_std',
    'loss', 'loss_std', 'f1', 'f1_std', 'fbeta', 'fbeta_std', 'estimate_after', 'avg_error_rate',
    'wce', 'wce_std', 'fnc', 'fpc',
]

items_num = 1000
possitive_percentage = 0.5
item_difficulty = 0.5
items_difficulties = [item_difficulty] * items_num
items_ground_truth = generate_gold_data(items_num, possitive_percentage)
loss_ratio = 5

workers_num = 1000
avg_error_rate = 1

dist_name = "Normal"
normal_mean = 1
normal_std = 0.2

workers_error_rates = np.random.normal(1, .2, workers_num)

states_num = 23
policy_path = "/Users/pmaglione/Repos/adaptive-pomdp-solutions/dai_pomdp/ModelLearning/Policies/"

# Each (fnc, fpc) pair, plus the derived unclassify cost, selects a policy file by name.
fncs = [-1,-5,-10,-500]
fpcs = [-1,-5,-10,-500]

state_diff = getDifficulties(0.1)

for moment_error_estimation in [False, True]:
    for fnc in fncs:
        for fpc in fpcs:
            # Unclassify cost: half of the smaller error-cost magnitude (a float).
            ucc = min(abs(fnc), abs(fpc)) / 2

            policy_name = f'unclassify-fnc{fnc}-fpc{fpc}-u{ucc}.policy'
            policy = readPolicy(policy_path + policy_name, states_num)

            # Per-run samples, one entry per repetition.
            losses, recalls, precisions = [], [], []
            costs, f_ones, f_betas, wces = [], [], [], []

            for _ in range(10):
                answers, items_votes = solve_unclassify(
                    states_num, state_diff, avg_error_rate, policy, workers_error_rates,
                    items_difficulties, items_ground_truth, moment_error_estimation)

                # Cost of a run: average number of votes stored per item.
                costs.append(np.mean([len(item_votes) for item_votes in items_votes.values()]))

                # The metric helper receives the costs with their sign flipped.
                loss, recall, precision, f1, beta, f_beta, wce = alg_utils.Metrics.compute_metrics(
                    answers, items_ground_truth, -1*fnc, -1*fpc)
                losses.append(loss)
                recalls.append(recall)
                precisions.append(precision)
                f_ones.append(f1)
                f_betas.append(f_beta)
                wces.append(wce)

            # (mean, std) pairs in column order: cost, recall, precision, loss, f1, fbeta.
            summary = []
            for samples in (costs, recalls, precisions, losses, f_ones, f_betas):
                summary.append(round_to_3(np.mean(samples)))
                summary.append(round_to_3(np.std(samples)))

            result = [
                f"unclassify-fnc{fnc}-fpc{fpc}-u{ucc}", workers_num, f"{dist_name}({normal_mean},{normal_std})",
                policy_name, items_num, possitive_percentage, item_difficulty, states_num,
                *summary, moment_error_estimation, avg_error_rate,
                round_to_3(np.mean(wces)), round_to_3(np.std(wces)), fnc, fpc,
            ]

            # One-row frame appended to the CSV (no header, no index).
            total_results = [result]
            pd.DataFrame(total_results, columns=columns).to_csv(
                "/Users/pmaglione/Repos/adaptive-pomdp-solutions/dai_pomdp/results/pomdp_results_diff.csv",
                mode='a', index=False, header=False)

        #end for


### Confusion Matrix

In [3]:
def get_worker_vote_confusion(item_id, items_votes, items_difficulties, items_gt, workers_error_rates):
    """Draw one simulated vote for ``item_id`` using class-dependent worker error rates.

    ``workers_error_rates`` holds two per-worker sequences: index 0 is used
    when the item's ground truth is 0, index 1 otherwise. A worker that has
    not yet voted on the item is picked at random, and the vote equals the
    ground truth with probability ``get_accuracy(difficulty, rate)``.

    Returns:
        ``(worker_id, vote)`` where ``vote`` is the ground truth or its
        complement (``1 - truth``).
    """
    worker_id = get_random_worker_id(workers_error_rates, item_id, items_votes)
    truth = items_gt[item_id]

    # Pick the rate row matching the item's true class.
    class_row = 0 if truth == 0 else 1
    accuracy = get_accuracy(items_difficulties[item_id], workers_error_rates[class_row][worker_id])

    # A single Bernoulli trial decides whether the worker answers correctly.
    answered_correctly = np.random.binomial(1, accuracy)
    return worker_id, (truth if answered_correctly else 1 - truth)
    
def get_random_worker_id(workers_error_rates, item_id, votes):
    """Pick, uniformly at random, a worker that has not voted on ``item_id`` yet.

    Worker ids are the positions of ``workers_error_rates[0]``; ids already
    present as keys of ``votes[item_id]`` are excluded.

    Raises:
        ValueError: if every worker has already voted on the item (the
            counts are printed first to help diagnose the situation).
    """
    already_voted = set(votes[item_id])
    total_workers = len(workers_error_rates[0])
    candidates = [wid for wid in range(total_workers) if wid not in already_voted]

    if not candidates:
        print(f'used: {len(already_voted)}')
        print(f'workers: {total_workers}')
        print(f'unused: {len(candidates)}')
        raise ValueError("Unused empty!?")

    return np.random.choice(candidates)
    
def solve_confusion(num_states, states_difficulties, avg_error_rate, policy, workers_error_rates, items_difficulties, items_gt , estimate_after = True):
    """Simulate policy-driven labelling with class-dependent worker error rates.

    Every round, each unresolved item is given the policy's best action for
    its current belief: request one more vote, submit 0, or submit 1. Votes
    are drawn with ``get_worker_vote_confusion``; after each round the worker
    error rates are re-estimated from all votes and the beliefs of the items
    that received a vote are updated.

    Returns:
        ``(answers, items_votes)``: the final label of every item and, per
        item, a dict mapping worker id to the vote that worker cast.
    """
    num_items = len(items_gt)
    actions = range(0, 3) # 0,1,2

    items_votes = {idx: {} for idx in range(num_items)}

    # Uniform initial belief over all states except the terminating (last) one.
    initial_belief = [1] * num_states
    initial_belief[num_states - 1] = 0
    initial_belief = normalize(initial_belief)
    beliefs = [deepcopy(initial_belief) for _ in range(num_items)]

    answers = [CONST_NO_ANSWER] * num_items

    while are_items_unresolved(answers):
        pending = get_unresolved_items(answers)
        # True when at least one item had already been resolved before this round.
        have_submitted = len(pending) != num_items

        # Phase 1: ask the policy what to do with every pending item.
        items_to_vote = []
        for item_id in pending:
            action = int(findBestAction(actions, policy, beliefs[item_id]))
            if action == CONST_REQUEST_VOTE:
                items_to_vote.append(item_id)
            elif action == CONST_SUBMIT_ZERO:
                answers[item_id] = 0
            elif action == CONST_SUBMIT_ONE:
                answers[item_id] = 1

        # Phase 2: collect one new vote for each item that asked for it.
        for item_id in items_to_vote:
            worker_id, vote = get_worker_vote_confusion(item_id, items_votes, items_difficulties, items_gt, workers_error_rates)
            items_votes[item_id][worker_id] = vote

        estimated_error_rates = get_worker_error_rate_estimation(items_votes)

        # Phase 3: fold the newest vote of each item into its belief.
        for item_id in items_to_vote:
            # Dicts keep insertion order, so the last entry is the vote just collected.
            last_worker_id, last_vote = list(items_votes[item_id].items())[-1]
            worker_rate = get_worker_error_rate(last_worker_id, estimated_error_rates, avg_error_rate,
                                                estimate_after, have_submitted)
            beliefs[item_id] = updateBelief(beliefs[item_id], last_vote, states_difficulties, worker_rate)

    return answers, items_votes

In [38]:
import algorithms_utils as alg_utils
import pandas as pd


# Column layout of every row appended to pomdp_results.csv.
columns = [
    'name', 'num_workers', 'workers_distribution', 'policy_name', 'num_items', 'data_bal', 'items_diff',
    'num_states', 'wrong_cost', 'cost', 'cost_std', 'recall', 'recall_std', 'precision', 'precision_std',
    'loss', 'loss_std', 'f1', 'f1_std', 'fbeta', 'fbeta_std', 'estimate_after', 'avg_error_rate',
]

items_num = 1000
possitive_percentage = 0.5
item_difficulty = 0.5
items_difficulties = [item_difficulty] * items_num
items_ground_truth = generate_gold_data(items_num, possitive_percentage)
loss_ratio = 5

workers_num = 1000
avg_error_rate = 1

dist_name = "NormalConfusion"
normal_mean = 1
normal_std = 0.2

# Two per-worker rate rows: row 1 is sampled from Normal(1, .2) and row 0 is
# half of row 1. get_worker_vote_confusion reads row 0 for items whose ground
# truth is 0 and row 1 otherwise.
acc_pos = np.random.normal(1, .2, workers_num)
workers_error_rates = [[w_acc_pos * .5 for w_acc_pos in acc_pos], acc_pos]

states_num = 23
policy_path = "/Users/pmaglione/Repos/adaptive-pomdp-solutions/dai_pomdp/ModelLearning/Policies/"

wrong_answer_costs = [10,20,50,500,1000]
state_diff = getDifficulties(0.1)

for moment_error_estimation in [False, True]:
    for wrong_cost in wrong_answer_costs:
        policy_name = f"23states_base-c{wrong_cost}.policy"
        policy = readPolicy(policy_path + policy_name, states_num)

        # Per-run samples, one entry per repetition.
        losses, recalls, precisions = [], [], []
        costs, f_ones, f_betas = [], [], []

        for _ in range(10):
            answers, items_votes = solve_confusion(
                states_num, state_diff, avg_error_rate, policy, workers_error_rates,
                items_difficulties, items_ground_truth, moment_error_estimation)

            # Cost of a run: average number of votes stored per item.
            costs.append(np.mean([len(item_votes) for item_votes in items_votes.values()]))

            loss, recall, precision, f1, beta, f_beta = alg_utils.Metrics.compute_metrics(
                answers, items_ground_truth, loss_ratio)
            losses.append(loss)
            recalls.append(recall)
            precisions.append(precision)
            f_ones.append(f1)
            f_betas.append(f_beta)

        # (mean, std) pairs in column order: cost, recall, precision, loss, f1, fbeta.
        summary = []
        for samples in (costs, recalls, precisions, losses, f_ones, f_betas):
            summary.append(round_to_3(np.mean(samples)))
            summary.append(round_to_3(np.std(samples)))

        result = [
            f"confusion-matrix-c{wrong_cost}", workers_num, f"{dist_name}({normal_mean},{normal_std})", policy_name,
            items_num, possitive_percentage, item_difficulty, states_num, wrong_cost,
            *summary, moment_error_estimation, avg_error_rate,
        ]

        # One-row frame appended to the CSV (no header, no index).
        total_results = [result]
        pd.DataFrame(total_results, columns=columns).to_csv(
            "/Users/pmaglione/Repos/adaptive-pomdp-solutions/dai_pomdp/results/pomdp_results.csv",
            mode='a', index=False, header=False)
    #end for



### Confusion with different FNC and FPC

In [4]:
import algorithms_utils as alg_utils
import pandas as pd


# Column layout of every row appended to pomdp_results_diff.csv.
columns = [
    'name', 'num_workers', 'workers_distribution', 'policy_name', 'num_items', 'data_bal', 'items_diff',
    'num_states', 'cost', 'cost_std', 'recall', 'recall_std', 'precision', 'precision_std',
    'loss', 'loss_std', 'f1', 'f1_std', 'fbeta', 'fbeta_std', 'estimate_after', 'avg_error_rate',
    'wce', 'wce_std', 'fnc', 'fpc',
]

items_num = 1000
possitive_percentage = 0.5
item_difficulty = 0.5
items_difficulties = [item_difficulty] * items_num
items_ground_truth = generate_gold_data(items_num, possitive_percentage)
loss_ratio = 5

workers_num = 1000
avg_error_rate = 1

dist_name = "NormalConfusion"
normal_mean = 1
normal_std = 0.2

# Two per-worker rate rows: row 1 is sampled from Normal(1, .2) and row 0 is
# half of row 1. get_worker_vote_confusion reads row 0 for items whose ground
# truth is 0 and row 1 otherwise.
acc_pos = np.random.normal(1, .2, workers_num)
workers_error_rates = [[w_acc_pos * .5 for w_acc_pos in acc_pos], acc_pos]

# Each (fnc, fpc) pair selects a policy file by name.
fncs = [-1,-5,-10,-500]
fpcs = [-1,-5,-10,-500]

states_num = 23
policy_path = "/Users/pmaglione/Repos/adaptive-pomdp-solutions/dai_pomdp/ModelLearning/Policies/"

wrong_answer_costs = [10,20,50,500,1000]
state_diff = getDifficulties(0.1)

for moment_error_estimation in [False, True]:
    for fnc in fncs:
        for fpc in fpcs:
            policy_name = f'wrong-cost-fnc{fnc}-fpc{fpc}.policy'
            policy = readPolicy(policy_path + policy_name, states_num)

            # Per-run samples, one entry per repetition.
            losses, recalls, precisions = [], [], []
            costs, f_ones, f_betas, wces = [], [], [], []

            for _ in range(10):
                answers, items_votes = solve_confusion(
                    states_num, state_diff, avg_error_rate, policy, workers_error_rates,
                    items_difficulties, items_ground_truth, moment_error_estimation)

                # Cost of a run: average number of votes stored per item.
                costs.append(np.mean([len(item_votes) for item_votes in items_votes.values()]))

                # The metric helper receives the costs with their sign flipped.
                loss, recall, precision, f1, beta, f_beta, wce = alg_utils.Metrics.compute_metrics(
                    answers, items_ground_truth, -1*fnc, -1*fpc)
                losses.append(loss)
                recalls.append(recall)
                precisions.append(precision)
                f_ones.append(f1)
                f_betas.append(f_beta)
                wces.append(wce)

            # (mean, std) pairs in column order: cost, recall, precision, loss, f1, fbeta.
            summary = []
            for samples in (costs, recalls, precisions, losses, f_ones, f_betas):
                summary.append(round_to_3(np.mean(samples)))
                summary.append(round_to_3(np.std(samples)))

            result = [
                f"confusion-matrix-fnc{fnc}-fpc{fpc}", workers_num, f"{dist_name}({normal_mean},{normal_std})",
                policy_name, items_num, possitive_percentage, item_difficulty, states_num,
                *summary, moment_error_estimation, avg_error_rate,
                round_to_3(np.mean(wces)), round_to_3(np.std(wces)), fnc, fpc,
            ]

            # One-row frame appended to the CSV (no header, no index).
            total_results = [result]
            pd.DataFrame(total_results, columns=columns).to_csv(
                "/Users/pmaglione/Repos/adaptive-pomdp-solutions/dai_pomdp/results/pomdp_results_diff.csv",
                mode='a', index=False, header=False)
        #end for



### check workers accs mean

In [16]:
def get_acc(diffs, e):
    """Return the mean of ``0.5 * (1 + (1 - d) ** e)`` over every difficulty ``d`` in ``diffs``.

    ``e`` is the worker error-rate exponent; the per-difficulty value is the
    same accuracy formula used by ``get_accuracy``.
    """
    per_difficulty = [1/2 * (1 + math.pow(1 - d, e)) for d in diffs]
    return np.mean(per_difficulty)

# Difficulty grid built with np.arange(0, 1.1, 0.1).
diffs = np.arange(0, 1.1, 0.1)

# 1000 worker error rates sampled from Normal(.2, .01).
workers_error = np.random.normal(.2,.01,1000)

# Mean accuracy of each worker across the whole difficulty grid.
workers_acc = []
for worker_rate in workers_error:
    workers_acc.append(get_acc(diffs, worker_rate))

mean_accuracy = np.mean(workers_acc)
print(mean_accuracy)



0.8916297807595583


## Generate and evaluate Dai's results

In [1]:
from ModelLearning.genPOMDP import *
from os import system

# Candidate difficulty grids; a grid with k values yields a model with 2k + 1 states.
diffs = [
    [0, 1],                                    # 5 states
    [0, 0.33, 0.66, 1],                        # 9 states
    [0, 0.2, 0.4, 0.6, 0.8, 1],                # 13 states
    [0, 0.15, 0.3, 0.45, 0.6, 0.75, 0.9, 1],   # 17 states
]

wrong_answer_costs = [10,20,50,500,1000]

# Location of the zmdp solver binary.
zmdp_bin = "/Users/pmaglione/Repos/adaptive-pomdp-solutions/WorkerPoolSelection/ModelLearning/zmdp-1.1.7/bin/darwin18/zmdp"

# genPOMDP arguments (per the original note):
# filename, wronganswercost, createjobcost, distributionavg, numpools
for wrong_cost in wrong_answer_costs:
    for diff in diffs:
        num_st = 2 * len(diff) + 1
        name = f'{num_st}states-c{wrong_cost}'
        pomdp_file = f'log/pomdp/{name}.pomdp'

        # Write the model file, then solve it into a policy file of the same name.
        genPOMDP(pomdp_file, -1 * wrong_cost, [1], [1], 1, diff)
        system(f"{zmdp_bin} solve {pomdp_file} -o ModelLearning/Policies/{name}.policy -t 300")
    

### Generate 23 states base

In [3]:
from ModelLearning.genPOMDP import *
from os import system
from ModelLearning.utils import *
# Difficulty grid for the "23states_base" models (presumably 11 values -> 23 states; confirm against getDifficulties).
diff = getDifficulties(0.1)
num_st = len(diff)
wrong_answer_costs = [10,20,50,500,1000,5000]

#worker error rate Normal(1,0.2)

# One model file per wrong-answer cost. The grid computed above (`diff`) is
# passed explicitly: `diffs` is a leftover global from other cells and is
# either undefined in a fresh kernel or holds a different grid.
for wrong_cost in wrong_answer_costs:
    genPOMDP(f'log/pomdp/23states_base-c{wrong_cost}.pomdp', -1 * wrong_cost, [1], [1], 1, diff)

#### solve

In [4]:
# Run the zmdp solver on every generated 23states_base model, writing one policy file per cost.
for wrong_cost in wrong_answer_costs:
    model_name = f"23states_base-c{wrong_cost}"
    solve_command = (
        "/Users/pmaglione/Repos/adaptive-pomdp-solutions/WorkerPoolSelection/ModelLearning/zmdp-1.1.7/bin/darwin18/zmdp"
        f" solve log/pomdp/{model_name}.pomdp -o ModelLearning/Policies/{model_name}.policy -t 300"
    )
    system(solve_command)

### Generate 24states with unclassified pomdp

In [10]:
from ModelLearning.genPOMDP import *
from os import system
from ModelLearning.utils import *
# Build the model file for the "unclassified" variant from the 0.1-step difficulty grid.
diff = getDifficulties(0.1)
num_st = len(diff)

# Passed to genPOMDP as-is (already negative here).
wrong_cost = -500
pomdp_file = 'log/pomdp/unclassified.pomdp'
genPOMDP(pomdp_file, wrong_cost, [1], [1], 1, diff)

In [17]:
# For every difficulty grid, print its (difficulty, class) pairs on one line:
# all pairs for class 0 first, then all pairs for class 1.
for diff_v in diffs:
    for c in (0, 1):
        for diff in diff_v:
            pair = f"({diff},{c})"
            print(pair, end="", flush=True)
    # Ends the line and leaves one blank line between grids.
    print("\n")
    

(0,0)(1,0)(0,1)(1,1)

(0,0)(0.33,0)(0.66,0)(1,0)(0,1)(0.33,1)(0.66,1)(1,1)

(0,0)(0.2,0)(0.4,0)(0.6,0)(0.8,0)(1,0)(0,1)(0.2,1)(0.4,1)(0.6,1)(0.8,1)(1,1)

(0,0)(0.15,0)(0.3,0)(0.45,0)(0.6,0)(0.75,0)(0.9,0)(1,0)(0,1)(0.15,1)(0.3,1)(0.45,1)(0.6,1)(0.75,1)(0.9,1)(1,1)



## Run experiment

In [10]:
from os import system

# Launch the experiment script once per state count (passed as its last argument).
sts = [5, 9, 13, 17]
for st in sts:
    command = f"/bin/bash run_experiment.sh 300 1000 0.5 {st}"
    system(command)

In [None]:
# Same experiment script, run with 24 as its last argument.
system("/bin/bash run_experiment.sh 300 1000 0.5 24")

## Process results and show

In [13]:
import algorithms_utils as alg_utils
import pandas as pd
import numpy as np

def round_to_3(value):
    """Round ``value`` to three decimal places using the built-in ``round``."""
    return round(value, ndigits=3)

# Worker-pool parameters; only referenced by the commented-out `name` line below.
WP0_num,WP0_mean,WP0_stddev,WP1_num,WP1_mean,WP1_stddev = 1000,4.000000,0.300000,1000,4.000000,0.300000
#name = "%d,%.2f,%.2f,%d,%.2f,%.2f" % (WP0_num,WP0_mean,WP0_stddev,WP1_num,WP1_mean,WP1_stddev)


# Experiment label -> results sub-directory (below "Experiments/") and the
# state count printed in the report.
results = { 'base': {'path': 'base - 100,1.00,0.20,100,1.00,0.20', 'states': '23'},
            'unclassified': {'path': 'unclassified - 100,1.00,0.20,100,1.00,0.20', 'states': '23'},
            '5states': {'path': 's5 - 100,1.00,0.20,100,1.00,0.20', 'states': '5'},
            '9states': {'path': 's9 - 100,1.00,0.20,100,1.00,0.20', 'states': '9'},
            '13states': {'path': 's13 - 100,1.00,0.20,100,1.00,0.20', 'states': '13'},
            '17states': {'path': 's17 - 100,1.00,0.20,100,1.00,0.20', 'states': '17'},
            'bimodal': {'path': 'diff_rates - 100,1.00,0.20,100,1.00,0.20', 'states': '23'},
           }

# Settings shared by every experiment; they do not change inside the loop, so
# they are assigned once here.
NUMBER_OF_REPETITIONS = 5
loss_ratio = 5
total_cost_mean = 0.
initial_line = 8  # presumably the offset of the first item row in a results file -- confirm
#wrong_answer_costs = [10,500,1000,5000,10000,15000]
wrong_answer_costs = [500]
NUMBER_OF_ITEMS = 1000
worker_num = 100
data_bal = 0.5

for name, result in results.items():
    print(name)
    st = result['states']
    print(f"Num States: {st}")

    RELATIVEPATH = f"Experiments/{result['path']}"

    for wrongAnswerCost in wrong_answer_costs:
        # Per-repetition metrics, aggregated after the repetitions loop.
        losses = []
        recalls = []
        precisions = []
        costs = []
        f_ones = []
        f_betas = []

        for iteration in range(NUMBER_OF_REPETITIONS):
            gt_vals = []
            classification_vals = []
            iteration_cost = []
            # `with` guarantees the results file is closed even if a row fails
            # to parse (the handle used to be left open).
            with open(RELATIVEPATH + "/%d/Unstarred/%d" % (wrongAnswerCost,iteration),'r') as iteration_results_file:
                for i, line in enumerate(iteration_results_file):
                    # Keeps NUMBER_OF_ITEMS rows: indices initial_line-1 .. initial_line+NUMBER_OF_ITEMS-2.
                    if (initial_line - 2 < i < (initial_line + NUMBER_OF_ITEMS - 1)):
                        # Each item row must hold exactly six comma-separated fields.
                        item_id, y_val, gt_val, item_cost, item_difficulty, item_real_difficulty,  = line.rstrip().split(",")
                        classification_vals.append(int(y_val))
                        gt_vals.append(int(gt_val))
                        iteration_cost.append(float(item_cost))
            # end for results file

            costs.append(np.mean(iteration_cost))
            loss, recall, precision, f1, beta, f_beta = alg_utils.Metrics.compute_metrics(classification_vals, gt_vals, loss_ratio)
            losses.append(loss)
            recalls.append(recall)
            precisions.append(precision)
            f_ones.append(f1)
            f_betas.append(f_beta)
        # end for iterations

        print(f"Results for cost: {wrongAnswerCost}:")
        print(f"Cost mean: {round_to_3(np.mean(costs))} - Std: {round_to_3(np.std(costs))}")
        print(f"Recall mean: {round_to_3(np.mean(recalls))} - Std: {round_to_3(np.std(recalls))}")
        print(f"Precision mean: {round_to_3(np.mean(precisions))} - Std: {round_to_3(np.std(precisions))}")
        print(f"Loss mean: {round_to_3(np.mean(losses))} - Std: {round_to_3(np.std(losses))}")
        print(f"F-1: {round_to_3(np.mean(f_ones))} - Std: {round_to_3(np.std(f_ones))}")
        print(f"F-beta: {round_to_3(np.mean(f_betas))} - Std: {round_to_3(np.std(f_betas))}")

        # Cost scaled by item count and a 0.05 factor; computed but not printed here.
        norm_cost = round_to_3(np.mean(costs)) * NUMBER_OF_ITEMS * 0.05
        norm_cost_std = round_to_3(np.std(costs)) * NUMBER_OF_ITEMS * 0.05

        print("  ---  ")

base
Num States: 23
Results for cost: 500:
Cost mean: 7.764 - Std: 0.309
Recall mean: 0.964 - Std: 0.012
Precision mean: 0.966 - Std: 0.009
Loss mean: 0.106 - Std: 0.033
F-1: 0.965 - Std: 0.01
F-beta: 0.965 - Std: 0.011
  ---  
unclassified
Num States: 23
Results for cost: 500:
Cost mean: 7.88 - Std: 0.481
Recall mean: 0.955 - Std: 0.023
Precision mean: 0.956 - Std: 0.018
Loss mean: 0.135 - Std: 0.067
F-1: 0.956 - Std: 0.02
F-beta: 0.955 - Std: 0.022
  ---  
5states
Num States: 5
Results for cost: 500:
Cost mean: 1.0 - Std: 0.0
Recall mean: 0.766 - Std: 0.013
Precision mean: 0.761 - Std: 0.015
Loss mean: 0.716 - Std: 0.039
F-1: 0.764 - Std: 0.011
F-beta: 0.765 - Std: 0.011
  ---  
9states
Num States: 9
Results for cost: 500:
Cost mean: 5.9 - Std: 0.063
Recall mean: 0.947 - Std: 0.007
Precision mean: 0.948 - Std: 0.007
Loss mean: 0.163 - Std: 0.022
F-1: 0.947 - Std: 0.007
F-beta: 0.947 - Std: 0.007
  ---  
13states
Num States: 13
Results for cost: 500:
Cost mean: 7.276 - Std: 0.104
Reca

# Dai's Results over real-world datasets

In [6]:
import algorithms_utils as alg_utils
import pandas as pd
import numpy as np

def round_to_3(value):
    """Return ``value`` rounded to three decimal places."""
    rounded = round(value, 3)
    return rounded

# Worker-pool parameters; only referenced by the commented-out `name` line below.
WP0_num,WP0_mean,WP0_stddev,WP1_num,WP1_mean,WP1_stddev = 1000,4.000000,0.300000,1000,4.000000,0.300000
#name = "%d,%.2f,%.2f,%d,%.2f,%.2f" % (WP0_num,WP0_mean,WP0_stddev,WP1_num,WP1_mean,WP1_stddev)

datasets = ['BarzanMozafari', 'RTE', 'SpamCF', 'TEMP', 'WVSCM']
RELATIVEPATH = "Experiments/Results/RealWorld/"

NUMBER_OF_REPETITIONS = 6
loss_ratio = 5
total_cost_mean = 0.
initial_line = 8  # presumably the offset of the first item row in a results file -- confirm
wrong_answer_costs = [500]

# One (shape, scale) pair per dataset, in the same order as `datasets`.
gammas_dist = [(4.,0.42),(4.,0.15),(4.,0.4),(4.,0.14),(4.,0.5)]

for key,name in enumerate(datasets):
    print(name)

    for wrongAnswerCost in wrong_answer_costs:
        ground_truth, workers_accuracy = alg_utils.get_real_dataset_data(name)
        NUMBER_OF_ITEMS = len(ground_truth)

        # Fraction of positive items and number of workers in the dataset.
        data_bal = sum(ground_truth) / NUMBER_OF_ITEMS
        worker_num = len(workers_accuracy)

        losses = []
        recalls = []
        precisions = []
        costs = []

        for iteration in range(NUMBER_OF_REPETITIONS):
            gt_vals = []
            classification_vals = []
            iteration_cost = 0.
            # `with` guarantees the results file is closed even if a row fails
            # to parse (the handle used to be left open).
            with open(RELATIVEPATH + "%s/%d/Unstarred/%d" % (name,wrongAnswerCost,iteration),'r') as iteration_results_file:
                for i, line in enumerate(iteration_results_file):
                    # NOTE(review): this window keeps NUMBER_OF_ITEMS - 1 rows
                    # (indices initial_line .. initial_line+NUMBER_OF_ITEMS-2),
                    # while the synthetic-results cell starts one row earlier
                    # and keeps NUMBER_OF_ITEMS rows -- confirm which is intended.
                    if (initial_line - 1 < i < (initial_line + NUMBER_OF_ITEMS - 1)):
                        # Each item row must hold exactly six comma-separated fields.
                        item_id, y_val, gt_val, item_cost, item_difficulty, item_real_difficulty,  = line.rstrip().split(",")
                        classification_vals.append(int(y_val))
                        gt_vals.append(int(gt_val))
                        iteration_cost += float(item_cost)
            # end for results file

            costs.append(iteration_cost / NUMBER_OF_ITEMS)
            # NOTE(review): three values are unpacked here, whereas the
            # synthetic-results cell unpacks six from the same function --
            # confirm against the current alg_utils.Metrics.compute_metrics.
            loss, recall, precision = alg_utils.Metrics.compute_metrics(classification_vals, gt_vals, loss_ratio)
            losses.append(loss)
            recalls.append(recall)
            precisions.append(precision)
        # end for iterations

        print(f"Results for cost: {wrongAnswerCost}:")
        print(f"Cost mean: {round_to_3(np.mean(costs))} - Std: {round_to_3(np.std(costs))}")
        print(f"Recall mean: {round_to_3(np.mean(recalls))} - Std: {round_to_3(np.std(recalls))}")
        print(f"Precision mean: {round_to_3(np.mean(precisions))} - Std: {round_to_3(np.std(precisions))}")
        print(f"Loss mean: {round_to_3(np.mean(losses))} - Std: {round_to_3(np.std(losses))}")

        # Cost scaled by item count and the 0.05 factor emitted in the CSV row below.
        norm_cost = round_to_3(np.mean(costs)) * NUMBER_OF_ITEMS * 0.05
        norm_cost_std = round_to_3(np.std(costs)) * NUMBER_OF_ITEMS * 0.05

        shape,scale = gammas_dist[key]
        norm_gamma = f"{shape}-{scale}"

        #loss_ratio,cost_ratio,wrong_answer_cost,name,workers_num,gamma_dist,data_bal,cost,cost_std,loss,loss_std,recall,recall_std,precision,precision_std
        print(f"5,0.05,500,{name},{worker_num},{norm_gamma},{data_bal},{norm_cost},{norm_cost_std},{round_to_3(np.mean(losses))},{round_to_3(np.std(losses))},{round_to_3(np.mean(recalls))},{round_to_3(np.std(recalls))},{round_to_3(np.mean(precisions))},{round_to_3(np.std(precisions))}")

        print("  ---  ")
    

BarzanMozafari
Results for cost: 500:
Cost mean: 10.726 - Std: 0.342
Recall mean: 0.914 - Std: 0.015
Precision mean: 0.907 - Std: 0.015
Loss mean: 0.264 - Std: 0.041
5,0.05,500,BarzanMozafari,83,4.0-0.42,0.491,536.3000000000001,17.1,0.264,0.041,0.914,0.015,0.907,0.015
  ---  
RTE
Results for cost: 500:
Cost mean: 3.743 - Std: 0.042
Recall mean: 0.98 - Std: 0.006
Precision mean: 0.981 - Std: 0.006
Loss mean: 0.061 - Std: 0.012
5,0.05,500,RTE,164,4.0-0.15,0.5,149.72,1.6800000000000002,0.061,0.012,0.98,0.006,0.981,0.006
  ---  
SpamCF
Results for cost: 500:
Cost mean: 10.355 - Std: 0.274
Recall mean: 0.92 - Std: 0.025
Precision mean: 0.974 - Std: 0.019
Loss mean: 0.3 - Std: 0.079
5,0.05,500,SpamCF,150,4.0-0.4,0.3069306930693069,52.292750000000005,1.3837000000000002,0.3,0.079,0.92,0.025,0.974,0.019
  ---  
TEMP
Results for cost: 500:
Cost mean: 3.602 - Std: 0.051
Recall mean: 0.978 - Std: 0.005
Precision mean: 0.985 - Std: 0.002
Loss mean: 0.07 - Std: 0.019
5,0.05,500,TEMP,76,4.0-0.14,0.43

# Test: compute the best value for each action

In [None]:
from ModelLearning.utils import *
from copy import deepcopy

def myfindBestValue(action, hyperplanes, beliefs):
    """Return the largest ``dot(beliefs, hyperplane)`` over the usable hyperplanes.

    A hyperplane is skipped when it holds the placeholder ``'*'`` at a position
    whose belief is non-zero.  ``action`` is accepted but not used.  If no
    hyperplane is usable, the large negative sentinel is returned unchanged.
    """
    best = -129837198273981231

    for plane in hyperplanes:
        masked = any(b != 0 and entry == '*' for b, entry in zip(beliefs, plane))
        if masked:
            continue

        candidate = dot(beliefs, plane)
        if candidate > best:
            best = candidate

    return best

def myfindBestAction(actions, policy, beliefState):
    """Return the action whose best hyperplane value is highest for ``beliefState``.

    Actions that are missing from ``policy`` are ignored; action 0 is returned
    when no action beats the initial sentinel value.
    """
    top_value = -1230981239102938019
    chosen = 0  # fallback: assume there is at least one action
    for candidate in actions:
        if candidate not in policy:
            continue
        score = myfindBestValue(candidate, policy[candidate], beliefState)
        if score > top_value:
            top_value, chosen = score, candidate
    return chosen

def findValues(actions, policy, beliefState):
    """Return a list holding, per action, its best hyperplane value.

    The list is indexed by the action value itself, so ``actions`` is expected
    to be ``0 .. len(actions) - 1``.  Actions absent from ``policy`` keep the
    large negative sentinel.
    """
    sentinel = -1230981239102938019
    per_action = [sentinel] * len(actions)

    for act in actions:
        if act not in policy:
            continue
        score = myfindBestValue(act, policy[act], beliefState)
        if score > per_action[act]:
            per_action[act] = score

    return per_action

numberOfWorkerPools = 1
numStates = 23
numberOfProblems = 100
actions = range(0, numberOfWorkerPools+3) # 4 w/ unclassified

# Initial belief: equal weight on every state except the last one, which is
# the terminating state and gets zero weight.
#belief = [1 for i in range(numStates)]  # init , equivalent to [1] * numStates
belief = [1] * 11 + [1] * 12 #[1 for i in range(numStates)]
belief[numStates-1] = 0  # last states = 0, terminating state
belief = normalize(belief)
beliefs = [deepcopy(belief) for i in range(numberOfProblems)]

#policies = {'frtdp':'/Users/pmaglione/Documents/pomdp_solve_test/frtdp.policy',
#            'hsvi':'/Users/pmaglione/Documents/pomdp_solve_test/hsvi.policy'}

policies = {'unclassified': '/Users/pmaglione/Repos/adaptive-pomdp-solutions/WorkerPoolSelection/ModelLearning/Policies/unclassified250.policy'}


# Per-policy list of action values.  It has to exist before the loop stores
# into it; the previous initialiser was commented out, so the cell relied on
# `values` being left over from another cell.
#values = {'frtdp':0, 'hsvi':0}
values = {}

for key,path in policies.items():
    policy = readPolicy(path, numStates)
    beliefState = beliefs[0]
    #print(actions)
    print(f"Strategy: {key}")
    key_vals = findValues(actions, policy, beliefState)
    values[key] = key_vals
    print(key_vals)

In [None]:
numStates = 22
numberOfProblems = 10
# Initial belief: equal weight everywhere except the last (terminating) state.
belief = [1] * numStates
belief[numStates - 1] = 0
belief = normalize(belief)
beliefs = [deepcopy(belief) for _ in range(numberOfProblems)]


# Sample of per-worker gamma values; overridden just below by a constant list.
gammas = [
    0.628980, 0.580959, 1.281037, 0.987592, 1.373258, 0.801062,
    1.488326, 1.523445, 1.448263, 1.126245, 1.046380, 1.418695,
    1.118415, 1.142464, 1.032052, 0.860525, 1.418732,
]

gammas = [1.281037] * 100

difficulties = getDifficulties(0.1)
gen_belief = beliefs[0]
observation = 1

# Feed one observation per gamma into the belief and stop at the first
# action that is not 0; `num` counts the observations consumed so far.
num = 0
for num, gamma in enumerate(gammas, start=1):
    gen_belief = updateBelief(gen_belief, observation, difficulties, gamma)
    action = findBestAction([0,1,2], policy, gen_belief)
    print(action)
    print(gen_belief)
    #print(gen_belief.index(max(gen_belief)))
    print("---")
    if action != 0:
        print(num)
        break
    # Next simulated vote: 1 with probability 0.75.
    observation = np.random.binomial(1, 0.75)


In [None]:
from ModelLearning.utils import *
from copy import deepcopy

def myfindBestValue(action, hyperplanes, beliefs):
    """Return the largest ``dot(beliefs, hyperplane)`` over the usable hyperplanes.

    A hyperplane is skipped when it holds the placeholder ``'*'`` at a position
    whose belief is non-zero.  ``action`` is accepted but not used.  If no
    hyperplane is usable, the large negative sentinel is returned unchanged.
    """
    best = -129837198273981231

    for plane in hyperplanes:
        masked = any(b != 0 and entry == '*' for b, entry in zip(beliefs, plane))
        if masked:
            continue

        candidate = dot(beliefs, plane)
        if candidate > best:
            best = candidate

    return best

def myfindBestAction(actions, policy, beliefState):
    """Return the action whose best hyperplane value is highest for ``beliefState``.

    Actions that are missing from ``policy`` are ignored; action 0 is returned
    when no action beats the initial sentinel value.
    """
    top_value = -1230981239102938019
    chosen = 0  # fallback: assume there is at least one action
    for candidate in actions:
        if candidate not in policy:
            continue
        score = myfindBestValue(candidate, policy[candidate], beliefState)
        if score > top_value:
            top_value, chosen = score, candidate
    return chosen

def findValues(actions, policy, beliefState):
    """Return a list holding, per action, its best hyperplane value.

    The list is indexed by the action value itself, so ``actions`` is expected
    to be ``0 .. len(actions) - 1``.  Actions absent from ``policy`` keep the
    large negative sentinel.
    """
    sentinel = -1230981239102938019
    per_action = [sentinel] * len(actions)

    for act in actions:
        if act not in policy:
            continue
        score = myfindBestValue(act, policy[act], beliefState)
        if score > per_action[act]:
            per_action[act] = score

    return per_action

numberOfWorkerPools = 1
numStates = 23
numberOfProblems = 100
actions = range(numberOfWorkerPools + 3)

# Initial belief: equal weight everywhere except the last (terminating) state.
belief = [1] * numStates
belief[numStates - 1] = 0
belief = normalize(belief)
beliefs = [deepcopy(belief) for _ in range(numberOfProblems)]

def myReadPolicy(path, numStates):
    """Parse an alpha-vector policy file into ``{action: [vector, ...]}``.

    A line of exactly two characters (one digit plus the newline) selects the
    action that the following vector lines belong to; any longer line is split
    on whitespace and parsed as a vector of floats.  Shorter lines are ignored.

    :param path: path of the policy file to read.
    :param numStates: accepted for signature compatibility; not used.
    :return: dict with the keys 0, 1 and 2, each mapping to a list of vectors.
    :raises KeyError: if a vector line appears before any action line, or the
        action is not 0, 1 or 2.
    """
    alpha_vectors = {0: [], 1:[], 2:[]}

    action = -1
    # Context manager so the file handle is released even when parsing fails.
    with open(path, "r") as f:
        for line in f:
            if len(line) == 2:
                action = int(line)
            elif len(line) > 2:
                alpha_vectors[action].append([float(x) for x in line.split()])

    return alpha_vectors
    

path = '/Users/pmaglione/Documents/pomdp_solve_test/pomdp-solve'
values = {}

methods = ['enum','twopass','witness','incprune']
beliefState = beliefs[0]
#print(f"BeliefState: {beliefState}")

#for method in methods:
file = f"/Users/pmaglione/Documents/pomdp_solve_test/pomdp-solve/unclassified.policy.alpha"

policy = myReadPolicy(file, numStates)

# Label under which this policy's values are stored.  `key` was never
# assigned in this cell (it only existed as a leftover loop variable from
# another cell), so it is set explicitly here.
key = 'unclassified'

#print(f"Strategy: {method}")
key_vals = findValues(actions, policy, beliefState)
values[key] = key_vals
print(key_vals)
    

# Test 2 states POMDP policy

In [130]:
from ModelLearning.utils import *
from copy import deepcopy
import numpy as np
import random
from truth_finder import expectation_maximization

#metrics
class Metrics:
    """Quality metrics for binary item classification."""

    @staticmethod
    def compute_metrics(items_classification, gt, lr = 1):
        """Return ``(loss, recall, precision)`` for a binary classification.

        :param items_classification: predicted labels, indexable by item
            position ``0 .. len(gt) - 1`` (a list or a dict keyed by index).
        :param gt: ground-truth labels, indexable the same way.
        :param lr: loss ratio, the weight applied to each false negative
            (false exclusion); false positives (false inclusions) weigh 1.
        :return: ``loss`` is the weighted error count divided by the number
            of items.  ``recall``, ``precision`` and ``loss`` are 0.0 when
            their denominator is zero instead of raising ZeroDivisionError.
        """
        # FP == False Inclusion
        # FN == False Exclusion
        fp = fn = tp = 0.
        # Index-based on purpose: callers may pass a dict keyed by item index.
        for i in range(len(gt)):
            gt_val = gt[i]
            cl_val = items_classification[i]

            if gt_val and not cl_val:
                fn += 1
            elif not gt_val and cl_val:
                fp += 1
            elif gt_val and cl_val:
                tp += 1

        total = len(gt)
        recall = tp / (tp + fn) if (tp + fn) else 0.0
        precision = tp / (tp + fp) if (tp + fp) else 0.0
        loss = (fp + (fn * lr)) / total if total else 0.0

        return loss, recall, precision
#end

def simulate_workers(workers_num, cheaters_prop, fixed_acc, workers_acc, base_acc = .5):
    """Build a pool of simulated workers as ``{worker_id: [acc_pos, acc_neg]}``.

    With ``fixed_acc`` equal to False each worker is, with probability
    ``cheaters_prop``, a random cheater with accuracy 0.5; otherwise the
    accuracy is drawn as ``base_acc + Beta(1, 1) * (1 - base_acc)``.  In every
    other case all workers get ``workers_acc``.  Positive and negative
    accuracies are always equal.
    """
    pool = {}
    for worker_id in range(workers_num):
        if fixed_acc == False:
            is_cheater = np.random.binomial(1, cheaters_prop)
            if is_cheater:
                # worker_type is 'rand_ch'
                accuracy = 0.5
            else:
                # worker_type is 'worker'
                accuracy = base_acc + (np.random.beta(1, 1) * (1 - base_acc))
        else:
            accuracy = workers_acc

        pool[worker_id] = [accuracy, accuracy]

    return pool

def get_random_worker_accuracy(workers_accuracy, item_id, votes):
    """Pick a random worker that has not voted on ``item_id`` yet.

    :param workers_accuracy: ``{worker_id: [acc_pos, acc_neg]}``.
    :param votes: ``{item_id: {worker_id: ...}}`` with the votes cast so far.
    :return: dict with the keys ``'worker_id'``, ``'acc_pos'`` and ``'acc_neg'``.
    :raises ValueError: when every worker already voted on the item (pool
        sizes are printed first).
    """
    already_voted = votes[item_id].copy().keys()
    all_workers = workers_accuracy.keys()
    candidates = [wid for wid in all_workers if wid not in already_voted]

    if not candidates:
        print(f'used: {len(already_voted)}')
        print(f'workers: {len(all_workers)}')
        print(f'unused: {len(candidates)}')
        raise ValueError("Unused empty!?")

    selected_worker_id = np.random.choice(candidates)
    acc_pos, acc_neg = workers_accuracy[selected_worker_id][0], workers_accuracy[selected_worker_id][1]

    return {'worker_id': selected_worker_id, 'acc_pos': acc_pos, 'acc_neg': acc_neg}


def get_worker_vote(workers_accuracy, i, gt, votes):
    """Simulate one vote on item ``i`` from a worker who has not voted on it yet.

    The worker answers with the true label ``gt[i]`` with probability equal to
    their accuracy for that label (positive or negative accuracy depending on
    ``gt[i]``), and with the opposite label otherwise.

    :return: ``(worker_id, vote)``.
    """
    picked = get_random_worker_accuracy(workers_accuracy, i, votes)
    truth = gt[i]

    worker_acc = picked['acc_pos'] if truth else picked['acc_neg']
    answered_correctly = np.random.binomial(1, worker_acc)
    vote = truth if answered_correctly else 1 - truth

    return (picked['worker_id'], vote)

def generate_gold_data(items_num, possitive_percentage):
    """Return a shuffled list of ``items_num`` binary labels.

    The number of 1-labels is ``possitive_percentage * items_num`` rounded to
    the nearest integer; the remaining labels are 0.
    """
    positives = int(round(((possitive_percentage * 100) * items_num) / 100))
    labels = [1] * positives
    labels.extend([0] * (items_num - positives))
    random.shuffle(labels)

    return labels

def findBestValue(action, hyperplanes, beliefs):
    """Return the largest ``dot(beliefs, hyperplane)`` over the usable hyperplanes.

    A hyperplane is skipped when it holds the placeholder ``'*'`` at a position
    whose belief is non-zero.  ``action`` is accepted but not used.  If no
    hyperplane is usable, the large negative sentinel is returned unchanged.
    """
    best = -129837198273981231

    for plane in hyperplanes:
        masked = any(b != 0 and entry == '*' for b, entry in zip(beliefs, plane))
        if masked:
            continue

        candidate = dot(beliefs, plane)
        if candidate > best:
            best = candidate

    return best

def findBestAction(actions, policy, beliefState):
    """Return the action whose best hyperplane value is highest for ``beliefState``.

    Actions that are missing from ``policy`` are ignored; action 0 is returned
    when no action beats the initial sentinel value.
    """
    top_value = -1230981239102938019
    chosen = 0  # fallback: assume there is at least one action
    for candidate in actions:
        if candidate not in policy:
            continue
        score = findBestValue(candidate, policy[candidate], beliefState)
        if score > top_value:
            top_value, chosen = score, candidate
    return chosen

def myReadPolicy(path, states, origin):
    """Load a policy as ``{action: [alpha_vector, ...]}``.

    When ``origin`` is ``'zmdp'`` the work is delegated to ``readPolicy``.
    Otherwise the file is parsed as an alpha file: a line of exactly two
    characters (one digit plus the newline) selects the current action, and
    any longer line is parsed as a whitespace-separated vector of floats,
    which is kept only if it has exactly ``states`` entries.

    :param path: path of the policy file.
    :param states: number of states, i.e. the expected vector length.
    :param origin: solver that produced the file; only ``'zmdp'`` is special.
    :raises KeyError: (alpha-file branch) if a vector of the right length
        appears before any action line, or the action is not 0, 1 or 2.
    """
    if origin == 'zmdp':
        return readPolicy(path, states)

    alpha_vectors = {0: [], 1:[], 2:[]}
    action = -1
    # Context manager so the file handle is released even when parsing fails.
    with open(path, "r") as f:
        for line in f:
            if len(line) == 2:
                action = int(line)
            elif len(line) > 2:
                strategy_vectors = [float(x) for x in line.split()]
                if len(strategy_vectors) == states:
                    alpha_vectors[action].append(strategy_vectors)

    return alpha_vectors

def update_belief(prevBelief, vote, acc, states_per_class):
    """Return the belief after observing one worker ``vote``.

    States are laid out as ``states_per_class`` states for class 0 followed by
    ``states_per_class`` states for class 1, plus one trailing state that
    always gets weight 0.  Each state of the class matching ``vote`` is
    weighted by ``acc``, every other state by ``1 - acc``.

    :param prevBelief: current belief, indexable by state.
    :param vote: observed vote, 0 or 1.
    :param acc: probability that a vote matches the true class.
    :param states_per_class: number of states per class.
    :return: the value returned by ``normalize`` for the re-weighted belief.
    """
    newBeliefs = []
    for i in range(0, 2):  # 0,1
        likelihood = acc if vote == i else (1 - acc)
        for j in range(0, states_per_class):
            state = (i * states_per_class) + j
            newBeliefs.append(likelihood * prevBelief[state])

    newBeliefs.append(0.0)
    # Use normalize's return value, as every other call site does
    # (`belief = normalize(belief)`); the result used to be discarded.
    return normalize(newBeliefs)

def input_adapter(responses):
    '''
    Convert ``{obj_id: {worker_id: [vote, ...]}}`` into the Psi structure.

    :param responses: mapping of object id to that object's worker responses
    :return: Psi, one list per object (in iteration order) holding
        ``(k, vote)`` pairs, where ``k`` is the position of the response
        within the object and ``vote`` is the first element of the response
    '''
    return [
        [(k, worker_response[0]) for k, worker_response in enumerate(obj_responses.values())]
        for obj_responses in responses.values()
    ]


In [133]:
def run_exp(policy, items, states, num_actions, workers_accuracy, ground_truth, acc):
    """Classify ``items`` items by following ``policy`` and simulated votes.

    Every item starts from the same belief (equal weight on all states except
    the last, terminating one).  In each pass, every unclassified item takes
    the policy's best action: 0 requests one more vote and updates the
    belief, 1 submits label 0, anything else submits label 1.  Once an item
    has more than 100 votes it is forced to the majority label.  After a pass
    in which the least-voted item has exactly 3 votes, ``acc`` is replaced by
    the mean worker accuracy estimated by ``expectation_maximization``.

    :return: ``(items_classification, num_votes)``: ``{item_id: label}`` and
        the total number of votes collected.
    """
    belief = [1 for i in range(states)]  # init , equivalent to [1] * numStates
    belief[states-1] = 0  # last states = 0, terminating state
    belief = normalize(belief)
    beliefs = [deepcopy(belief) for i in range(items)]
    actions = range(0, num_actions)
    items_classification = {}
    votes = {i:{} for i in range(items)}

    states_per_class = int((states - 1) / 2)

    while len(items_classification) != items:
        for item_id, belief_state in enumerate(beliefs):
            if item_id in items_classification:
                continue  # already classified

            best_action = findBestAction(actions, policy, belief_state)

            # Safety valve: stop asking once an item has gathered > 100 votes.
            if len(votes[item_id]) > 100:
                # votes[item_id] maps worker_id -> [vote]; average the votes
                # themselves (summing the dict would add up worker ids).
                positive_votes = sum(v[0] for v in votes[item_id].values())
                mv = (positive_votes / len(votes[item_id])) > 0.5
                best_action = 2 if mv else 1

            if best_action == 0:
                worker_id, vote = get_worker_vote(workers_accuracy, item_id, ground_truth, votes)
                votes[item_id][worker_id] = [vote]
                beliefs[item_id] = update_belief(belief_state, vote, acc, states_per_class)
            elif best_action == 1: #submit zero
                items_classification[item_id] = 0
            else: #submit one
                items_classification[item_id] = 1

        #end for
        if min([len(v) for a,v in votes.items()]) == 3:
            accs, p = expectation_maximization(max([len(v) for a,v in votes.items()]), items, input_adapter(votes))
            acc = np.mean(accs)

    #end while
    num_votes = sum([len(v) for k,v in votes.items()])

    return items_classification, num_votes

In [128]:
# Settings read by the experiment cells that follow.
data_true_percentage = 0.5
states, items, num_actions = 3, 100, 3
lr = 5
acc = 0.75

In [141]:
def run_experiment(policy, data_true_percentage, states, items, num_actions, lr, acc):
    """Run ``run_exp`` 50 times on one simulated dataset and print summary stats.

    The ground truth and a pool of 1000 simulated workers are generated once
    and shared by all repetitions.  Mean and standard deviation of the votes
    per item, loss, recall and precision are printed; nothing is returned.
    """
    ground_truth = generate_gold_data(items, data_true_percentage)
    workers_accuracy = simulate_workers(1000, 0, False, acc, base_acc = .5)

    votes_amount, losses, recalls, precisions = [], [], [], []
    for _ in range(50):
        items_classification, num_votes = run_exp(policy, items, states, num_actions, workers_accuracy, ground_truth, acc)
        loss, recall, precision = Metrics.compute_metrics(items_classification, ground_truth, lr)

        votes_amount.append(num_votes / items) #average
        losses.append(loss)
        recalls.append(recall)
        precisions.append(precision)

    summary = f" Votes: {np.mean(votes_amount)} / {np.std(votes_amount)} \n Loss: {np.mean(losses)} / {np.std(losses)} \n Recall: {np.mean(recalls)} / {np.std(recalls)} \n Precision: {np.mean(precisions)} / {np.std(precisions)}"
    print(summary)

In [143]:
# Load the three pomdp-solve policies (one per cost directory) and evaluate
# each of them with the settings defined above.
policies = [
    myReadPolicy(f'/Users/pmaglione/Documents/pomdp_solve_test/3states/{cost_dir}/test.alpha', states, 'pomdp-solve')
    for cost_dir in ('-500', '-1000', '-10')
]

for policy in policies:
    run_experiment(policy, data_true_percentage, states, items, num_actions, lr, acc)
    print(" --- ")

 Votes: 9.6322 / 0.9893680609358683 
 Loss: 0.0238 / 0.03205557673790942 
 Recall: 0.992 / 0.01200000000000001 
 Precision: 0.9925161202942715 / 0.012344492992408751
 --- 
 Votes: 10.5534 / 1.190007747873937 
 Loss: 0.0068000000000000005 / 0.014891608375189026 
 Recall: 0.9980000000000001 / 0.006000000000000005 
 Precision: 0.9964705882352942 / 0.007533087338156308
 --- 
 Votes: 1.0 / 0.0 
 Loss: 0.7298 / 0.16922753913001276 
 Recall: 0.7568 / 0.06564876236457166 
 Precision: 0.7577381369027768 / 0.04809981928535244
 --- 


In [144]:
# Repeat the experiment with a 23-state policy loaded through the 'zmdp'
# branch of myReadPolicy; the other settings are reused from the cells above.
states = 23
policy = myReadPolicy('./ModelLearning/Policies/W1_COST500.policy', states, 'zmdp')
run_experiment(policy, data_true_percentage, states, items, num_actions, lr, acc)

 Votes: 3.1920000000000006 / 0.17577258034175863 
 Loss: 0.2926 / 0.08666741025322032 
 Recall: 0.9036 / 0.033392214661504556 
 Precision: 0.8992675825856574 / 0.040620210608958304


## Compare workers error rate estimation

In [13]:
import numpy as np
import random

# generate votes: sizes of the simulated vote set, and its container
numWorkers = numberOfProblems = numVotes = 10
votes = dict()

def get_worker_to_vote(numWorkers, votes):
    """Return a random worker id in ``1 .. numWorkers`` that is not in ``votes``.

    :param numWorkers: number of workers; ids run from 1 to ``numWorkers``.
    :param votes: mapping (or other container) of worker ids already used.
    :raises ValueError: if every worker id is already present in ``votes``
        (the previous rejection-sampling loop never terminated in that case).
    """
    unused = [worker_id for worker_id in range(1, numWorkers + 1) if worker_id not in votes]
    if not unused:
        raise ValueError("every worker has already voted")

    return random.choice(unused)
        
# For every problem, collect `numVotes` votes, each from a worker that has not
# voted on that problem yet; a vote is 1 with probability 0.75.
# Result: votes[problem] = {worker_id: observation}.
for pn in range(numberOfProblems):
    votes[pn] = {}
    for i in range(numVotes):
        worker_id = get_worker_to_vote(numWorkers, votes[pn])
        observation = np.random.binomial(1, .75)
        votes[pn][worker_id] = observation

### theirs

In [2]:
from AgentHunt_single.ModelLearning.utils import *
from AgentHunt_single.Data import *
from AgentHunt_single.Ballots import *

import time
import subprocess
#from random import random
from os import mkdir, rmdir
from copy import deepcopy
from math import floor
from functools import reduce
import math

In [5]:
EMPATH = '/Users/pmaglione/Repos/adaptive-pomdp-solutions/AgentHunt_single/EM/em'
gammaList = [1]
numberOfWorkerPools = 1
ballots = Ballots(EMPATH)  # BALLOTS OBJECT

fastLearning = False

agentActions = [-1 for i in range(0, numberOfProblems)]

# One flat list of (observation, worker_id) pairs per problem.  The previous
# nested loop also appended one extra empty list per worker pool at the top
# level, doubling the length of `observations` without ever using the extras.
observations = [[] for _ in range(numberOfProblems)]

for pn, item_votes in votes.items():
    for worker_id, observation in item_votes.items():
        observations[pn].append((observation, worker_id))

# Register every problem with the same assumed answer and re-learn after each.
bestAction = 1
for i in range(numberOfProblems):
    ballots.addQuestionAndRelearn(
                            observations[i],  # all obs
                            bestAction,  # 0/1
                            .7,  # diff with higher %
                            fastLearning)

#print(votes)
print(ballots.workersToIntegers)
print(ballots.workersToGammas0)


{4: 0, 6: 1, 1: 2, 5: 3, 10: 4, 2: 5, 3: 6, 9: 7, 7: 8, 8: 9}
{4: 0.685824, 6: 0.830336, 1: 0.686345, 5: 0.562498, 10: 0.932432, 2: 0.027327, 3: 0.943438, 9: 0.231426, 7: 0.73242, 8: 0.511309}


### Run pomdp base with python-glad estimation

In [31]:
import python_glad.glad_bin as glad_bin

def get_workers_order(votes):
    """Number the workers by first appearance and count items, workers and votes.

    :param votes: ``{item_id: {worker_id: vote}}``.
    :return: ``(num_items, num_workers, num_votes, worker_to_order,
        order_to_worker)``; the two dicts map a worker id to its 0-based
        order of first appearance and back.
    """
    worker_to_order = {}
    order_to_worker = {}
    num_votes = 0

    for item_votes in votes.values():
        num_votes += len(item_votes)
        for worker_id in item_votes:
            if worker_id in worker_to_order:
                continue
            position = len(worker_to_order)
            worker_to_order[worker_id] = position
            order_to_worker[position] = worker_id

    return len(votes), len(worker_to_order), num_votes, worker_to_order, order_to_worker

def writeToEMFormat(votes, path_em_input_file):
    """Write ``votes`` to ``path_em_input_file`` in the EM input format.

    The first line holds ``num_votes num_workers num_items prior_beta``
    (``prior_beta`` is fixed at 0.5); each following line holds
    ``item_id worker_order vote``, where ``worker_order`` is the worker's
    0-based order of first appearance.

    :param votes: ``{item_id: {worker_id: vote}}``.
    :param path_em_input_file: path of the file to (over)write.
    :return: ``(worker_to_order, order_to_worker)`` as built by
        ``get_workers_order``.
    """
    num_items, num_workers, num_votes, worker_to_order, order_to_worker = get_workers_order(votes)

    prior_beta = 0.5
    # Context manager so the file is flushed and closed even if a write fails.
    with open(path_em_input_file, 'w') as outputfile:
        #file headers
        outputfile.write('%d %d %d %f\n' % (num_votes, num_workers, num_items, prior_beta))

        #votes
        for item_id, item_votes in votes.items():
            for worker_id, vote in item_votes.items():
                outputfile.write('%d %d %d\n' % (item_id, worker_to_order[worker_id], vote))

    return worker_to_order, order_to_worker

def get_worker_error_rate_estimation(votes, path_em_input_file):
    """Estimate per-worker error rates with ``glad_bin``.

    Estimation runs only when every item has at least two votes; otherwise an
    empty dict is returned.  The votes are first written to
    ``path_em_input_file``.

    :return: ``{worker_id: abs(error_rate)}``, or ``{}`` if some item has
        fewer than two votes.
    """
    # min 2 votes per item
    enough_votes = all(len(item_votes) >= 2 for item_votes in votes.values())
    if not enough_votes:
        return {}

    worker_to_order, order_to_worker = writeToEMFormat(votes, path_em_input_file)
    error_rates = glad_bin.estimate(path_em_input_file)

    estimation = {}
    for worker_order, error_rate in error_rates.items():
        estimation[order_to_worker[worker_order]] = abs(error_rate)
    return estimation

    
    
# File that the votes are written to before running the estimation.
path_em_input_file = './log/em/ballots.eminput'

#filename = '/Users/pmaglione/Repos/adaptive-pomdp-solutions/log/em/test_ballots_pure.eminput'
#glad_bin.estimate(filename)
# Estimate the workers' error rates from the `votes` generated earlier.
error_rates = get_worker_error_rate_estimation(votes, path_em_input_file)