In [2]:
from dai_pomdp.ModelLearning.utils import *
from dai_pomdp.Data import *
from dai_pomdp.Ballots import *
from dai_pomdp.helpers import *

import random
import time
import subprocess
#from random import random
from os import mkdir, rmdir
from copy import deepcopy
from math import floor
from functools import reduce
import numpy as np
import math

# ACTIONS
# CONST_NO_ANSWER is not a policy action: it is the sentinel stored in the
# answers list for items that have not been classified yet.
CONST_NO_ANSWER = -1
# The three values below are compared against the policy's best action in solve().
CONST_REQUEST_VOTE = 0
CONST_SUBMIT_ZERO = 1
CONST_SUBMIT_ONE = 2

def round_to_3(value):
    """Round ``value`` to three decimal places using the built-in ``round``."""
    return round(value, ndigits=3)

def are_items_unresolved(answers):
    """Return True if at least one entry of ``answers`` still equals CONST_NO_ANSWER."""
    for answer in answers:
        if answer == CONST_NO_ANSWER:
            return True
    return False

def get_unresolved_items(answers):
    """Return the positions in ``answers`` whose value is still CONST_NO_ANSWER."""
    pending_positions = []
    for position, answer in enumerate(answers):
        if answer == CONST_NO_ANSWER:
            pending_positions.append(position)
    return pending_positions

def get_random_worker(workers_error_rates, item_id, votes):
    """Pick, uniformly at random, a worker that has not voted on ``item_id`` yet.

    Args:
        workers_error_rates: sequence of error rates; a worker's id is its
            position in this sequence.
        item_id: key of the item in ``votes``.
        votes: mapping ``item_id -> {worker_id: vote}``. It is only read,
            never modified.

    Returns:
        Tuple ``(selected_worker_id, error_rate)`` where ``error_rate`` is
        ``workers_error_rates[selected_worker_id]``.

    Raises:
        ValueError: if every worker has already voted on ``item_id``.
    """
    # Only membership is needed, so the per-item vote dict is read in place
    # instead of being copied on every call.
    already_voted = votes[item_id]
    total_workers = len(workers_error_rates)
    workers_ids_unused = [
        worker_id for worker_id in range(total_workers)
        if worker_id not in already_voted
    ]

    if not workers_ids_unused:
        # Carry the diagnostics in the exception itself rather than printing them.
        raise ValueError(
            f"No unused worker left for item {item_id}: "
            f"{len(already_voted)} of {total_workers} workers have already voted"
        )

    selected_worker_id = np.random.choice(workers_ids_unused)
    error_rate = workers_error_rates[selected_worker_id]

    return selected_worker_id, error_rate

def get_accuracy(item_diff, worker_error_rate):
    """Return ``0.5 * (1 + (1 - item_diff) ** worker_error_rate)``.

    In get_worker_vote the result is used as the probability that a worker
    votes the ground-truth label.
    """
    easiness = 1.0 - item_diff
    return 0.5 * (1.0 + easiness ** worker_error_rate)

def get_worker_vote(item_id, items_votes, items_difficulties, items_gt, workers_error_rates):
    """Simulate one vote on ``item_id`` from a worker that has not voted on it yet.

    A worker is drawn with get_random_worker, its accuracy is computed with
    get_accuracy from the item's difficulty and the worker's error rate, and a
    Bernoulli draw with that accuracy decides whether the vote matches the
    ground truth or is flipped.

    Returns:
        Tuple ``(worker_id, vote)``. The vote is ``items_gt[item_id]`` on a
        successful draw and ``1 - items_gt[item_id]`` otherwise.
    """
    worker_id, error_rate = get_random_worker(workers_error_rates, item_id, items_votes)
    accuracy = get_accuracy(items_difficulties[item_id], error_rate)

    answered_correctly = np.random.binomial(1, accuracy)
    truth = items_gt[item_id]
    vote = truth if answered_correctly else 1 - truth
    return worker_id, vote
    
def get_worker_error_rate_estimation(items_votes):
    """Estimate per-worker error rates from the votes collected so far.

    The estimation only runs when every item has at least two votes;
    otherwise an empty dict is returned.

    Args:
        items_votes: mapping ``item_id -> {worker_id: vote}``.

    Returns:
        Dict ``worker_id -> gamma``, or ``{}`` when some item has fewer than
        two votes.
    """
    # Require a minimum of 2 votes per item before estimating anything.
    if any(len(item_votes) < 2 for item_votes in items_votes.values()):
        return {}

    # NOTE(review): writeToEMFormat presumably exports the votes for the EM
    # step and returns a worker-id -> integer-index mapping; get_results
    # presumably returns the estimates indexed by that integer. Confirm
    # against the helpers' definitions.
    workers_to_int = writeToEMFormat(items_votes)
    gammas, _difficulties, _posteriors = get_results()

    original_ids = list(workers_to_int.keys())
    em_indices = list(workers_to_int.values())

    estimates = {}
    for em_index, gamma in enumerate(gammas):
        # Map the position in ``gammas`` back to the original worker id.
        estimates[original_ids[em_indices.index(em_index)]] = gamma
    return estimates


def get_error_rate(worker_id, estimated_error_rates, avg_error_rate):
    """Return the error rate to assume for ``worker_id``.

    Resolution order:
      1. the worker's own estimate, when present in ``estimated_error_rates``;
      2. the mean of all known estimates, when any exist;
      3. ``avg_error_rate`` as the final fallback.
    """
    if worker_id in estimated_error_rates.keys():
        return estimated_error_rates[worker_id]

    known_workers = len(estimated_error_rates)
    if known_workers == 0:
        return avg_error_rate

    # Average over the workers whose error rate has been estimated.
    return sum(estimated_error_rates.values()) / known_workers


def get_worker_error_rate(worker_id, estimated_error_rates, avg_error_rate, estimate_after, have_submitted):
    """Choose between the prior average and the estimated error rate for a worker.

    When ``estimate_after`` is set and ``have_submitted`` is false,
    ``avg_error_rate`` is returned unconditionally. In every other case the
    decision is delegated to get_error_rate.
    """
    if estimate_after and not have_submitted:
        return avg_error_rate
    return get_error_rate(worker_id, estimated_error_rates, avg_error_rate)

#data utils
def generate_gold_data(items_num, possitive_percentage):
    """Build a shuffled list of binary ground-truth labels.

    Args:
        items_num: number of items.
        possitive_percentage: fraction in [0, 1] of items that are positive (label 1).

    Returns:
        List containing ``round(possitive_percentage * items_num)`` ones and
        zeros for the remaining items, shuffled in place with ``random.shuffle``.
    """
    positives = int(round(possitive_percentage * 100 * items_num / 100))

    gold_data = [1] * positives
    gold_data.extend([0] * (items_num - positives))
    random.shuffle(gold_data)

    return gold_data

def solve(num_states, states_difficulties, avg_error_rate, policy, workers_error_rates, items_difficulties, items_gt , estimate_after = True):
    """Classify every item by following ``policy`` until all items have an answer.

    Each round, for every unresolved item, the best action for its current
    belief is looked up. Items whose action is "request vote" receive one
    simulated vote from an unused worker. Worker error rates are then
    re-estimated and the belief of each voted item is updated with its newest
    vote. Items whose action is "submit 0" or "submit 1" get that answer.

    Args:
        num_states: number of POMDP states; the last one is the terminating state.
        states_difficulties: passed through to updateBelief.
        avg_error_rate: fallback worker error rate.
        policy: policy object passed to findBestAction.
        workers_error_rates: true error rate of each simulated worker.
        items_difficulties: difficulty of each item, indexed by item id.
        items_gt: ground-truth label of each item, indexed by item id.
        estimate_after: forwarded to get_worker_error_rate together with the
            per-round ``have_submitted`` flag.

    Returns:
        Tuple ``(answers, items_votes)``: the list of submitted labels and the
        mapping ``item_id -> {worker_id: vote}``.
    """
    num_items = len(items_gt)
    actions = range(3)  # 0, 1, 2

    items_votes = {item_id: {} for item_id in range(num_items)}

    # Initial belief: uniform over all states except the terminating (last) one.
    initial_belief = [1] * num_states
    initial_belief[num_states - 1] = 0
    initial_belief = normalize(initial_belief)
    beliefs = [deepcopy(initial_belief) for _ in range(num_items)]

    answers = [CONST_NO_ANSWER] * num_items

    while are_items_unresolved(answers):
        unresolved_items = get_unresolved_items(answers)
        # Computed from the state at the start of the round, i.e. before this
        # round's submissions are applied.
        have_submitted = len(unresolved_items) != num_items

        # Phase 1: ask the policy what to do with each unresolved item.
        items_to_vote = []
        for item_id in unresolved_items:
            best_action = int(findBestAction(actions, policy, beliefs[item_id]))
            if best_action == CONST_REQUEST_VOTE:
                items_to_vote.append(item_id)
            elif best_action == CONST_SUBMIT_ZERO:
                answers[item_id] = 0
            elif best_action == CONST_SUBMIT_ONE:
                answers[item_id] = 1

        # Phase 2: collect one new vote for every item that requested it.
        for item_id in items_to_vote:
            worker_id, vote = get_worker_vote(item_id, items_votes, items_difficulties, items_gt, workers_error_rates)
            items_votes[item_id][worker_id] = vote

        # Phase 3: re-estimate worker error rates from all votes so far.
        estimated_error_rates = get_worker_error_rate_estimation(items_votes)

        # Phase 4: update beliefs with the vote that was just inserted
        # (the last entry of the insertion-ordered per-item dict).
        for item_id in items_to_vote:
            last_worker_id, last_vote = list(items_votes[item_id].items())[-1]
            error_rate = get_worker_error_rate(last_worker_id, estimated_error_rates, avg_error_rate,
                                               estimate_after, have_submitted)
            beliefs[item_id] = updateBelief(beliefs[item_id], last_vote, states_difficulties, error_rate)

    return answers, items_votes

def genPOMDP(filename, fnc, fpc, cost_vote, gammas, numberOfWorkerPools, difficulties=None):
    """Write a POMDP specification for the binary labelling task to ``filename``.

    State layout: ``state = v * numDiffs + diffState`` where ``v`` is the true
    label (0 or 1) and ``diffState`` indexes ``difficulties``. One extra
    absorbing state is appended as the last state.

    Action layout: actions ``0 .. numberOfWorkerPools - 1`` request a vote from
    the corresponding worker pool; the next two actions submit label 0 and
    label 1 respectively and move to the absorbing state.

    Args:
        filename: path of the file to create (overwritten if it exists).
        fnc: reward written for submitting 0 when the true label is 1.
        fpc: reward written for submitting 1 when the true label is 0.
        cost_vote: reward written for every vote-request action.
        gammas: per-pool parameter passed to calcAccuracy, indexed by pool.
        numberOfWorkerPools: number of vote-request actions.
        difficulties: difficulty levels; defaults to ``getDifficulties(0.1)``.

    The file is opened with a context manager so the handle is closed even if
    a write or a calcAccuracy call raises part-way through.
    """
    if difficulties is None:
        difficulties = getDifficulties(0.1)

    numDiffs = len(difficulties)

    reward_correct_answer = 0

    # Two true labels per difficulty level, plus one absorbing state.
    numberOfStates = (numDiffs * 2) + 1
    absorbingState = numberOfStates - 1
    numberOfActions = numberOfWorkerPools + 2
    SUBMITZERO = numberOfWorkerPools
    SUBMITONE = numberOfWorkerPools + 1

    with open(filename, 'w') as file:
        file.write('discount: 0.9999\n')
        file.write('values: reward\n')
        file.write('states: %d\n' % numberOfStates)
        file.write('actions: %d\n' % numberOfActions)
        file.write('observations: Zero One None\n')

        # Requesting a vote never changes the state.
        for i in range(0, numberOfStates):
            for k in range(0, numberOfWorkerPools):
                file.write('T: %d : %d : %d %f\n' % (k, i, i, 1.0))

        # Submitting always moves to the absorbing state.
        file.write('T: %d : * : %d %f\n' % (SUBMITZERO, absorbingState, 1.0))
        file.write('T: %d : * : %d %f\n' % (SUBMITONE, absorbingState, 1.0))

        # The absorbing state only ever emits the None observation.
        file.write('O: * : %d : None %f\n' % (absorbingState, 1.0))

        for v in range(0, 2):
            for diffState in range(0, numDiffs):
                state = v * numDiffs + diffState
                file.write('O: %d: %d : None %f\n' % (SUBMITZERO, state, 1.0))
                file.write('O: %d: %d : None %f\n' % (SUBMITONE, state, 1.0))
                for k in range(0, numberOfWorkerPools):
                    accuracy = calcAccuracy(gammas[k], difficulties[diffState])
                    if v == 0:  # true answer is 0: a correct vote is "Zero"
                        prob_zero, prob_one = accuracy, 1.0 - accuracy
                    else:  # true answer is 1: a correct vote is "One"
                        prob_zero, prob_one = 1.0 - accuracy, accuracy
                    file.write('O: %d : %d : Zero %f\n' % (k, state, prob_zero))
                    file.write('O: %d : %d : One %f\n' % (k, state, prob_one))

        # Reward (cost) of requesting one more vote from each pool.
        for v in range(numberOfWorkerPools):
            file.write('R: %d : * : * : * %f\n' % (v, cost_vote))

        for i in range(numberOfStates - 1):
            if i < numDiffs:  # true label = 0
                file.write('R: %d : %d : %d : * %f\n' % (SUBMITZERO, i, absorbingState, reward_correct_answer))
                file.write('R: %d : %d : %d : * %f\n' % (SUBMITONE, i, absorbingState, fpc))
            else:  # true label = 1
                file.write('R: %d : %d : %d : * %f\n' % (SUBMITONE, i, absorbingState, reward_correct_answer))
                file.write('R: %d : %d : %d : * %f\n' % (SUBMITZERO, i, absorbingState, fnc))

        # No reward once inside the absorbing state.
        file.write('R: * : %d : %d : * %f\n' % (absorbingState, absorbingState, 0))

In [None]:
import dai_pomdp.algorithms_utils as alg_utils
import pandas as pd


# Experiment driver: for every (estimate_after, fnc, fpc) combination, load the
# matching policy, run solve() 10 times on the same synthetic data set, and
# append one row of aggregated metrics to the results CSV.
# NOTE(review): no random seed is set for `random` or `np.random`, so runs are
# not reproducible.

# Column names of one result row; the order matches the `result` list built below.
columns = ['name','num_workers','workers_distribution','policy_name','num_items','data_bal','items_diff','num_states','cost','cost_std', 'recall','recall_std', 'precision', 'precision_std', 'loss', 'loss_std', 'f1', 'f1_std', 'fbeta', 'fbeta_std', 'estimate_after', 'avg_error_rate', 'wce','wce_std','fnc','fpc']

# Synthetic items: every item shares the same difficulty.
items_num = 1000
possitive_percentage = 0.5
item_difficulty = 0.5
items_difficulties = [item_difficulty] * items_num
items_ground_truth = generate_gold_data(items_num, possitive_percentage)


workers_num = 100

# Fallback error rate used by solve() when no estimate is available.
avg_error_rate = 1

dist_name = "Normal"
normal_mean = 1
normal_std = 0.2

# True worker error rates, drawn once and reused for every configuration.
workers_error_rates = np.random.normal(normal_mean, normal_std, workers_num)

# NOTE(review): presumably 23 == 2 * len(getDifficulties(0.1)) + 1 (see genPOMDP) -- confirm.
states_num = 23
# NOTE(review): absolute, user-specific path; the notebook will not run on another machine as-is.
policy_path = "/Users/pmaglione/Repos/adaptive-pomdp-solutions/dai_pomdp/ModelLearning/Policies/"

state_diff = getDifficulties(0.1)

# Rewards tried for false negatives / false positives; they select the policy file by name.
fncs = [-1,-5,-10,-500]
fpcs = [-1,-5,-10,-500]

for moment_error_estimation in [False, True]:
    for fnc in fncs:
        for fpc in fpcs:
            # Reset for every configuration, so each CSV append below writes exactly one row.
            total_results = []

            policy_name = f'wrong-cost-fnc{fnc}-fpc{fpc}.policy'
            policy = readPolicy(policy_path + policy_name, states_num)

            losses = []
            recalls = []
            precisions = []
            costs = []
            f_ones = []
            f_betas = []
            wces = []

            # 10 repetitions per configuration; mean and std are reported below.
            for _ in range(10):
                answers, items_votes = solve(states_num, state_diff, avg_error_rate, policy, workers_error_rates, items_difficulties, items_ground_truth, moment_error_estimation)

                # Cost of a run = mean number of votes collected per item.
                costs.append(np.mean([len(v) for k,v in items_votes.items()]))    
                
                # fnc/fpc are negated before being handed to compute_metrics.
                loss, recall, precision, f1, beta, f_beta, wce = alg_utils.Metrics.compute_metrics(answers, items_ground_truth, -1*fnc, -1*fpc)
                losses.append(loss)
                recalls.append(recall)
                precisions.append(precision)
                f_ones.append(f1)
                f_betas.append(f_beta)
                wces.append(wce)
                # end for iterations

            result = [f"wrong-cost-fnc{fnc}-fpc{fpc}", workers_num, dist_name+f"({normal_mean},{normal_std})", policy_name, items_num, possitive_percentage, 
                      item_difficulty, states_num, round_to_3(np.mean(costs)), round_to_3(np.std(costs)), round_to_3(np.mean(recalls)), round_to_3(np.std(recalls)),
                     round_to_3(np.mean(precisions)), round_to_3(np.std(precisions)), round_to_3(np.mean(losses)), round_to_3(np.std(losses)),
                     round_to_3(np.mean(f_ones)), round_to_3(np.std(f_ones)), round_to_3(np.mean(f_betas)), round_to_3(np.std(f_betas)), moment_error_estimation, avg_error_rate, 
                      round_to_3(np.mean(wces)), round_to_3(np.std(wces)), fnc, fpc]

            total_results.append(result)
            # Appended without a header row; NOTE(review): absolute, user-specific output path.
            pd.DataFrame(total_results, columns=columns).to_csv(f"/Users/pmaglione/Repos/adaptive-pomdp-solutions/dai_pomdp/results/pomdp_results_diff.csv", mode='a', index=False, header=False)

        #end for


In [None]:
dist_name = "Normal"
normal_mean = 1
normal_std = 0.2

items_num = 1000
possitive_percentage = 0.5