### Annotated by 3 workers

In [None]:
import pandas as pd
import numpy as np
from statsmodels.stats.inter_rater import fleiss_kappa

def process_annotations(input_file_path, output_file_path):
    """Aggregate three workers' utility labels into one ground-truth label per row.

    Reads an Excel sheet that contains the columns
    ``Answer.utilityValue.worker1`` .. ``Answer.utilityValue.worker3`` holding
    the labels ``None`` / ``Low`` / ``Medium`` / ``High``. The sheet is written
    back to ``output_file_path`` with these columns added:

    * ``<worker column>_numeric`` -- the label mapped to 0..3,
    * ``GroundTruthNumerical``    -- the aggregated numeric label,
    * ``MethodUsed``              -- how the aggregated label was obtained,
    * ``GroundTruthCategorical``  -- the aggregated label as text.

    Aggregation rule, applied per row:

    * if at least two workers gave the same label, that label wins
      (``"Majority Voting"``);
    * otherwise the mean of the numeric labels, rounded to the nearest level
      (ties round up), is used. The method string ``"Weighted Aggregation"``
      is kept for compatibility with existing outputs, although the mean is
      unweighted;
    * blank or unrecognised labels are ignored; a row without any usable
      label gets an empty ground truth and ``"No Valid Annotations"``.

    Args:
        input_file_path: Path of the Excel file with the worker annotations.
        output_file_path: Path the augmented Excel file is written to.

    Raises:
        KeyError: If one of the three worker columns is missing from the input.
    """
    # Local import keeps this block self-contained (file-level imports untouched).
    from collections import Counter

    worker_columns = [
        'Answer.utilityValue.worker1',
        'Answer.utilityValue.worker2',
        'Answer.utilityValue.worker3',
    ]
    label_mapping = {'None': 0, 'Low': 1, 'Medium': 2, 'High': 3}
    reverse_label_mapping = {v: k for k, v in label_mapping.items()}

    # pandas' default NA markers include the literal string "None", which is a
    # valid label here. Treat only blank cells as missing so that "None" is
    # read as text and mapped to 0 instead of silently becoming NaN.
    data = pd.read_excel(input_file_path, keep_default_na=False, na_values=[''])

    missing_columns = [col for col in worker_columns if col not in data.columns]
    if missing_columns:
        raise KeyError(f"Missing annotation column(s): {missing_columns}")

    numeric_columns = [f"{col}_numeric" for col in worker_columns]
    unrecognised = 0
    for col, numeric_col in zip(worker_columns, numeric_columns):
        data[numeric_col] = data[col].map(label_mapping)
        # A non-blank cell that did not map is a label outside label_mapping.
        unrecognised += int((data[col].notna() & data[numeric_col].isna()).sum())
    if unrecognised:
        print(f"Warning: {unrecognised} annotation(s) had unrecognised labels and were ignored.")

    def calculate_ground_truth_and_method(annotations):
        """Return ``(ground_truth, method)`` for one row of numeric labels."""
        valid = [int(a) for a in annotations if pd.notna(a)]
        if not valid:
            return np.nan, "No Valid Annotations"

        label, votes = Counter(valid).most_common(1)[0]
        if votes >= 2:  # At least two annotators agree
            return label, "Majority Voting"

        # No agreement: fall back to the mean. Round half up explicitly,
        # because the built-in round() uses banker's rounding and would break
        # .5 ties inconsistently (0.5 -> 0 but 1.5 -> 2).
        return int(np.floor(np.mean(valid) + 0.5)), "Weighted Aggregation"

    results = [
        calculate_ground_truth_and_method(row)
        for row in data[numeric_columns].itertuples(index=False, name=None)
    ]
    data['GroundTruthNumerical'] = [ground_truth for ground_truth, _ in results]
    data['MethodUsed'] = [method for _, method in results]

    data['GroundTruthCategorical'] = data['GroundTruthNumerical'].map(reverse_label_mapping)
    data.to_excel(output_file_path, index=False)
    print(f"Processed data saved to: {output_file_path}")

# Hard-coded locations of the annotated input sheet and the derived output sheet.
input_file_path = '/Users/innerpiece92/Desktop/NLP_Workspace/AArec/mturk/mturk-marketplace-ready/test/results_evaluation/hotels/human_ITA_agreement.xlsx'
output_file_path = '/Users/innerpiece92/Desktop/NLP_Workspace/AArec/mturk/mturk-marketplace-ready/test/results_evaluation/hotels/ground_truth.xlsx'

# Run the pipeline only when this file is executed directly (script or notebook
# cell), so importing it does not trigger Excel I/O as a side effect.
if __name__ == "__main__":
    process_annotations(input_file_path, output_file_path)