In [1]:
import os
import pandas as pd
from fractions import Fraction

In [2]:
# Root folder that holds every annotation result. The default is the location
# used by the original author; set the LLM_TOPIC_RESULTS_DIR environment
# variable to run the notebook against a different checkout.
results_root = os.environ.get(
    'LLM_TOPIC_RESULTS_DIR',
    '/Users/brain/Documents/GitHub/LLMs-topic-classification/results',
).rstrip('/')

# Folder paths keep their trailing '/' because they are also used verbatim as
# column labels of the score table built further down.

# human annotation (one folder per consistency mode)
path_strict_consistency_human_annotation = results_root + '/human_annotation/strict_consistency/'
path_soft_consistency_human_annotation = results_root + '/human_annotation/soft_consistency/'

# machine annotation, strict consistency
path_no_context_strict_consistency_chatgpt = results_root + '/mapped_csv/no-context/ChatGPT/strict_consistency/'
path_no_context_strict_consistency_googlebard = results_root + '/mapped_csv/no-context/GoogleBard/strict_consistency/'
path_context_strict_consistency_chatgpt = results_root + '/mapped_csv/context/ChatGPT/strict_consistency/'

# machine annotation, soft consistency
path_no_context_soft_consistency_chatgpt = results_root + '/mapped_csv/no-context/ChatGPT/soft_consistency/'
path_no_context_soft_consistency_googlebard = results_root + '/mapped_csv/no-context/GoogleBard/soft_consistency/'
path_context_soft_consistency_chatgpt = results_root + '/mapped_csv/context/ChatGPT/soft_consistency/'

# dataset file names; each name is looked up inside every folder above
dataset_names_list = ["Education_expenditure_and_indicators.csv",
                      "Health_expectancy.csv",
                      "Listed_monuments.csv",
                      "Livestock.csv",
                      "Milk_supply_and_dairy_production.csv",
                      "Mobility.csv",
                      "Plant_protection_products.csv",
                      "Population_dynamics.csv",
                      "Social_security.csv",
                      "Trade_and_industry.csv"]

In [3]:
def prepare_df_for_alignment_calculation(human_annotation_path, machine_annotation_path):
    """Load the human and machine annotation CSVs of one dataset.

    Args:
        human_annotation_path: Path of a CSV file that has a 'col_headers'
            column. That column becomes the index and the table is then
            transposed, so every 'col_headers' value ends up as a column.
        machine_annotation_path: Path of the machine annotation CSV file.
            Its 'run_index' and 'ID' columns are removed when present.

    Returns:
        Tuple (h_df, m_df) of the prepared human and machine DataFrames.
    """
    h_df = pd.read_csv(human_annotation_path).set_index('col_headers').T

    # errors='ignore' skips labels that are absent, so files with or without
    # the two bookkeeping columns are handled by the same call.
    m_df = pd.read_csv(machine_annotation_path).drop(columns=['run_index', 'ID'],
                                                     errors='ignore')

    return h_df, m_df

In [4]:
def calculate_alignment_score(h_df, m_df, human_annotators_count=3, llm_runs_count=10):
    """Compute the human/machine alignment score of one dataset.

    For every column of ``h_df`` and every distinct numeric value in it, the
    number of ``h_df`` rows holding that value is divided by
    ``human_annotators_count`` and multiplied by the number of ``m_df`` rows
    holding the same value divided by ``llm_runs_count``. These products are
    summed over all values and columns, and the total is divided by the number
    of ``h_df`` columns.

    Args:
        h_df: Human annotations, one column per annotated item. Values are
            converted with ``pd.to_numeric(..., errors='coerce')``; entries
            that cannot be converted become NaN and are not counted.
            NOTE(review): presumably one row per annotator - confirm.
        m_df: Machine annotations. Must contain every ``h_df`` column that has
            at least one numeric value.
            NOTE(review): presumably one row per LLM run - confirm.
        human_annotators_count: Denominator applied to the human counts.
        llm_runs_count: Denominator applied to the machine counts.

    Returns:
        The alignment score as an exact ``fractions.Fraction``.

    Raises:
        KeyError: If a column of ``h_df`` with numeric values is missing from
            ``m_df``.
        ZeroDivisionError: If ``h_df`` has no columns or one of the two count
            arguments is 0.
    """
    column_scores = []

    for column in h_df.columns:
        # Coerce into a local Series. Assigning the result back into h_df
        # would overwrite the caller's DataFrame as a side effect.
        human_codes = pd.to_numeric(h_df[column], errors='coerce')

        column_score = Fraction(0)
        # value_counts() leaves NaN out, so uncoercible entries add nothing.
        for code, human_count in human_codes.value_counts().items():
            # m_df is only consulted when there is a code to look up, so a
            # column without any numeric human value never touches m_df.
            machine_count = int((m_df[column] == code).sum())
            column_score += (Fraction(int(human_count), human_annotators_count)
                             * Fraction(machine_count, llm_runs_count))

        column_scores.append(column_score)

    # Average over the columns of h_df.
    return Fraction(sum(column_scores, Fraction(0)), len(column_scores))

In [5]:
def calculate_dataset_alignment_code(human_annotation_folder_path,
                                     machine_annotation_folder_path,
                                     dataset_name,
                                     alignment_score_df):
    """Score one dataset for one human/machine folder pair and record it.

    The dataset file is looked up under both folders, loaded with
    prepare_df_for_alignment_calculation and scored with
    calculate_alignment_score. The score is written into
    ``alignment_score_df`` at row ``dataset_name`` and column
    ``machine_annotation_folder_path``.

    Returns:
        The same ``alignment_score_df`` object, which is modified in place.
    """
    # Same file name under each of the two folders.
    human_csv, machine_csv = (
        os.path.join(folder, dataset_name)
        for folder in (human_annotation_folder_path, machine_annotation_folder_path)
    )

    frames = prepare_df_for_alignment_calculation(human_csv, machine_csv)

    # The machine folder path doubles as the column label of the score table.
    alignment_score_df.loc[dataset_name, machine_annotation_folder_path] = (
        calculate_alignment_score(*frames)
    )

    return alignment_score_df

In [6]:
# Score table: one row per dataset, no columns yet.
# calculate_dataset_alignment_code adds one column per machine-annotation
# folder the first time that folder is scored.
alignment_score_df = pd.DataFrame({'datasets': dataset_names_list}).set_index('datasets')

# (human annotation folder, machine annotation folder) pairs, listed in the
# order in which they are scored for every dataset.
annotation_folder_pairs = [
    ## STRICT CONSISTENCY
    (path_strict_consistency_human_annotation, path_no_context_strict_consistency_chatgpt),
    (path_strict_consistency_human_annotation, path_no_context_strict_consistency_googlebard),
    (path_strict_consistency_human_annotation, path_context_strict_consistency_chatgpt),
    ## SOFT CONSISTENCY
    (path_soft_consistency_human_annotation, path_no_context_soft_consistency_chatgpt),
    (path_soft_consistency_human_annotation, path_no_context_soft_consistency_googlebard),
    (path_soft_consistency_human_annotation, path_context_soft_consistency_chatgpt),
]

for dataset_name in dataset_names_list:
    for human_folder, machine_folder in annotation_folder_pairs:
        # Writes the score into alignment_score_df in place.
        calculate_dataset_alignment_code(human_folder,
                                         machine_folder,
                                         dataset_name,
                                         alignment_score_df)

In [7]:
# Persist the score table (rows: datasets, columns: machine-annotation folders).
# NOTE(review): the cells hold Fraction objects, so they are presumably written
# as 'numerator/denominator' text - confirm before parsing the file downstream.
alignment_score_df.to_csv(
    path_or_buf='/Users/brain/Documents/GitHub/LLMs-topic-classification/results/alignment_scores.csv'
)