In [1]:
import os
import math
import numpy as np
import pandas as pd

from scipy.spatial.distance import jensenshannon

In [None]:
# Model identifiers to evaluate; each one is looked up in the dataset as the
# column '<model>_answer' by calculate_js_distances.
models = [
    'Falcon3-7B-Instruct',
    'Llama-3.2-3B-Instruct',
    'Llama-3.1-8B-Instruct',
    'llama3.1-8b-cpt-sea-lionv3-instruct',
    'Mistral-7B-Instruct',
    'Qwen2.5-7B-Instruct',
]

In [None]:
# Load Fair-PP_test.csv from the working directory. The dataset can also be
# obtained directly from https://huggingface.co/datasets/tools-o/Fair-PP
df = pd.read_csv(filepath_or_buffer="Fair-PP_test.csv")

In [None]:
def calculate_js_distances(df, model, num_roles=7, categories=(0, 1, 2)):
    """Pairwise Jensen-Shannon distances between answer distributions.

    For each of the columns ``gpt-4o-mini_role_1_answer`` ..
    ``gpt-4o-mini_role_<num_roles>_answer`` plus ``<model>_answer``, the
    relative frequency of every value in ``categories`` is computed, and
    ``scipy.spatial.distance.jensenshannon`` is evaluated for every pair of
    the resulting frequency vectors.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the answer columns named above.
    model : str
        Model identifier; its answers are read from ``f'{model}_answer'``.
    num_roles : int, default 7
        Number of ``gpt-4o-mini_role_<i>_answer`` columns to include
        (``i`` runs from 1 to ``num_roles``).
    categories : sequence, default (0, 1, 2)
        Answer values that make up the support of each distribution, in
        order. Values that never occur in a column get frequency 0; values
        outside ``categories`` are dropped.

    Returns
    -------
    pandas.DataFrame
        Symmetric ``(num_roles + 1) x (num_roles + 1)`` matrix of distances.
        Rows and columns are labelled ``role_1`` .. ``role_<num_roles + 1>``;
        the LAST label corresponds to the ``<model>_answer`` column, not to a
        GPT role.

    Raises
    ------
    KeyError
        If any required answer column is absent from ``df``.

    Notes
    -----
    A column containing none of ``categories`` produces an all-zero vector;
    distances involving it are expected to come out as NaN because the vector
    cannot be normalised.
    """
    answer_columns = [f'gpt-4o-mini_role_{i}_answer' for i in range(1, num_roles + 1)]
    answer_columns.append(f'{model}_answer')

    # Fail early with the full list of absent columns instead of a bare
    # KeyError on the first one encountered inside pandas.
    missing = [col for col in answer_columns if col not in df.columns]
    if missing:
        raise KeyError(f"Missing answer column(s) in df: {missing}")

    category_order = list(categories)
    distributions = [
        df[col]
        .value_counts(normalize=True)
        .reindex(category_order, fill_value=0)
        .to_numpy()
        for col in answer_columns
    ]

    n_dists = len(distributions)
    js_distances = np.zeros((n_dists, n_dists))

    # The distance is symmetric, so only the upper triangle (including the
    # diagonal) is evaluated and then mirrored.
    for i in range(n_dists):
        for j in range(i, n_dists):
            js_dist = jensenshannon(distributions[i], distributions[j])
            js_distances[i, j] = js_dist
            js_distances[j, i] = js_dist

    # Labels are derived from the actual matrix size rather than a literal 8.
    labels = [f'role_{k}' for k in range(1, n_dists + 1)]
    return pd.DataFrame(js_distances, index=labels, columns=labels)

# Print one pairwise JS-distance table per model. The model name is emitted on
# its own line: printing it on the same line as the DataFrame shifts the
# table's header row and misaligns the column names with their values.
for model in models:
    result = calculate_js_distances(df, model)
    print(model, result, sep="\n")