In [10]:
import pandas as pd
from scipy.stats import spearmanr

# Load the prediction results. The code below reads the columns 'domain',
# 'persona_id', 'level' and one '<XXX>_p' parameter column per domain.
file_path = 'prediction_result1_phi314b-medium-4k-instruct-q5_K_M.csv'
data = pd.read_csv(file_path)

# Per-domain output:
#   domain -> {'results_df': DataFrame, 'mean_correlation': float}
results_dict = {}

# Map each value of the 'domain' column to the column holding its parameter.
# NOTE(review): 'OPE' is paired with 'OPN_p' (not 'OPE_p') -- confirm that
# this matches the CSV header; a wrong name raises KeyError below.
domains = {
    'AGR': 'AGR_p',
    'EXT': 'EXT_p',
    'NEU': 'NEU_p',
    'CON': 'CON_p',
    'OPE': 'OPN_p'
}

# Fixed schema so that a domain with no usable persona still yields a
# DataFrame that can be sorted and averaged instead of raising KeyError.
result_columns = ['persona_id', 'spearman_correlation']

for domain, param in domains.items():
    # Rows belonging to the current domain only.
    domain_data = data[data['domain'] == domain]

    # One pass over the rows instead of re-filtering the frame per persona.
    # sort=False keeps personas in order of first appearance, matching the
    # order the per-persona rows are reported in.
    results = []
    for persona_id, subset in domain_data.groupby('persona_id', sort=False):
        # A rank correlation needs at least two observations.
        if len(subset) > 1:
            correlation, _ = spearmanr(subset['level'], subset[param])
            results.append({'persona_id': persona_id, 'spearman_correlation': correlation})

    # Show the per-persona correlations in input order, then ranked from the
    # strongest positive correlation downwards.
    results_df = pd.DataFrame(results, columns=result_columns)
    print(results_df)
    results_df.sort_values(by='spearman_correlation', ascending=False, inplace=True)
    print(results_df)

    # Mean over personas; pandas skips NaN correlations by default. With no
    # usable persona at all the mean is undefined, reported as NaN.
    if results_df.empty:
        mean_correlation = float('nan')
    else:
        mean_correlation = results_df['spearman_correlation'].mean()

    # Store the per-persona table and its mean for this domain.
    results_dict[domain] = {
        'results_df': results_df,
        'mean_correlation': mean_correlation
    }

print(results_dict)
# Display results for each domain
# import ace_tools as tools

# for domain, result in results_dict.items():
#     tools.display_dataframe_to_user(name=f"Spearman Rank Correlation Results by Persona for {domain}", dataframe=result['results_df'])
#     print(f"Mean Spearman Rank Correlation for {domain}: {result['mean_correlation']}")


    persona_id  spearman_correlation
0            1              0.316667
1            2              0.416667
2            3              0.766667
3            4              0.366667
4            5              0.466667
5            6              0.666667
6            7             -0.233333
7            8              0.200000
8            9              0.850000
9           10              0.350000
10          11              0.516667
11          12              0.716667
12          13              0.416667
13          14              0.800000
14          15             -0.016667
15          16              0.416667
16          17              0.550000
17          18              0.466667
18          19              0.483333
19          20              0.250000
20          21              0.666667
21          22              0.600000
22          23              0.433333
23          24              0.683333
24          25              0.316667
25          26              0.816667
2