In [1]:
"""
correlation_analysis.ipynb

This notebook calculates the Pearson, Spearman, and Kendall's tau correlations between human and GPT-4o ratings.

Steps:
1. Load human ratings and GPT-4o ratings from the JSON files.
2. Calculate Pearson, Spearman, and Kendall's tau correlations.
3. Display the correlation results.
"""

'\ncorrelation_analysis.ipynb\n\nThis notebook calculates the Spearman and Pearson correlations between human and GPT-4o ratings.\n\nSteps:\n1. Load human ratings and GPT-4o ratings from the JSON files.\n2. Calculate Spearman, Pearson, and Kendalltau correlations.\n3. Display the correlation results.\n'

In [2]:
import json

# Build the human-scoring file from the model-scored file: every turn that
# carries a usable 'RQS' value (present and not "N/A") gets an 'RQS_human'
# field initialised to that same value. All other content is copied as-is.
# NOTE(review): presumably 'RQS_human' is corrected by hand afterwards; the
# correlation cell below reads this same file -- confirm the workflow.
GPT_SCORED_PATH = 'outputs/gpt4_scored_gemini-1.5-flash.json'
HUMAN_SCORED_PATH = 'outputs/human_scored_gemini-1.5-flash.json'

# Re-running this cell would otherwise reset every 'RQS_human' value to the
# model score. Flip to True only when the file should be regenerated.
OVERWRITE_HUMAN_FILE = False

# Read the model-scored JSON file
with open(GPT_SCORED_PATH, 'r', encoding='utf-8') as file:
    data = json.load(file)

# Seed 'RQS_human' from 'RQS' on every scored turn
for item in data:
    if 'turns' in item:
        for turn in item['turns']:
            if 'RQS' in turn and turn['RQS'] != "N/A":
                turn['RQS_human'] = turn['RQS']

# Write the result. Mode 'x' (exclusive creation) raises FileExistsError
# instead of truncating a file that is already there.
write_mode = 'w' if OVERWRITE_HUMAN_FILE else 'x'
try:
    with open(HUMAN_SCORED_PATH, write_mode, encoding='utf-8') as file:
        json.dump(data, file, ensure_ascii=False, indent=4)
except FileExistsError:
    print(f"'{HUMAN_SCORED_PATH}' already exists and was left untouched. "
          "Set OVERWRITE_HUMAN_FILE = True to regenerate it.")
else:
    print(f"Processing complete. Results saved to '{HUMAN_SCORED_PATH}'")


Processing complete. Results saved to 'modified_output.json'


In [16]:
import json
import pandas as pd
from scipy.stats import pearsonr, spearmanr, kendalltau

# Compute, per question type, the Pearson, Spearman and Kendall correlations
# between the 'RQS' and 'RQS_human' scores stored in the human-scored file.

# File path
human_file_path = 'outputs/human_scored_gemini-1.5-flash.json'

# Load the human-scored file
with open(human_file_path, 'r', encoding='utf-8') as human_file:
    human_data = json.load(human_file)

# One record per turn that has both scores and a non-empty 'predict_type'
results = []

for item in human_data:
    if 'turns' in item:
        for turn in item['turns']:
            if 'RQS' in turn and turn['RQS'] != "N/A" and 'RQS_human' in turn and turn['RQS_human'] != "N/A":
                predict_type = turn.get('predict_type', None)
                if predict_type:
                    results.append({
                        'Type': predict_type,
                        'RQS': int(turn['RQS']),
                        'RQS_human': int(turn['RQS_human'])
                    })

# Convert the records to a DataFrame. The columns are named explicitly so
# that an empty `results` list still yields a frame with a 'Type' column;
# without this, the `df['Type']` lookup below raises KeyError.
df = pd.DataFrame(results, columns=['Type', 'RQS', 'RQS_human'])

print("Preview of df:")
print(df.head())  # show the first 5 rows only

# Compute the correlation coefficients for each question type of interest
correlation_results = []

for predict_type in ['unanswerable', 'ambiguous', 'improper']:
    subset = df[df['Type'] == predict_type]

    if len(subset) > 1:  # the coefficients need at least two samples
        # Pearson correlation coefficient
        pearson_corr, pearson_pval = pearsonr(subset['RQS'], subset['RQS_human'])

        # Spearman correlation coefficient
        spearman_corr, spearman_pval = spearmanr(subset['RQS'], subset['RQS_human'])

        # Kendall correlation coefficient
        kendall_corr, kendall_pval = kendalltau(subset['RQS'], subset['RQS_human'])

        correlation_results.append({
            'Type': predict_type,
            'Pearson': pearson_corr,
            'Pearson P-value': pearson_pval,
            'Spearman': spearman_corr,
            'Spearman P-value': spearman_pval,
            'Kendall': kendall_corr,
            'Kendall P-value': kendall_pval
        })

# Convert the correlation results to a DataFrame. Explicit columns keep the
# table header intact even when no type had enough samples.
correlation_df = pd.DataFrame(
    correlation_results,
    columns=[
        'Type',
        'Pearson', 'Pearson P-value',
        'Spearman', 'Spearman P-value',
        'Kendall', 'Kendall P-value',
    ],
)

# Print the correlation results
print(correlation_df)

# Optionally save the results to a file
# correlation_df.to_json('correlation_results.json', orient='records', lines=True, force_ascii=False)

print("Correlation analysis complete.")


Preview of df:
         Type  RQS  RQS_human
0    improper   10         10
1  answerable    0          0
2    improper   10         10
3  answerable    0          0
4    improper   10         10
           Type   Pearson  Pearson P-value  Spearman  Spearman P-value  \
0  unanswerable  0.761883     3.893574e-06  0.622490      5.256862e-04   
1     ambiguous  1.000000     0.000000e+00  1.000000      0.000000e+00   
2      improper  0.979594    3.188615e-109  0.850603      7.659346e-45   

    Kendall  Kendall P-value  
0  0.588089     4.923213e-04  
1  1.000000     1.145737e-04  
2  0.845791     1.855500e-26  
Correlation analysis complete.
