In [24]:
"""
correlation_analysis.ipynb

This notebook calculates the Pearson, Spearman, and Kendall tau correlations between human and GPT-4o ratings.

Steps:
1. Load human ratings and GPT-4o ratings from the JSON files.
2. Calculate Spearman, Pearson, and Kendall tau correlations.
3. Display the correlation results.
"""

'\ncorrelation_analysis.ipynb\n\nThis notebook calculates the Spearman and Pearson correlations between human and GPT-4o ratings.\n\nSteps:\n1. Load human ratings and GPT-4o ratings from the JSON files.\n2. Calculate Spearman, Pearson, and Kendalltau correlations.\n3. Display the correlation results.\n'

In [17]:
import json

# File paths
input_file_path = 'outputs/gpt4_scored_Llama-3-8B.json'
output_file_path = 'outputs/human_scored_gpt4_scored_Llama-3-8B.json'

# Gold types whose turns are never kept in the output file.
EXCLUDED_GOLD_TYPES = ("improper", "answerable")


def select_scored_turn_pairs(turns):
    """Select (preceding turn, scored turn) pairs from one item's turns.

    A turn is kept when all of the following hold:
      * it has a preceding turn, whose 'type' is used as the gold type;
      * its 'predict_type' equals that gold type;
      * the gold type is not one of EXCLUDED_GOLD_TYPES;
      * it carries an 'RQS' value other than "N/A".

    Each kept turn is mutated in place: 'RQS_human_avg' is set to the
    turn's existing 'RQS' value.

    Args:
        turns: list of turn dicts taken from an item's 'turns' field.

    Returns:
        A flat list [prev_0, turn_0, prev_1, turn_1, ...] of the kept pairs,
        in their original order. Empty when nothing qualifies.
    """
    kept = []
    # Pair every turn with its real predecessor. The first turn is never
    # considered: it has no predecessor, and indexing turns[i - 1] with
    # i == 0 would silently wrap around to the *last* turn.
    for previous_turn, turn in zip(turns, turns[1:]):
        gold_type = previous_turn.get('type', '')
        if turn.get('predict_type', None) != gold_type:
            continue
        if gold_type in EXCLUDED_GOLD_TYPES:
            continue
        if 'RQS' in turn and turn['RQS'] != "N/A":
            # Seed the new RQS_human_avg field with the existing RQS value.
            turn['RQS_human_avg'] = turn['RQS']
            kept.append(previous_turn)
            kept.append(turn)
    return kept


# Load the scored JSON file
with open(input_file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)

# Keep only the items that still have at least one qualifying turn pair
filtered_data = []
for item in data:
    if 'turns' not in item:
        continue
    filtered_turns = select_scored_turn_pairs(item['turns'])
    if filtered_turns:
        item['turns'] = filtered_turns
        filtered_data.append(item)

# Write the filtered data to the new JSON file
with open(output_file_path, 'w', encoding='utf-8') as file:
    json.dump(filtered_data, file, ensure_ascii=False, indent=4)

print(f"Processing complete. Results saved to '{output_file_path}'")


Processing complete. Results saved to 'outputs/human_scored_gpt4_scored_Llama-3-8B.json'


In [18]:
import json
import pandas as pd
from scipy.stats import pearsonr, spearmanr, kendalltau

# File path
human_file_path = 'outputs/human_scored.json'

# Load the human-scored file
with open(human_file_path, 'r', encoding='utf-8') as human_file:
    human_data = json.load(human_file)

# One record per turn that has both ratings and a matching predicted type
results = []

for item in human_data:
    if 'turns' not in item:
        continue
    for i, turn in enumerate(item['turns']):
        # The gold type comes from the preceding turn, so the first turn can
        # never be paired; item['turns'][i - 1] with i == 0 would wrap around
        # to the last turn and compare against an unrelated type.
        if i == 0:
            continue
        if 'RQS' in turn and turn['RQS'] != "N/A" and 'RQS_human_avg' in turn and turn['RQS_human_avg'] != "N/A":
            predict_type = turn.get('predict_type', None)
            gold_type = item['turns'][i - 1].get('type', '')
            if predict_type == gold_type:
                # NOTE(review): int() truncates fractional numbers and raises
                # on strings such as "7.5" - confirm both ratings are integral.
                results.append({
                    'Type': predict_type,
                    'RQS': int(turn['RQS']),
                    'RQS_human': int(turn['RQS_human_avg'])
                })

# Build the DataFrame with explicit columns so that an empty result list still
# yields a frame with a 'Type' column instead of raising KeyError below.
df = pd.DataFrame(results, columns=['Type', 'RQS', 'RQS_human'])

print("Preview of df:")
print(df.head())  # show only the first 5 rows

# Compute the correlation coefficients per type
correlation_results = []

for rating_type in ['unanswerable', 'ambiguous', 'improper']:
    subset = df[df['Type'] == rating_type]

    print(subset)

    # A correlation needs at least two samples
    if len(subset) <= 1:
        print(f"Type '{rating_type}' has fewer than 2 samples, skipping correlation calculation.")
        continue

    # The coefficients are undefined when either rating column is constant
    if subset['RQS'].nunique() <= 1 or subset['RQS_human'].nunique() <= 1:
        print(f"Type '{rating_type}' has constant RQS or RQS_human values, skipping correlation calculation.")
        continue

    # Pearson correlation coefficient
    pearson_corr, pearson_pval = pearsonr(subset['RQS'], subset['RQS_human'])

    # Spearman correlation coefficient
    spearman_corr, spearman_pval = spearmanr(subset['RQS'], subset['RQS_human'])

    # Kendall correlation coefficient
    kendall_corr, kendall_pval = kendalltau(subset['RQS'], subset['RQS_human'])

    correlation_results.append({
        'Type': rating_type,
        'Pearson': pearson_corr,
        'Pearson P-value': pearson_pval,
        'Spearman': spearman_corr,
        'Spearman P-value': spearman_pval,
        'Kendall': kendall_corr,
        'Kendall P-value': kendall_pval
    })

# Collect the per-type correlation results into a DataFrame
correlation_df = pd.DataFrame(correlation_results)

# Print the correlation results
print(correlation_df)

# Optionally, save the results to a file
# correlation_df.to_json('correlation_results.json', orient='records', lines=True, force_ascii=False)

print("Correlation analysis complete.")


Preview of df:
           Type  RQS  RQS_human
0  unanswerable    1          0
1  unanswerable   10         10
2  unanswerable   10         10
3  unanswerable    9         10
4     ambiguous    0          0
             Type  RQS  RQS_human
0    unanswerable    1          0
1    unanswerable   10         10
2    unanswerable   10         10
3    unanswerable    9         10
5    unanswerable    2          2
7    unanswerable   10         10
8    unanswerable   10         10
9    unanswerable   10         10
13   unanswerable   10         10
15   unanswerable   10         10
16   unanswerable   10         10
40   unanswerable    9          7
44   unanswerable   10         10
83   unanswerable   10         10
86   unanswerable   10         10
100  unanswerable    8         10
131  unanswerable   10          4
134  unanswerable   10          9
135  unanswerable   10         10
145  unanswerable   10         10
148  unanswerable   10         10
          Type  RQS  RQS_human
4    ambiguous