In [None]:
import pandas as pd
import numpy as np
import os
import glob
import matplotlib.pyplot as plt
import re
import seaborn as sns
from scipy.stats import spearmanr


In [None]:
def calculate_confusion_matrix_elements(y_true, y_pred, positive_label):
    """Count the four confusion-matrix cells for a one-vs-rest comparison.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels (list, NumPy array or pandas Series) with the same
        shape as ``y_true``.
    positive_label : scalar
        The label counted as "positive"; every other value is "negative".

    Returns
    -------
    tuple
        ``(tp, fn, fp, tn)`` -- note the order: true positives, false
        negatives, false positives, true negatives.

    Raises
    ------
    ValueError
        If ``y_true`` and ``y_pred`` do not have the same shape.
    """
    # np.asarray accepts lists, arrays and pandas objects alike, so callers
    # are not forced to pass a pandas Series for y_pred.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # Fail loudly instead of letting NumPy broadcast mismatched inputs.
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, "
            f"got {y_true.shape} and {y_pred.shape}"
        )

    truth_is_positive = y_true == positive_label
    pred_is_positive = y_pred == positive_label

    tp = np.sum(truth_is_positive & pred_is_positive)
    fn = np.sum(truth_is_positive & ~pred_is_positive)
    fp = np.sum(~truth_is_positive & pred_is_positive)
    tn = np.sum(~truth_is_positive & ~pred_is_positive)

    return tp, fn, fp, tn



In [None]:
def calculate_metrics(conf_matrix):
    """Derive summary classification metrics from a 2x2 count matrix.

    The matrix is read in the layout ``[[TP, TN], [FP, FN]]``: true negatives
    sit in the top-right cell and false negatives in the bottom-right cell.
    The argument must support ``conf_matrix[i, j]`` indexing.

    Returns a dict with the keys "Accuracy", "Precision", "Recall",
    "Specificity" and "False Positive Rate". A metric whose denominator is
    not positive is reported as 0.
    """
    true_pos = conf_matrix[0, 0]
    true_neg = conf_matrix[0, 1]
    false_pos = conf_matrix[1, 0]
    false_neg = conf_matrix[1, 1]

    def _ratio(numerator, denominator):
        # Undefined ratios (non-positive denominator) fall back to 0.
        if denominator > 0:
            return numerator / denominator
        return 0

    total = true_pos + false_neg + false_pos + true_neg

    return {
        "Accuracy": _ratio(true_pos + true_neg, total),
        "Precision": _ratio(true_pos, true_pos + false_pos),
        "Recall": _ratio(true_pos, true_pos + false_neg),
        "Specificity": _ratio(true_neg, true_neg + false_pos),
        "False Positive Rate": _ratio(false_pos, false_pos + true_neg),
    }

In [None]:
# distribution of the identified item/category number
# relationship between the identified item/category number and "if correct"
# relationship between the identified item/category number and "if full score"




In [None]:
# Relationship between the Spearman correlation and the score: scatter plots
desktop_path = os.path.join(os.path.expanduser('~'), 'Desktop')
directory_path = os.path.join(desktop_path, 'human_project_data','spearman corr and score csv')
file_path1 = os.path.join(directory_path, 'p2 food dim corr and score.csv')
file_path2 = os.path.join(directory_path, 'p2only food dim corr and score.csv')
file_path3 = os.path.join(directory_path, 'p2 food item corr and score.csv')
file_path4 = os.path.join(directory_path, 'p2only food item corr and score.csv')
file_path5 = os.path.join(directory_path, 'p1 food item corr and score.csv')

df1 = pd.read_csv(file_path1)
df2 = pd.read_csv(file_path2)
df3 = pd.read_csv(file_path3)
df4 = pd.read_csv(file_path4)
df5 = pd.read_csv(file_path5)


def _plot_corr_vs_score(data, group_label, ylim=None):
    """Scatter 'Correlation coefficient' against 'Score' with a regression line.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'Correlation coefficient' and 'Score'.
    group_label : str
        Suffix appended to the figure title to identify the group.
    ylim : tuple of (low, high), optional
        Y-axis limits; when omitted matplotlib's automatic limits are kept.

    Returns
    -------
    float
        The Spearman correlation between the two columns (also annotated on
        the figure).
    """
    x = data['Correlation coefficient']
    y = data['Score']
    corr, _ = spearmanr(x, y)  # Spearman rank correlation coefficient

    fig, ax = plt.subplots(figsize=(5,5))
    sns.scatterplot(x=x, y=y, ax=ax, color = 'red' )
    # scatter=False: only the fitted line is drawn on top of the red points.
    sns.regplot(x=x, y=y, ax=ax, scatter=False, line_kws={'color': 'grey'})
    ax.set_title(f'scatter plot spearman corr with score for {group_label}', fontsize = 15)
    ax.set_xlabel( "spearman corr for subject's answer ",fontsize = 14)
    ax.set_ylabel('The highest score of subject',fontsize = 14)
    if ylim is not None:
        ax.set_ylim(*ylim)
    # Axes-fraction coordinates keep the annotation in the top-left corner.
    ax.text(0.1,0.99, f'spearman corr: {corr:.2f}',fontsize = 13, color = 'grey', ha='left', va='top',transform=ax.transAxes )
    plt.show()
    return corr


# (frame, title suffix, y-limits); only the two "food dim" plots use fixed y-limits.
for data, group_label, ylim in [
    (df1, 'p1p2-p2 food dim', (82, 101)),   #1. p1p2-p2 food dim
    (df2, 'p2only food dim', (82, 101)),    #2. p2only food dim
    (df3, 'p1p2-p2 food item', None),       #3. p1p2-p2 food item
    (df4, 'p2only food item', None),        #4. p2only food item
    (df5, 'p1 food item', None),            #5. p1 food item
]:
    _plot_corr_vs_score(data, group_label, ylim)



In [None]:
# Food item / food dim confusion matrices, false positive rate, accuracy etc.,
# computed on counts pooled over all subjects of each group.
#1. p1 food item
#2. p2 food item
#3. p2only food item
#4. p2 food dim
#5. p2only food dim

desktop_path = os.path.join(os.path.expanduser('~'), 'Desktop')
directory_path = os.path.join(desktop_path, 'human_project_data','after_sorting_csv')
file_path1 = os.path.join(directory_path, 'food_item_p1_sort.csv')
file_path2 = os.path.join(directory_path, 'food_item_p2_sort.csv')
file_path3 = os.path.join(directory_path, 'food_item_p2only_sort.csv')
file_path4 = os.path.join(directory_path, 'food_dim_p2_sort.csv')
file_path5 = os.path.join(directory_path, 'food_dim_p2only_sort.csv')
df1 = pd.read_csv(file_path1)
df2 = pd.read_csv(file_path2)
df3 = pd.read_csv(file_path3)
df4 = pd.read_csv(file_path4)
df5 = pd.read_csv(file_path5)

# Binarise the answers in place: every non-zero cell becomes 1, zeros stay 0.
df1[df1 != 0] = 1
df2[df2 != 0] = 1
df3[df3 != 0] = 1
df4[df4 != 0] = 1
df5[df5 != 0] = 1

# Ground-truth vectors (1 = relevant, 0 = not relevant), compared element-wise
# with each column of the corresponding frame.
# NOTE(review): the per-subject indicator cell further down defines t1 / t23
# with 4 relevant entries instead of 6 -- confirm which ground truth is correct.
t1 = [1,1,1,1,1,1,0,0,0]
t23 = [1,1,1,1,1,1,0,0,0,0,0,0]
t45 = [1,1,0,0]


def _pooled_confusion_counts(binary_answers, truth):
    """Sum (tp, fn, fp, tn) over every column of ``binary_answers``."""
    tp_sum = fn_sum = fp_sum = tn_sum = 0
    for _subject, answer in binary_answers.items():
        tp, fn, fp, tn = calculate_confusion_matrix_elements(truth, answer, 1)
        tp_sum += tp
        fn_sum += fn
        fp_sum += fp
        tn_sum += tn
    return tp_sum, fn_sum, fp_sum, tn_sum


def _plot_confusion_heatmap(tp, fn, fp, tn, title):
    """Draw a 2x2 heatmap with rows = correct answer, columns = subject's answer."""
    # Row 0 = relevant, row 1 = not relevant; column 0 = identified,
    # column 1 = not identified. That makes the cells [[TP, FN], [FP, TN]],
    # which differs from the [[TP, TN], [FP, FN]] layout calculate_metrics reads.
    display_matrix = np.array([[tp, fn],
                               [fp, tn]])
    plt.figure(figsize=(6, 4))
    sns.heatmap(display_matrix, annot=True, fmt='d', cmap='Blues',
                xticklabels=['identified', 'not identified'],
                yticklabels=['relevant', 'not relevant'])
    plt.title(title, fontsize=14)
    plt.xlabel("Subject's answer")
    plt.ylabel('Correct answer')


def _plot_metric_bars(values, labels, colors, hatches, title, ylabel, ymax):
    """Bar chart of ``values`` expressed as percentages, one labelled bar per group."""
    percentages = [value * 100 for value in values]
    fig, ax = plt.subplots()
    bars = ax.bar(labels, percentages, color=colors, hatch=hatches)
    ax.set_title(title)
    ax.set_xlabel('Groups')
    ax.set_ylabel(ylabel)
    ax.set_ylim(0, ymax)
    for bar in bars:
        yval = bar.get_height()
        # Print the rounded percentage just above each bar.
        ax.text(bar.get_x() + bar.get_width()/2, yval, round(yval, 1), ha='center', va='bottom')
    plt.show()


Tp1, Fn1, Fp1, Tn1 = _pooled_confusion_counts(df1, t1)
print(f'{Tp1},{Fn1},{Fp1},{Tn1}')
Tp2, Fn2, Fp2, Tn2 = _pooled_confusion_counts(df2, t23)
Tp3, Fn3, Fp3, Tn3 = _pooled_confusion_counts(df3, t23)
Tp4, Fn4, Fp4, Tn4 = _pooled_confusion_counts(df4, t45)
Tp5, Fn5, Fp5, Tn5 = _pooled_confusion_counts(df5, t45)

# Matrices in the [[TP, TN], [FP, FN]] layout expected by calculate_metrics.
conf_matrix1 =  np.array([[Tp1,Tn1],
                         [Fp1,Fn1]])
conf_matrix2 =  np.array([[Tp2,Tn2],
                         [Fp2,Fn2]])
conf_matrix3 =  np.array([[Tp3,Tn3],
                         [Fp3,Fn3]])
conf_matrix4 =  np.array([[Tp4,Tn4],
                         [Fp4,Fn4]])
conf_matrix5 =  np.array([[Tp5,Tn5],
                         [Fp5,Fn5]])

# Draw the confusion matrices (cells arranged to match the axis labels).
_plot_confusion_heatmap(Tp1, Fn1, Fp1, Tn1,
                        'Confusion Matrix (total counts) for p1 group food item identification')
_plot_confusion_heatmap(Tp2, Fn2, Fp2, Tn2,
                        'Confusion Matrix (total counts) for p1p2-p2 group food item identification')
_plot_confusion_heatmap(Tp3, Fn3, Fp3, Tn3,
                        'Confusion Matrix (total counts) for p2-only group food item identification')
_plot_confusion_heatmap(Tp4, Fn4, Fp4, Tn4,
                        'Confusion Matrix (total counts) for p1p2-p2 group food dim identification')
_plot_confusion_heatmap(Tp5, Fn5, Fp5, Tn5,
                        'Confusion Matrix (total counts) for p2-only group food dim identification')

metrics1 = calculate_metrics(conf_matrix1)
metrics2 = calculate_metrics(conf_matrix2)
metrics3 = calculate_metrics(conf_matrix3)
metrics4 = calculate_metrics(conf_matrix4)
metrics5 = calculate_metrics(conf_matrix5)

for prefix, group_metrics in (
    ('p1_item', metrics1),
    ('p2_item', metrics2),
    ('p2only_item', metrics3),
    ('p2_dim', metrics4),
    ('p2only_dim', metrics5),
):
    for metric, value in group_metrics.items():
        print(f"{prefix}_{metric}: {value:.2f}")

# FPR - food item, 3 groups
_plot_metric_bars(
    [metrics1['False Positive Rate'], metrics2['False Positive Rate'], metrics3['False Positive Rate']],
    ['p1p2-p1','p1p2-p2','p2-only'],
    ['mistyrose', 'lightblue', 'thistle'],
    ['///', '...', '**'],
    'Comparison of FPR for 3 group in relevant food item identificaiton',
    'Percentage of FPR',
    40,
)

# FPR - food dim, 2 groups
_plot_metric_bars(
    [metrics4['False Positive Rate'], metrics5['False Positive Rate']],
    ['p1p2-p2','p2-only'],
    [ 'lightblue', 'thistle'],
    ['...', '**'],
    'Comparison of FPR for 2 group in relevant food dim identificaiton',
    'Percentage of FPR',
    50,
)

# Accuracy - food item, 3 groups (y-axis label now names Accuracy, not FPR)
_plot_metric_bars(
    [metrics1['Accuracy'], metrics2['Accuracy'], metrics3['Accuracy']],
    ['p1p2-p1','p1p2-p2','p2-only'],
    ['mistyrose', 'lightblue', 'thistle'],
    ['///', '...', '**'],
    'Comparison of Accuracy for 3 group in relevant food item identificaiton',
    'Percentage of Accuracy',
    80,
)

# Accuracy - food dim, 2 groups: no code present for this plot yet.


In [None]:
# Compute each subject's false positive rate, accuracy, identified number and
# "if correct" flags, and save them into the indicator tables.
#1. p1 food item
#2. p2 food item
#3. p2only food item
#4. p2 food dim
#5. p2only food dim

desktop_path = os.path.join(os.path.expanduser('~'), 'Desktop')
directory_path = os.path.join(desktop_path, 'human_project_data','after_sorting_csv')
file_path1 = os.path.join(directory_path, 'food_item_p1_sort.csv')
file_path2 = os.path.join(directory_path, 'food_item_p2_sort.csv')
file_path3 = os.path.join(directory_path, 'food_item_p2only_sort.csv')
file_path4 = os.path.join(directory_path, 'food_dim_p2_sort.csv')
file_path5 = os.path.join(directory_path, 'food_dim_p2only_sort.csv')

# d1..d5 keep the raw values (d4 / d5 feed the "if correct" flags below).
d1 = pd.read_csv(file_path1)
d2 = pd.read_csv(file_path2)
d3 = pd.read_csv(file_path3)
d4 = pd.read_csv(file_path4)
d5 = pd.read_csv(file_path5)

# df1..df5 are binarised copies: every non-zero cell becomes 1, zeros stay 0.
df1 = d1.copy()
df2 = d2.copy()
df3 = d3.copy()
df4 = d4.copy()
df5 = d5.copy()

df1[df1 != 0] = 1
df2[df2 != 0] = 1
df3[df3 != 0] = 1
df4[df4 != 0] = 1
df5[df5 != 0] = 1

# Ground-truth vectors (1 = positive label), compared element-wise with each column.
# NOTE(review): the pooled confusion-matrix cell above defines t1 / t23 with
# 6 positive entries instead of 4 -- confirm which ground truth is correct.
t1 = [1,1,1,1,0,0,0,0,0]
t23 = [1,1,1,1,0,0,0,0,0,0,0,0]
t45 = [1,1,0,0]

# NOTE(review): these arrays are not referenced anywhere in this cell.
df1_arr = df1.to_numpy()
df2_arr = df2.to_numpy()
df3_arr = df3.to_numpy()
df4_arr = df4.to_numpy()
df5_arr = df5.to_numpy()


def _per_subject_indicators(binary_answers, truth):
    """Return three dicts keyed by column name: FPR, accuracy, identified count.

    FPR is FP / (FP + TN) and accuracy is (TP + TN) / N; either one is
    reported as 0 when its denominator is not positive.
    """
    fpr_by_subject = {}
    acc_by_subject = {}
    identified_by_subject = {}
    for subject, answer in binary_answers.items():
        # calculate_confusion_matrix_elements returns (tp, fn, fp, tn) in
        # exactly this order; unpacking it as (tp, tn, fp, fn) swaps FN and TN
        # and corrupts both the FPR and the accuracy.
        tp, fn, fp, tn = calculate_confusion_matrix_elements(truth, answer, 1)
        total = tp + fn + fp + tn
        fpr_by_subject[subject] = fp / (fp + tn) if (fp + tn) > 0 else 0
        acc_by_subject[subject] = (tp + tn) / total if total > 0 else 0
        identified_by_subject[subject] = np.sum(answer)
    return fpr_by_subject, acc_by_subject, identified_by_subject


def _if_correct_flags(raw_answers, strict):
    """Return a {column name: 0/1} dict built from the first four rows of each column.

    A column is flagged 1 when rows 0 and 1 are both > 5 and rows 2 and 3 are
    both < 5 (``strict=False``) or both exactly 0 (``strict=True``).
    """
    flags = {}
    for subject, answer in raw_answers.items():
        # .iloc makes the row lookup positional regardless of the index labels.
        first_two_high = answer.iloc[0] > 5 and answer.iloc[1] > 5
        if strict:
            last_two_low = answer.iloc[2] == 0 and answer.iloc[3] == 0
        else:
            last_two_low = answer.iloc[2] < 5 and answer.iloc[3] < 5
        flags[subject] = 1 if (first_two_high and last_two_low) else 0
    return flags


FPR_dic1, Acc_dic1, Identified_num_dic1 = _per_subject_indicators(df1, t1)
FPR_dic2, Acc_dic2, Identified_num_dic2 = _per_subject_indicators(df2, t23)
FPR_dic3, Acc_dic3, Identified_num_dic3 = _per_subject_indicators(df3, t23)
FPR_dic4, Acc_dic4, Identified_num_dic4 = _per_subject_indicators(df4, t45)
FPR_dic5, Acc_dic5, Identified_num_dic5 = _per_subject_indicators(df5, t45)

# The "if correct" flags are computed on the raw (non-binarised) frames d4 / d5.
If_correct1_4 = _if_correct_flags(d4, strict=False)
If_correct2_4 = _if_correct_flags(d4, strict=True)
If_correct1_5 = _if_correct_flags(d5, strict=False)
If_correct2_5 = _if_correct_flags(d5, strict=True)


# Rename the key '111302.1' to '111302' in the p2 food item and p2 food dim dicts.
# NOTE(review): the original comment named subject 110302 while the code
# renames 111302 -- confirm the intended subject id.
for indicator_dict in (
    FPR_dic2, Acc_dic2, Identified_num_dic2,
    FPR_dic4, Acc_dic4, Identified_num_dic4,
    If_correct1_4, If_correct2_4,
):
    if '111302.1' in indicator_dict:
        indicator_dict['111302'] = indicator_dict.pop('111302.1')


# Read the "corr and score" files
directory_path = os.path.join(desktop_path, 'human_project_data','indicator_csv')
file_path1 = os.path.join(directory_path, 'p1 food item corr and score.csv')
file_path2 = os.path.join(directory_path, 'p2 food item corr and score.csv')
file_path3 = os.path.join(directory_path, 'p2only food item corr and score.csv')
file_path4 = os.path.join(directory_path, 'p2 food dim corr and score.csv')
file_path5 = os.path.join(directory_path, 'p2only food dim corr and score.csv')
csv1 = pd.read_csv(file_path1)
csv2 = pd.read_csv(file_path2)
csv3 = pd.read_csv(file_path3)
csv4 = pd.read_csv(file_path4)
csv5 = pd.read_csv(file_path5)

# Cast Subject to int, then to str, so the values can match the string
# column names used as keys of the indicator dicts.
for indicator_csv in (csv1, csv2, csv3, csv4, csv5):
    indicator_csv['Subject'] = indicator_csv['Subject'].astype(int).astype(str)

for indicator_csv, fpr_dic, acc_dic, identified_dic in (
    (csv1, FPR_dic1, Acc_dic1, Identified_num_dic1),
    (csv2, FPR_dic2, Acc_dic2, Identified_num_dic2),
    (csv3, FPR_dic3, Acc_dic3, Identified_num_dic3),
    (csv4, FPR_dic4, Acc_dic4, Identified_num_dic4),
    (csv5, FPR_dic5, Acc_dic5, Identified_num_dic5),
):
    indicator_csv['False positive rate'] = indicator_csv['Subject'].map(fpr_dic)
    indicator_csv['Accuracy'] = indicator_csv['Subject'].map(acc_dic)
    indicator_csv['Identified number'] = indicator_csv['Subject'].map(identified_dic)

csv4['If correct1'] = csv4['Subject'].map(If_correct1_4)
csv5['If correct1'] = csv5['Subject'].map(If_correct1_5)
csv4['If correct2'] = csv4['Subject'].map(If_correct2_4)
csv5['If correct2'] = csv5['Subject'].map(If_correct2_5)

# Correct index computed as 1 / (correlation coefficient + 1).
# NOTE(review): a coefficient of exactly -1 makes the denominator zero.
for indicator_csv in (csv1, csv2, csv3, csv4, csv5):
    indicator_csv['Correct Index'] = 1 / (indicator_csv['Correlation coefficient'] + 1)

output_file_path1 = os.path.join(directory_path, 'p1 food item answer indicator.csv')
csv1.to_csv(output_file_path1)
output_file_path2 = os.path.join(directory_path, 'p2 food item answer indicator.csv')
csv2.to_csv(output_file_path2)
output_file_path3 = os.path.join(directory_path, 'p2only food item answer indicator.csv')
csv3.to_csv(output_file_path3)
output_file_path4 = os.path.join(directory_path, 'p2 food dim answer indicator.csv')
csv4.to_csv(output_file_path4)
output_file_path5 = os.path.join(directory_path, 'p2only food dim answer indicator.csv')
csv5.to_csv(output_file_path5)


In [None]:
# confusion matrix of "if correct" vs. "if full score"

In [None]:
# relationship between spearman corr and probe fraction / stick fraction

In [None]:
# relationship between spearman corr and the "exploration index"

In [None]:
# relationship between false rate and score (scatter plot)