# In [1]:
import os
import pandas as pd
from sklearn.metrics import ndcg_score
from scipy.stats import spearmanr
from scipy import stats
import math

# Directory scanned for per-fold prediction CSV files.
folder_path = '.'

# Columns recorded for every prediction file, and the keys the folds are
# averaged over.
RESULT_COLUMNS = ['protein', 'model', 'phenotype', 'split', 'fold_num', 'spearman', 'ndcg']
GROUP_KEYS = ['protein', 'model', 'phenotype', 'split']


def parse_result_filename(filename):
    """Split a prediction file name into its metadata fields.

    The name is expected to look like
    ``<protein>_<model>_<phenoA>_<phenoB>-<split>-<fold>[...].csv``.

    Args:
        filename: Base name of the CSV file (no directory part).

    Returns:
        A dict with the keys ``protein``, ``model``, ``phenotype``, ``split``
        and ``fold_num`` (all strings).

    Raises:
        ValueError: If the name does not contain the expected fields.
    """
    # Drop the extension first so it cannot leak into the last field
    # (otherwise fold_num would come out as e.g. "0.csv").
    stem = os.path.splitext(filename)[0]
    parts = stem.split('_')
    if len(parts) < 4:
        raise ValueError(f"Unexpected result file name: {filename!r}")
    tail = parts[3].split('-')
    if len(tail) < 3:
        raise ValueError(f"Unexpected result file name: {filename!r}")
    return {
        'protein': parts[0],
        'model': parts[1],
        'phenotype': f"{parts[2]}_{tail[0]}",
        'split': tail[1],
        'fold_num': tail[2],
    }


def score_predictions(predicted, labels):
    """Return ``(spearman, ndcg)`` for one set of predictions.

    NDCG is evaluated on the top 1% of the ranking. The cutoff is clamped to
    at least 1 because ``floor(0.01 * n)`` is 0 for fewer than 100 rows,
    which is not a meaningful cutoff for ``ndcg_score``.
    """
    spearman, _ = stats.spearmanr(predicted, labels)
    k = max(1, math.floor(len(labels) * 0.01))
    ndcg = ndcg_score([labels], [predicted], k=k)
    return spearman, ndcg


# Collect one row per CSV file. Rows are gathered in a plain list and turned
# into a DataFrame once, since DataFrame.append was removed in pandas 2.0.
# Sorting makes the processing order (and the output file name) deterministic.
rows = []
for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith('.csv'):
        continue
    row = parse_result_filename(filename)
    df = pd.read_csv(os.path.join(folder_path, filename))
    # Second column holds the predictions, third column the labels.
    row['spearman'], row['ndcg'] = score_predictions(
        df.iloc[:, 1].values,
        df.iloc[:, 2].values,
    )
    rows.append(row)

final_df = pd.DataFrame(rows, columns=RESULT_COLUMNS)

if rows:
    # Average the metrics over folds, once, after all files have been read.
    result = final_df.groupby(GROUP_KEYS).agg({
        'spearman': 'mean',
        'ndcg': 'mean'
    }).reset_index()
    result['spearman'] = result['spearman'].round(2)
    result['ndcg'] = result['ndcg'].round(2)

    # The output file is named after the protein of the last processed file.
    file_name = f"{rows[-1]['protein']}_result.csv"
    output_folder = '..'  # parent folder
    file_path = os.path.join(output_folder, file_name)

    # Save to a CSV file.
    result.to_csv(file_path, index=False)
else:
    # Nothing to aggregate: keep `result` defined but do not write a file.
    result = pd.DataFrame(columns=GROUP_KEYS + ['spearman', 'ndcg'])
    print(f"No .csv files found in {folder_path!r}; nothing written.")



