In [20]:
import pandas as pd
import numpy as np
from scipy.stats import kendalltau
from sklearn.metrics import confusion_matrix

def rmse(y, y_hat):
    """Return the root-mean-square error between `y` and `y_hat`.

    Both arguments are converted with `np.array`, so any array-like that
    NumPy accepts can be passed. The element-wise difference is squared,
    averaged over all elements, and the square root of that mean is returned.
    """
    residuals = np.array(y) - np.array(y_hat)
    mean_squared_error = np.mean(residuals ** 2)
    return np.sqrt(mean_squared_error)


def report_results_metrics(results_path):
    """Print RMSE and Kendall's tau for one predictions file.

    Reads the semicolon-separated CSV at `results_path` and compares its
    `real_votes` column with its `predicted_votes` column. Nothing is
    returned; both metrics are written to stdout with four decimals.
    """
    wanted = ['project_id', 'real_votes', 'real_rank', 'predicted_votes']
    frame = pd.read_csv(results_path, sep=";").filter(wanted)

    actual = frame['real_votes'].values
    predicted = frame['predicted_votes'].values

    # Rank correlation first, then the error, then both lines are printed.
    tau, p_value = kendalltau(actual, predicted)
    error = rmse(actual, predicted)

    print(f"RMSE: {error:.4f}")
    print(f"Kendall-thau: {tau:.4f}, (p-value: {p_value:.4f})")


def report_metrics(result_path, top_k=5):
    """Score how well the predicted top-k projects match the real top-k.

    Reads the semicolon-separated CSV at `result_path`, selects the `top_k`
    rows with the largest `real_votes` and the `top_k` rows with the largest
    `predicted_votes`, and then treats "project is in the real top-k" as the
    true label and "project is in the predicted top-k" as the predicted label
    of every row.

    Args:
        result_path: Path to a CSV that has `project_id`, `real_votes` and
            `predicted_votes` columns.
        top_k: Size of both top-k selections.

    Returns:
        dict with the keys `top_k`, `accuracy`, `recall`, `precision`,
        `f1_score` and `jaccard_index`. Any ratio whose denominator is zero
        is reported as 0.0.

    Raises:
        KeyError: If one of the required columns is missing from the file.
    """
    cols = ['project_id', 'real_votes', 'predicted_votes']
    results = pd.read_csv(result_path, sep=";")

    # Fail up front with the offending file and columns named, instead of a
    # bare KeyError from a later column lookup.
    missing = [c for c in cols if c not in results.columns]
    if missing:
        raise KeyError(f"{result_path} is missing required column(s): {missing}")

    # Each top-k selection is computed once and reused for both the per-row
    # flags and the Jaccard index.
    topk_real_ids = set(results.nlargest(top_k, 'real_votes')['project_id'])
    topk_pred_ids = set(results.nlargest(top_k, 'predicted_votes')['project_id'])

    is_real_topk = results['project_id'].isin(topk_real_ids)
    is_predicted_topk = results['project_id'].isin(topk_pred_ids)

    # Count the four confusion-matrix cells directly from the boolean flags.
    # Unpacking `confusion_matrix(...).ravel()` into four names breaks when
    # only one class is present (e.g. top_k >= number of rows, where every
    # row is flagged), because the matrix is then 1x1 instead of 2x2.
    TP = int((is_real_topk & is_predicted_topk).sum())
    FP = int((~is_real_topk & is_predicted_topk).sum())
    FN = int((is_real_topk & ~is_predicted_topk).sum())
    TN = int((~is_real_topk & ~is_predicted_topk).sum())

    total = TP + TN + FP + FN
    accuracy = (TP + TN) / total if total > 0 else 0.0
    recall = TP / (TP + FN) if (TP + FN) > 0 else 0.0
    precision = TP / (TP + FP) if (TP + FP) > 0 else 0.0
    f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0

    # Jaccard index of the two top-k id sets.
    intersection = topk_real_ids & topk_pred_ids
    union = topk_real_ids | topk_pred_ids
    jaccard_index = len(intersection) / len(union) if len(union) > 0 else 0.0

    return {
        "top_k": top_k,
        "accuracy": accuracy,
        "recall": recall,
        "precision": precision,
        "f1_score": f1_score,
        "jaccard_index": jaccard_index
    }

# Single-file report: print the summary metrics, then build the top-k table.
name = 'wrc_rag_sb_full_1'
results_path = f'output/predictions/{name}.csv'
report_results_metrics(results_path)
print("##")

# One metrics row per cutoff, for k = 1 .. 15.
data = [report_metrics(results_path, top_k=k) for k in range(1, 16)]

# Tag every row with the run name; the bare expression displays the table.
top_k_ds = pd.DataFrame(data).assign(label=name)
top_k_ds

RMSE: 2795.2021
Kendall-thau: 0.2503, (p-value: 0.0128)
##


Unnamed: 0,top_k,accuracy,recall,precision,f1_score,jaccard_index,label
0,1,0.96,0.0,0.0,0.0,0.0,wrc_rag_sb_full_1
1,2,0.96,0.5,0.5,0.5,0.333333,wrc_rag_sb_full_1
2,3,0.92,0.333333,0.333333,0.333333,0.2,wrc_rag_sb_full_1
3,4,0.88,0.25,0.25,0.25,0.142857,wrc_rag_sb_full_1
4,5,0.84,0.2,0.2,0.2,0.111111,wrc_rag_sb_full_1
5,6,0.8,0.166667,0.166667,0.166667,0.090909,wrc_rag_sb_full_1
6,7,0.76,0.142857,0.142857,0.142857,0.076923,wrc_rag_sb_full_1
7,8,0.76,0.25,0.25,0.25,0.142857,wrc_rag_sb_full_1
8,9,0.72,0.222222,0.222222,0.222222,0.125,wrc_rag_sb_full_1
9,10,0.72,0.3,0.3,0.3,0.176471,wrc_rag_sb_full_1


In [22]:
# Prediction runs to evaluate; each name maps to output/predictions/<name>.csv.
files = [
    "tls_knn_pca2_full_1",
    "tls_pva_pca10_full_1",
    "tls_simple_prompt_full_1",
    "tls_rag_cot_full_1",
    "tls_rag_sb_full_1",
    "tls_in_context_full_1",
    "wrc_knn_pca15_full_1",
    "wrc_pva_pca20_full_1",
    "wrc_simple_prompt_full_1",
    "wrc_rag_cot_full_1",
    "wrc_rag_sb_full_1",
    "wrc_in_context_full_1",
]

# For every run: print its summary metrics, then collect its top-k table
# (k = 1 .. 15) labelled with the run name. `l` accumulates one table per run.
l = []
for f in files:
    results_path = f'output/predictions/{f}.csv'
    report_results_metrics(results_path)
    print("##")

    data = [report_metrics(results_path, top_k=k) for k in range(1, 16)]
    top_k_ds = pd.DataFrame(data).assign(label=f)
    l.append(top_k_ds)

RMSE: 97.8479
Kendall-thau: 0.2094, (p-value: 0.0000)
##
RMSE: 100.6347
Kendall-thau: 0.2376, (p-value: 0.0000)
##
RMSE: 2398.7678
Kendall-thau: 0.1518, (p-value: 0.0068)
##
RMSE: 118.9687
Kendall-thau: 0.3283, (p-value: 0.0000)
##
RMSE: 113.1531
Kendall-thau: 0.3316, (p-value: 0.0000)
##
RMSE: 102.2516
Kendall-thau: 0.2917, (p-value: 0.0000)
##
RMSE: 2479.3909
Kendall-thau: 0.3059, (p-value: 0.0018)
##
RMSE: 2406.7283
Kendall-thau: 0.4155, (p-value: 0.0000)
##
RMSE: 2798.8257
Kendall-thau: 0.3158, (p-value: 0.0047)
##
RMSE: 3206.8066
Kendall-thau: 0.2157, (p-value: 0.0313)
##
RMSE: 2795.2021
Kendall-thau: 0.2503, (p-value: 0.0128)
##
RMSE: 2783.5101
Kendall-thau: 0.4361, (p-value: 0.0000)
##


In [23]:
# Stack the per-run top-k tables into one frame with a fresh 0..n-1 index,
# then save it as a semicolon-separated CSV without the index column.
topk_results = pd.concat(l, ignore_index=True)
topk_results.to_csv('topk_results.csv', sep=";", index=False)
