In [None]:
import pandas as pd
import numpy as np
from scipy.stats import kendalltau
from sklearn.metrics import confusion_matrix

def rmse(y, y_hat):
    """Return the root-mean-squared error between two sequences of numbers.

    Args:
        y: Array-like of reference values.
        y_hat: Array-like of predicted values, broadcastable against ``y``.

    Returns:
        The square root of the mean squared difference, as a float.
    """
    # Cast to float before subtracting: with unsigned or narrow integer
    # dtypes the difference and its square would wrap around silently.
    y = np.asarray(y, dtype=float)
    y_hat = np.asarray(y_hat, dtype=float)
    return np.sqrt(np.mean((y - y_hat) ** 2))


def report_results_metrics(results_path):
    """Print the RMSE and Kendall rank correlation for one results file.

    The file is read as a semicolon-separated CSV. The real and predicted
    vote counts are taken from its ``real_votes`` and ``predicted_votes``
    columns. Nothing is returned; both metrics are written to stdout.

    Args:
        results_path: Location of the results CSV, passed to ``pd.read_csv``.
    """
    wanted = ['project_id', 'real_votes', 'real_rank', 'predicted_votes']
    frame = pd.read_csv(results_path, sep=";").filter(items=wanted)

    actual = frame['real_votes'].values
    predicted = frame['predicted_votes'].values

    # Rank agreement between the real and the predicted vote counts.
    tau, p_value = kendalltau(actual, predicted)

    error = rmse(actual, predicted)
    print(f"RMSE: {error:.4f}")
    print(f"Kendall-thau: {tau:.4f}, (p-value: {p_value:.4f})")


def report_metrics(result_path, top_k=5):
    """Score how well the predicted top-k projects match the real top-k.

    The results file is read as a semicolon-separated CSV with the columns
    ``project_id``, ``real_votes`` and ``predicted_votes``. Every row is
    labelled by whether its project id is among the ``top_k`` largest real
    votes and among the ``top_k`` largest predicted votes; the two labellings
    are then compared as a binary classification.

    Args:
        result_path: Path (or any source ``pd.read_csv`` accepts) of the CSV.
        top_k: How many of the highest-voted projects form each top set.

    Returns:
        dict with the keys ``top_k``, ``accuracy``, ``recall``, ``precision``,
        ``f1_score`` and ``jaccard_index``. Ratios whose denominator is zero
        are reported as 0.0.
    """
    results = pd.read_csv(result_path, sep=";")
    results = results.filter(['project_id', 'real_votes', 'predicted_votes'])

    # Top-k id sets, computed once and reused for both the row labels and
    # the Jaccard index.
    topk_real_ids = set(results.nlargest(top_k, 'real_votes')['project_id'])
    topk_pred_ids = set(results.nlargest(top_k, 'predicted_votes')['project_id'])

    is_real_topk = results['project_id'].isin(topk_real_ids)
    is_predicted_topk = results['project_id'].isin(topk_pred_ids)

    # Count the four confusion-matrix cells straight from the boolean masks.
    # Unpacking a confusion matrix instead fails when one class is missing
    # (for example top_k >= number of rows), because the matrix is then 1x1.
    tp = int((is_real_topk & is_predicted_topk).sum())
    fp = int((~is_real_topk & is_predicted_topk).sum())
    fn = int((is_real_topk & ~is_predicted_topk).sum())
    tn = int((~is_real_topk & ~is_predicted_topk).sum())

    total = tp + tn + fp + fn
    accuracy = (tp + tn) / total if total > 0 else 0.0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0

    # Jaccard index of the two top-k id sets.
    intersection = topk_real_ids & topk_pred_ids
    union = topk_real_ids | topk_pred_ids
    jaccard_index = len(intersection) / len(union) if len(union) > 0 else 0.0

    return {
        "top_k": top_k,
        "accuracy": accuracy,
        "recall": recall,
        "precision": precision,
        "f1_score": f1_score,
        "jaccard_index": jaccard_index
    }

# Evaluate one prediction run: print the overall metrics, then build a table
# with one row of top-k metrics for each cutoff k = 1..15.
name = 'wrc_simple_prompt_full_1'
results_path = f'output/predictions/{name}.csv'

report_results_metrics(results_path)
print("##")

data = [report_metrics(results_path, top_k=i) for i in range(1, 16)]

# Tag every row with the run name so the table identifies its source.
top_k_ds = pd.DataFrame(data).assign(label=name)
top_k_ds

RMSE: 2398.7678
Kendall-thau: 0.1518, (p-value: 0.0068)
##


Unnamed: 0,top_k,accuracy,recall,precision,f1_score,jaccard_index,label
0,1,0.989071,0.0,0.0,0.0,0.0,tls_simple_prompt_full_1
1,2,0.978142,0.0,0.0,0.0,0.0,tls_simple_prompt_full_1
2,3,0.967213,0.0,0.0,0.0,0.0,tls_simple_prompt_full_1
3,4,0.956284,0.0,0.0,0.0,0.0,tls_simple_prompt_full_1
4,5,0.956284,0.2,0.2,0.2,0.111111,tls_simple_prompt_full_1
5,6,0.945355,0.166667,0.166667,0.166667,0.090909,tls_simple_prompt_full_1
6,7,0.934426,0.142857,0.142857,0.142857,0.076923,tls_simple_prompt_full_1
7,8,0.934426,0.25,0.25,0.25,0.142857,tls_simple_prompt_full_1
8,9,0.934426,0.333333,0.333333,0.333333,0.2,tls_simple_prompt_full_1
9,10,0.945355,0.5,0.5,0.5,0.333333,tls_simple_prompt_full_1


In [None]:
# Names of the runs to process. Presumably each is the base name of a
# predictions CSV — TODO confirm where `files` is consumed.
files = ["tls_knn_pca2_full_1",
         ]