Metriсa	-> Significato
Hit@1	-> Percentuale di volte in cui la prima predizione era corretta.
Hit@10	-> Percentuale di volte in cui la risposta corretta era tra le prime 10 scelte.
Accuracy	-> Spesso uguale a Hit@1, ma può variare a seconda della definizione usata.
Totale query -> Numero complessivo di predizioni analizzate.

In [20]:
import os
import pandas as pd
from sklearn.metrics import f1_score
import ast
import numpy as np


## Utils

In [21]:
# Calcola l’accuratezza top-1 (predizione esatta).
# Calcola l’F1-score pesato, utile in caso di classi sbilanciate.
def get_acc1_f1(df):
    acc1 = (df['prediction'] == df['ground_truth']).sum() / len(df)
    f1 = f1_score(df['ground_truth'], df['prediction'], average='weighted')
    return acc1, f1

# Aggiunge flag booleani al DataFrame:
# is_correct1, is_correct5, is_correct10 → indica se la ground_truth è contenuta nella top 1/5/10 predizioni.
def get_is_correct(row):
    pred_list = row['prediction']
    if row['ground_truth'] in pred_list:
        row['is_correct'] = True
    else:
        row['is_correct'] = False
    
    return row


def get_is_correct10(row):
    pred_list = row['top10']
    if row['ground_truth'] in pred_list:
        row['is_correct10'] = True
    else:
        row['is_correct10'] = False
        
    pred_list = row['top5']
    if row['ground_truth'] in pred_list:
        row['is_correct5'] = True
    else:
        row['is_correct5'] = False

    pred = row['top1']
    if pred == row['ground_truth']:
        row['is_correct1'] = True
    else:
        row['is_correct1'] = False
    
    return row

# 📌 first_nonzero: usato internamente da get_ndcg, serve per trovare il primo indice in cui si ha match (utile per calcolare la rank).
def first_nonzero(arr, axis, invalid_val=-1):
    mask = arr!=0
    return np.where(mask.any(axis=axis), mask.argmax(axis=axis), invalid_val)

# 📈 Calcola l’NDCG@k (Normalized Discounted Cumulative Gain).
# Tiene conto della posizione della risposta corretta nel ranking (più è in alto, meglio è).
# Valore tra 0 (peggiore) e 1 (perfetto).
def get_ndcg(prediction, targets, k=10):
    """
    Calculates the NDCG score for the given predictions and targets.

    Args:
        prediction (Nxk): list of lists. the softmax output of the model.
        targets (N): torch.LongTensor. actual target place id.

    Returns:
        the sum ndcg score
    """
    for _, xi in enumerate(prediction):
        if len(xi) < k:
            #print(f"the {i}th length: {len(xi)}")
            xi += [-5 for _ in range(k-len(xi))]
        elif len(xi) > k:
            xi = xi[:k]
        else:
            pass
    
    n_sample = len(prediction)
    prediction = np.array(prediction)
    targets = np.broadcast_to(targets.reshape(-1, 1), prediction.shape)
    hits = first_nonzero(prediction == targets, axis=1, invalid_val=-1)
    hits = hits[hits>=0]
    ranks = hits + 1
    ndcg = 1 / np.log2(ranks + 1)
    return np.sum(ndcg) / n_sample
    

## Top10

In [22]:
# Calculate the metric for all user
output_dir = 'output/geolife/top10_wot'
file_list = [file for file in os.listdir(output_dir) if file.endswith('.csv')]
# print(file_list)
file_path_list = [os.path.join(output_dir, file) for file in file_list]
#file_path_list = file_path_list[:1]
#print(file_path_list)

df = pd.DataFrame({
    'user_id': None,
    'ground_truth': None,
    'prediction': None,
    'reason': None
}, index=[])

for file_path in file_path_list:
    iter_df = pd.read_csv(file_path)
    if output_dir[-1] != '1':
        pred_series = iter_df['prediction'].apply(lambda x: ast.literal_eval(x))  # A pandas series
        iter_df['top10'] = pred_series.apply(lambda x: x[:10] if type(x) == list else [x] * 10)
        iter_df['top5'] = pred_series.apply(lambda x: x[:5] if type(x) == list else [x] * 5)
        iter_df['top1'] = pred_series.apply(lambda x: x[0] if type(x) == list else x)
        #iter_df['prediction'] = iter_df['prediction'].apply(lambda x: x.strip('][').split(', '))
    df = pd.concat([df, iter_df], ignore_index=True)
    

df = df.apply(func=get_is_correct10, axis=1)

print("number of samples: ", len(df))

acc1 = (df['is_correct1']).sum() / len(df)
acc5 = (df['is_correct5']).sum() / len(df)
acc10 = (df['is_correct10']).sum() / len(df)
f1 = f1_score(df['ground_truth'], df['top1'], average='weighted')
preds = df['top10'].tolist()
targets = np.array(df['ground_truth'].tolist())
ndcg = get_ndcg(prediction=preds, targets=targets, k=10)

print("Acc@1: ", acc1)
print("Acc@5: ", acc5)
print("Acc@10: ", acc10)
print("Weighted F1: ", f1)
print("NDCG@10: ", ndcg)

ValueError: malformed node or string: nan

# Top1

In [None]:
# Calculate the metric for all user
output_dir = 'output/geolife/top10_wot'
file_list = [file for file in os.listdir(output_dir) if file.endswith('.csv')]
print(file_list)
file_path_list = [os.path.join(output_dir, file) for file in file_list]

df = pd.DataFrame({
    'user_id': None,
    'ground_truth': None,
    'prediction': None,
    'reason': None
}, index=[])

for file_path in file_path_list:
    iter_df = pd.read_csv(file_path)
    df = pd.concat([df, iter_df], ignore_index=True)

df['prediction'] = df['prediction'].apply(lambda x: int(x))
df['ground_truth'] = df['ground_truth'].apply(lambda x: int(x))

acc1, f1 = get_acc1_f1(df)
print("Acc@1: ", acc1)
print("F1: ", f1)

['40.csv', '41.csv', '43.csv', '42.csv', '45.csv', '44.csv', '37.csv', '23.csv', '22.csv', '36.csv', '08.csv', '20.csv', '34.csv', '35.csv', '21.csv', '09.csv', '25.csv', '31.csv', '19.csv', '18.csv', '30.csv', '24.csv', '32.csv', '26.csv', '27.csv', '33.csv', '16.csv', '02.csv', '03.csv', '17.csv', '29.csv', '01.csv', '15.csv', '14.csv', '28.csv', '04.csv', '10.csv', '38.csv', '39.csv', '11.csv', '05.csv', '13.csv', '07.csv', '06.csv', '12.csv']


ValueError: invalid literal for int() with base 10: '[1, 1147, 1006, 976, 1003, 960, 960, 960, 960, 960]'