In [1]:
import math

def calc_ndcg(relevance_list, k=None):
    """Compute normalized discounted cumulative gain (nDCG) for a ranked list.

    Gains are the raw relevance grades, discounted by ``log2(position + 1)``
    with 1-based positions.

    Args:
        relevance_list: Relevance grades ordered by predicted rank (best
            prediction first).
        k: Optional cutoff. When given, DCG is taken over the first ``k``
            positions while the ideal DCG is taken over the ``k`` largest
            grades of the *whole* list, so relevant items ranked below the
            cutoff still lower the score. Passing a pre-truncated list instead
            would build the ideal ranking from the truncated list only.
            ``None`` (default) scores the full list, exactly as before.

    Returns:
        float: DCG / IDCG, or ``0.0`` when the ideal DCG is not positive
        (empty list or no positive grade).

    Raises:
        ValueError: If ``k`` is negative.
    """
    if k is not None and k < 0:
        raise ValueError(f"k must be non-negative, got {k}")

    def _dcg(grades):
        # Plain running sum keeps results bit-identical to the original loop.
        total = 0
        for i, grade in enumerate(grades):
            total += grade / math.log2(i + 1 + 1)
        return total

    ideal_rank = sorted(relevance_list, reverse=True)
    if k is None:
        ranked, ideal = relevance_list, ideal_rank
    else:
        ranked, ideal = relevance_list[:k], ideal_rank[:k]

    dcg = _dcg(ranked)
    idcg = _dcg(ideal)
    if idcg > 0:
        return float(dcg) / idcg
    # Always return a float so callers never see a mix of int and float.
    return 0.0

def calc_mrr(y_pred, first_only=False):
    """Reciprocal-rank score of the relevant items in a ranked list.

    An item counts as relevant when its grade is ``>= 3``; ranks are 1-based.

    Args:
        y_pred: Relevance grades ordered by predicted rank.
        first_only: When ``False`` (default, the original behaviour) the
            reciprocal ranks of *all* relevant items are averaged. When
            ``True`` only the first relevant item is used, which is the
            conventional reciprocal rank whose mean over queries is MRR.

    Returns:
        float: The score, or ``0.0`` when no item is relevant.
    """
    rr = [1 / idx for idx, y in enumerate(y_pred, start=1) if y >= 3]

    if not rr:
        return 0.0

    if first_only:
        # Ranks are visited in ascending order, so rr[0] belongs to the
        # highest-ranked relevant item.
        return rr[0]

    return sum(rr) / len(rr)

def calc_recall_at_k(y_pred, k, y_true=None):
    """Recall@k: share of the relevant items that appear in the top ``k``.

    An item counts as relevant when its grade is ``>= 3``.

    Args:
        y_pred: Relevance grades ordered by predicted rank, or ``None``.
        k: Cutoff position.
        y_true: Optional iterable of grades used only to count how many
            relevant items exist. Defaults to ``y_pred`` itself, i.e. the
            relevant items are those present anywhere in the ranked list.
            (The original body read an undefined name ``y_true`` here.)

    Returns:
        float | None: ``hits_in_top_k / relevant``, or ``None`` when
        ``y_pred`` is ``None`` or there are no relevant items.
    """
    # Check for a missing prediction before touching it.
    if y_pred is None:
        return None
    if y_true is None:
        y_true = y_pred

    relevant = len([y for y in y_true if y >= 3])
    if not relevant:
        return None
    predicted = len([y for y in y_pred[:k] if y >= 3])

    return float(predicted) / float(relevant)

def calc_precision_at_k(y_pred, k):
    """Precision@k: fraction of the first ``k`` positions holding a relevant item.

    A grade of 3 or more counts as relevant. The denominator is always ``k``,
    even when ``y_pred`` has fewer than ``k`` entries.
    """
    hits = 0
    for grade in y_pred[:k]:
        if grade >= 3:
            hits += 1

    return float(hits) / float(k)

def calc_hit_at_k(y_pred, k):
    """Hit@k: 1 if any of the first ``k`` positions holds a relevant item, else 0.

    A grade of 3 or more counts as relevant.
    """
    hits = sum(1 for grade in y_pred[:k] if grade >= 3)
    return 1 if hits > 0 else 0

In [2]:
import ast
import random

import pandas as pd
import torch
from tqdm import tqdm

# Register tqdm's progress-bar variants (e.g. `progress_apply`) on pandas objects.
tqdm.pandas()

In [25]:
# Experiment selector. All three values are interpolated into `model_name`;
# `category` is also used to filter the test set.
category = 'Games'
signal = 'write'
num_candidates = 20

# Experiment directory name. Every other field of the template (b_8, bs_1,
# lr_0.0005, pw_v3_prompt) is fixed in the string itself.
model_name = f'ciao{category}_{signal}_chat_b_8_c_{num_candidates}_bs_1_lr_0.0005_pw_v3_prompt'

In [26]:
# Load the per-sample predictions of the selected experiment.
# `y` and `rank` are parsed from their string form with ast.literal_eval, which
# accepts only Python literals; eval would execute any expression found in the file.
predictions = pd.read_csv(f'/sise/bshapira-group/lilachzi/models/LlamaRec/experiments/{model_name}/predictions.csv')
predictions['y'] = predictions['y'].apply(ast.literal_eval)
predictions['rank'] = predictions['rank'].apply(ast.literal_eval)

In [28]:
# Column-wise mean of positional columns 3+ over the rows whose `y` has more than 20 entries.
(
    predictions
    .loc[predictions['y'].map(len) > 20]
    .iloc[:, 3:]
    .mean()
)

nDCG@5         0.443267
MRR            0.313197
Recall@1       0.206568
Recall@3       0.425946
Recall@5       0.557901
Precision@1    0.269914
Precision@3    0.208023
Precision@5    0.175759
Hit@1          0.269914
Hit@3          0.459026
Hit@5          0.565043
dtype: float64

In [15]:
# Column-wise mean of positional columns 3+ over all rows, shown as a {column: mean} dict.
dict(predictions.iloc[:, 3:].mean().items())

{'nDCG@5': 0.7252369699504181,
 'MRR': 0.6260673584318778,
 'Recall@1': 0.5296311832887031,
 'Recall@3': 0.762428327980918,
 'Recall@5': 0.8672254300900758,
 'Precision@1': 0.5894123606889564,
 'Precision@3': 0.32613559608240456,
 'Precision@5': 0.23478976697061807,
 'Hit@1': 0.5894123606889564,
 'Hit@3': 0.7553191489361702,
 'Hit@5': 0.8327001013171226}

In [52]:
# Load the test set and keep only the rows of the selected category.
# `y_true` is parsed with ast.literal_eval, which accepts only Python literals;
# eval would execute any expression found in the file.
test_set = pd.read_csv('/sise/bshapira-group/lilachzi/csvs/test_set.csv', converters={'y_true': ast.literal_eval})
test_set = test_set[test_set['category'] == category]
test_set

Unnamed: 0,category,product_id,voter_id,y_true
5256,Games,37720,5837129,"{2162: 4, 38695: 4, 30871: 0, 126090: 0, 13685..."
5257,Games,37720,6683044,"{2162: 4, 30871: 0, 38695: 0, 126090: 0, 13685..."
5258,Games,37720,5719918,"{2162: 4, 30871: 4, 177789: 4, 38695: 0, 12609..."
5259,Games,37720,5584168,"{2162: 4, 126090: 4, 30871: 0, 38695: 0, 13685..."
5260,Games,37720,5353345,"{2162: 4, 30871: 4, 38695: 0, 126090: 0, 13685..."
...,...,...,...,...
392816,Games,79736,5014428,"{303774: 3, 19087: 0, 24972: 0, 84856: 0, 8887..."
392822,Games,97440,21638,"{303793: 3, 161466: 0, 181849: 0, 206908: 0, 2..."
392844,Games,74467,18976,"{303857: 3, 5440: 0, 13735: 0, 14251: 0, 16643..."
392900,Games,18180,5001743,"{304171: 3, 22667: 0, 89083: 0, 108780: 0, 119..."


In [53]:
# Display the category-filtered test set.
test_set

Unnamed: 0,category,product_id,voter_id,y_true
5256,Games,37720,5837129,"{2162: 4, 38695: 4, 30871: 0, 126090: 0, 13685..."
5257,Games,37720,6683044,"{2162: 4, 30871: 0, 38695: 0, 126090: 0, 13685..."
5258,Games,37720,5719918,"{2162: 4, 30871: 4, 177789: 4, 38695: 0, 12609..."
5259,Games,37720,5584168,"{2162: 4, 126090: 4, 30871: 0, 38695: 0, 13685..."
5260,Games,37720,5353345,"{2162: 4, 30871: 4, 38695: 0, 126090: 0, 13685..."
...,...,...,...,...
392816,Games,79736,5014428,"{303774: 3, 19087: 0, 24972: 0, 84856: 0, 8887..."
392822,Games,97440,21638,"{303793: 3, 161466: 0, 181849: 0, 206908: 0, 2..."
392844,Games,74467,18976,"{303857: 3, 5440: 0, 13735: 0, 14251: 0, 16643..."
392900,Games,18180,5001743,"{304171: 3, 22667: 0, 89083: 0, 108780: 0, 119..."


In [10]:
# Keep only the non-negative entries of every rank list (the column is overwritten).
# NOTE(review): negative entries presumably mark invalid positions — confirm.
predictions['rank'] = predictions['rank'].map(
    lambda ranking: list(filter(lambda pos: pos >= 0, ranking))
)

In [16]:
# Distinct values found in the first position of the rank lists (rows with any NaN dropped).
(
    predictions
    .dropna()['rank']
    .map(lambda ranking: ranking[0])
    .unique()
)

array([ 0, 40, 10, 30, 20])

In [113]:
# Load the evaluation-time predictions of the ciaoGames_write_8B_10_v3 experiment.
# The list-valued columns are parsed with ast.literal_eval, which accepts only
# Python literals; eval would execute any expression found in the file.
eval_predictions = pd.read_csv('/sise/bshapira-group/lilachzi/models/LlamaRec/experiments/ciaoGames_write_8B_10_v3/eval_predictions_2024_04_22_12_51.csv')
eval_predictions['y'] = eval_predictions['y'].apply(ast.literal_eval)
eval_predictions['rank'] = eval_predictions['rank'].apply(ast.literal_eval)
eval_predictions['y_true'] = eval_predictions['y_true'].apply(ast.literal_eval)

In [121]:
# Rows whose `y_true` list is non-empty.
eval_predictions.loc[eval_predictions['y_true'].map(len) > 0]

Unnamed: 0,y,rank,y_true,nDCG@5,MRR,Recall@1,Recall@3,Recall@5,Precision@1,Precision@3,Precision@5,Hit@1,Hit@3,Hit@5
45,"[0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, ...","[0, 3, 5, 6, 4, 2, 1, 8, 9, 7]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0.000000,0.166667,0.0,0.0,0.0,0.0,0.000000,0.0,0,0,0
46,"[0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0, 3, 5, 6, 4, 2, 1, 9, 8, 7]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0.630930,0.500000,0.0,1.0,1.0,0.0,0.333333,0.2,0,1,1
47,"[0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 4.0, 0.0, ...","[0, 3, 5, 6, 2, 4, 1, 8, 9, 7]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0.386853,0.162500,0.0,0.0,0.5,0.0,0.000000,0.2,0,0,1
48,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, ...","[0, 3, 5, 6, 2, 4, 1, 9, 8, 7]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0.000000,0.125000,0.0,0.0,0.0,0.0,0.000000,0.0,0,0,0
49,"[0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, ...","[0, 3, 5, 6, 2, 4, 1, 9, 8, 7]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0.000000,0.166667,0.0,0.0,0.0,0.0,0.000000,0.0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15733,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, ...","[0, 3, 5, 6, 4, 2, 1, 9, 8, 7]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0.000000,0.142857,0.0,0.0,0.0,0.0,0.000000,0.0,0,0,0
15734,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, ...","[0, 3, 5, 6, 4, 2, 1, 9, 8, 7]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0.000000,0.111111,0.0,0.0,0.0,0.0,0.000000,0.0,0,0,0
15735,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, ...","[0, 3, 5, 6, 4, 2, 1, 8, 9, 7]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0.000000,0.111111,0.0,0.0,0.0,0.0,0.000000,0.0,0,0,0
15736,"[0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, ...","[0, 3, 5, 6, 2, 4, 1, 9, 8, 7]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0.386853,0.200000,0.0,0.0,1.0,0.0,0.000000,0.2,0,0,1


In [114]:
# Mean of every metric over all evaluation rows, shown as a {column: mean} dict.
# As displayed, the positional slice appears to start at `y_true`, a column of
# lists. Restrict the reduction to numeric columns explicitly: older pandas
# dropped such columns silently, pandas >= 2.0 raises TypeError instead.
{k: v for k, v in eval_predictions.iloc[:, 3:].mean(numeric_only=True).items()}

{'nDCG@5': 0.4595567799634982,
 'MRR': 0.3404121869231881,
 'Recall@1': 0.14395041356636043,
 'Recall@3': 0.4344788677978747,
 'Recall@5': 0.6895856502297016,
 'Precision@1': 0.19243161286246763,
 'Precision@3': 0.19409522606186955,
 'Precision@5': 0.18637943015983324,
 'Hit@1': 0.19243161286246763,
 'Hit@3': 0.48063680586265717,
 'Hit@5': 0.7008023248468002}

In [115]:
# Distinct values found in the first position of the evaluation rank lists.
(
    eval_predictions['rank']
    .map(lambda ranking: ranking[0])
    .unique()
)

array([0, 3])

In [117]:
# Mean of every metric over the rows whose `y_true` has more than 10 entries.
# numeric_only=True keeps the list-valued `y_true` column (apparently part of
# the positional slice) out of the reduction; pandas >= 2.0 no longer drops
# non-numeric columns silently and raises TypeError instead.
eval_predictions[eval_predictions['y_true'].apply(len) > 10].iloc[:, 3:].mean(numeric_only=True)

nDCG@5         0.332078
MRR            0.210653
Recall@1       0.094644
Recall@3       0.285839
Recall@5       0.473763
Precision@1    0.141256
Precision@3    0.144795
Precision@5    0.144854
Hit@1          0.141256
Hit@3          0.321144
Hit@5          0.460631
dtype: float64