In [1]:
import numpy as np
import pandas as pd
from RecGroupSys import VotingMethods

In [2]:
df_users = pd.read_csv('../data/df_users_train.csv')

In [3]:
df_groups = pd.read_pickle('df_groups_svdpp_random_popular_xgb')
df_groups.head()

Unnamed: 0,members,members2,preds_svdpp,preds_random,preds_popular,preds_xgb
0,"['Sam Vimaire', 'Jhopper88']","[Sam Vimaire, Jhopper88]",2651 110327 266507 115746 0 9...,128996 201 8203 244331 140934 177...,161936 224517 233078 187645 182028 115...,BGGId 93260 1506...
1,"['Kehmh', 'dracosf2']","[Kehmh, dracosf2]",15369 25292 198953 243 0 9.657...,20542 62219 103885 124361 19643 277...,161936 224517 233078 187645 182028 115...,BGGId 191004 155068 ...
2,"['Drury67', 'sjackson080', 'mosaicu']","[Drury67, sjackson080, mosaicu]",121 233078 37111 93260 158889 ...,240196 3279 235655 8924 130960 203...,161936 224517 233078 187645 182028 115...,BGGId 28720 11...
3,"['CallMeTim72', 'calderyn', 'leochab', 'sirei'...","[CallMeTim72, calderyn, leochab, sirei, Abekon...",233078 246900 3076 12493 1...,4209 257527 148949 121657 59959 721...,161936 224517 233078 187645 182028 115...,BGGId 32674 2053...
4,"['alwaystang', 'Candorras', 'jmellby', 'Manpac...","[alwaystang, Candorras, jmellby, Manpachism, C...",18098 134352 302723 219215 1...,225694 36553 236217 63778 206051 171...,161936 224517 233078 187645 182028 115...,BGGId 218333 528...


In [6]:
RELEVANCE_THRESHOLD = 7


def precision_at_k(recommendations_df, user_history_df, k=10, threshold=RELEVANCE_THRESHOLD):
    recommended_ids = list(recommendations_df.index)
    if len(recommendations_df) > k:
        recommended_ids = recommended_ids[:k]
    relevant_items = user_history_df[user_history_df['Rating'] >= threshold]['BGGId']
    hits = pd.Series(recommended_ids).isin(relevant_items).sum()
    return hits / k


def ndcg_at_k(recommendations_df, user_history_df, k=10):
    recommended_ids = list(recommendations_df.index)
    if len(recommendations_df) > k:
        recommended_ids = recommended_ids[:k]

    actual_k = len(recommended_ids)

    if actual_k == 0:
        return 0.0

    temp_recs_df = pd.DataFrame({'BGGId': recommended_ids})
    merged_df = pd.merge(temp_recs_df, user_history_df, on='BGGId', how='left')

    relevance = merged_df['Rating'].fillna(0).values

    dcg = np.sum(relevance / np.log2(np.arange(2, actual_k + 2)))

    ideal_relevance = np.sort(relevance)[::-1]
    idcg = np.sum(ideal_relevance / np.log2(np.arange(2, actual_k + 2)))

    if idcg == 0:
        return 0.0

    return dcg / idcg


results = []

for grp_id in range(1):
    users = df_groups.loc[grp_id, 'members2']
    groups_results = []

    for prd in ['preds_svdpp', 'preds_random', 'preds_popular', 'preds_xgb']:

        aux = df_groups.loc[grp_id, prd].fillna(0)
        aux[aux == 0] = 0.1
        for aggmethod in [VotingMethods.average, VotingMethods.multiplicative,
                          VotingMethods.borda_count, VotingMethods.copeland_score, VotingMethods.approval_voting,
                          VotingMethods.least_misery, VotingMethods.most_pleasure, VotingMethods.average_without_misery,
                          VotingMethods.fairness, VotingMethods.most_respected_person]:
            _pr = []
            _nd = []

            rdf = aggmethod(aux)
            if aggmethod.__name__ == 'fairness':
                lx = np.linspace(10, 1, len(rdf))
                rdf = pd.Series(lx, index=rdf)
            rdf = rdf.sort_values(ascending=False)

            for idx, usr_name in enumerate(users):
                usr_hist = df_users[df_users.Username == usr_name]
                _pr.append(precision_at_k(rdf, usr_hist))
                _nd.append(ndcg_at_k(rdf, usr_hist))

            groups_results.append({prd: {aggmethod.__name__: {'precision': np.mean(_pr), 'ndcg': np.mean(_nd)}}})
    results.append(groups_results)

print('Done')


  aux[aux == 0] = 0.1


Done


In [11]:
flat_data = []
for run_data in results:
    for item in run_data:
        model_name = list(item.keys())[0]
        method_data = item[model_name]
        method_name = list(method_data.keys())[0]
        metrics = method_data[method_name]
        flat_data.append({
            "model": model_name,
            "method": method_name,
            "precision": metrics["precision"],
            "ndcg": metrics["ndcg"]
        })

# Crear el DataFrame
df = pd.DataFrame(flat_data)
df

Unnamed: 0,model,method,precision,ndcg
0,preds_svdpp,average,0.1,0.5
1,preds_svdpp,multiplicative,0.1,0.5
2,preds_svdpp,borda_count,0.1,0.465844
3,preds_svdpp,copeland_score,0.1,0.5
4,preds_svdpp,approval_voting,0.1,0.5
5,preds_svdpp,least_misery,0.1,0.5
6,preds_svdpp,most_pleasure,0.1,0.5
7,preds_svdpp,average_without_misery,0.0,0.0
8,preds_svdpp,fairness,0.1,0.465844
9,preds_svdpp,most_respected_person,0.1,0.5


In [12]:
# Agrupar y calcular promedio y desviación estándar
agg_results = df.groupby(['model', 'method']).agg(['mean', 'std'])

def create_metric_table(agg_df, metric_name):
    """Formatea la tabla para una métrica específica (mean ± std)."""
    # Seleccionar los datos de la métrica
    metric_df = agg_df[metric_name]

    # Formatear la celda como 'promedio ± std'
    # .fillna(0) se usa por si hay un caso con una sola medición (std = NaN)
    formatted_series = (
        metric_df['mean'].map('{:.3f}'.format) +
        ' ± ' +
        metric_df['std'].fillna(0).map('{:.3f}'.format)
    )

    # Reorganizar la tabla a la forma final: modelos en filas, métodos en columnas
    return formatted_series.unstack('method')

# --- Generar y mostrar las tablas ---

# Tabla para Precision
precision_table = create_metric_table(agg_results, 'precision')
print("--- Tabla de Resultados: Precision (Promedio ± Std) ---")
display(precision_table)

# Tabla para NDCG
ndcg_table = create_metric_table(agg_results, 'ndcg')
print("\n--- Tabla de Resultados: NDCG (Promedio ± Std) ---")
display(ndcg_table)

--- Tabla de Resultados: Precision (Promedio ± Std) ---


method,approval_voting,average,average_without_misery,borda_count,copeland_score,fairness,least_misery,most_pleasure,most_respected_person,multiplicative
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
preds_popular,0.300 ± 0.000,0.300 ± 0.000,0.150 ± 0.000,0.300 ± 0.000,0.300 ± 0.000,0.300 ± 0.000,0.300 ± 0.000,0.300 ± 0.000,0.300 ± 0.000,0.300 ± 0.000
preds_random,0.050 ± 0.000,0.050 ± 0.000,0.000 ± 0.000,0.050 ± 0.000,0.000 ± 0.000,0.050 ± 0.000,0.150 ± 0.000,0.050 ± 0.000,0.000 ± 0.000,0.050 ± 0.000
preds_svdpp,0.100 ± 0.000,0.100 ± 0.000,0.000 ± 0.000,0.100 ± 0.000,0.100 ± 0.000,0.100 ± 0.000,0.100 ± 0.000,0.100 ± 0.000,0.100 ± 0.000,0.100 ± 0.000
preds_xgb,0.050 ± 0.000,0.050 ± 0.000,0.000 ± 0.000,0.050 ± 0.000,0.000 ± 0.000,0.050 ± 0.000,0.050 ± 0.000,0.050 ± 0.000,0.000 ± 0.000,0.050 ± 0.000



--- Tabla de Resultados: NDCG (Promedio ± Std) ---


method,approval_voting,average,average_without_misery,borda_count,copeland_score,fairness,least_misery,most_pleasure,most_respected_person,multiplicative
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
preds_popular,0.652 ± 0.000,0.652 ± 0.000,0.720 ± 0.000,0.652 ± 0.000,0.652 ± 0.000,0.652 ± 0.000,0.652 ± 0.000,0.652 ± 0.000,0.652 ± 0.000,0.652 ± 0.000
preds_random,0.231 ± 0.000,0.424 ± 0.000,0.500 ± 0.000,0.424 ± 0.000,0.193 ± 0.000,0.294 ± 0.000,0.637 ± 0.000,0.294 ± 0.000,0.193 ± 0.000,0.424 ± 0.000
preds_svdpp,0.500 ± 0.000,0.500 ± 0.000,0.000 ± 0.000,0.466 ± 0.000,0.500 ± 0.000,0.466 ± 0.000,0.500 ± 0.000,0.500 ± 0.000,0.500 ± 0.000,0.500 ± 0.000
preds_xgb,0.151 ± 0.000,0.250 ± 0.000,0.000 ± 0.000,0.193 ± 0.000,0.000 ± 0.000,0.178 ± 0.000,0.151 ± 0.000,0.250 ± 0.000,0.000 ± 0.000,0.250 ± 0.000


In [13]:
precision_table

method,approval_voting,average,average_without_misery,borda_count,copeland_score,fairness,least_misery,most_pleasure,most_respected_person,multiplicative
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
preds_popular,0.300 ± 0.000,0.300 ± 0.000,0.150 ± 0.000,0.300 ± 0.000,0.300 ± 0.000,0.300 ± 0.000,0.300 ± 0.000,0.300 ± 0.000,0.300 ± 0.000,0.300 ± 0.000
preds_random,0.050 ± 0.000,0.050 ± 0.000,0.000 ± 0.000,0.050 ± 0.000,0.000 ± 0.000,0.050 ± 0.000,0.150 ± 0.000,0.050 ± 0.000,0.000 ± 0.000,0.050 ± 0.000
preds_svdpp,0.100 ± 0.000,0.100 ± 0.000,0.000 ± 0.000,0.100 ± 0.000,0.100 ± 0.000,0.100 ± 0.000,0.100 ± 0.000,0.100 ± 0.000,0.100 ± 0.000,0.100 ± 0.000
preds_xgb,0.050 ± 0.000,0.050 ± 0.000,0.000 ± 0.000,0.050 ± 0.000,0.000 ± 0.000,0.050 ± 0.000,0.050 ± 0.000,0.050 ± 0.000,0.000 ± 0.000,0.050 ± 0.000


In [14]:
ndcg_table

method,approval_voting,average,average_without_misery,borda_count,copeland_score,fairness,least_misery,most_pleasure,most_respected_person,multiplicative
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
preds_popular,0.652 ± 0.000,0.652 ± 0.000,0.720 ± 0.000,0.652 ± 0.000,0.652 ± 0.000,0.652 ± 0.000,0.652 ± 0.000,0.652 ± 0.000,0.652 ± 0.000,0.652 ± 0.000
preds_random,0.231 ± 0.000,0.424 ± 0.000,0.500 ± 0.000,0.424 ± 0.000,0.193 ± 0.000,0.294 ± 0.000,0.637 ± 0.000,0.294 ± 0.000,0.193 ± 0.000,0.424 ± 0.000
preds_svdpp,0.500 ± 0.000,0.500 ± 0.000,0.000 ± 0.000,0.466 ± 0.000,0.500 ± 0.000,0.466 ± 0.000,0.500 ± 0.000,0.500 ± 0.000,0.500 ± 0.000,0.500 ± 0.000
preds_xgb,0.151 ± 0.000,0.250 ± 0.000,0.000 ± 0.000,0.193 ± 0.000,0.000 ± 0.000,0.178 ± 0.000,0.151 ± 0.000,0.250 ± 0.000,0.000 ± 0.000,0.250 ± 0.000


In [15]:
precision_table

Index(['approval_voting', 'average', 'average_without_misery', 'borda_count',
       'copeland_score', 'fairness', 'least_misery', 'most_pleasure',
       'most_respected_person', 'multiplicative'],
      dtype='object', name='method')

In [None]:
a = set(recommended_ids)
b = set(relevant_items)

In [None]:
a.intersection(b)

In [None]:
b.intersection(a)