In [None]:
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
import nvgpu
import os

# Pick the GPU reporting the lowest "mem_used_percent" and expose only that
# device to CUDA. This must run before the project import below, which is
# presumably where the deep-learning framework gets initialised.
gpu_mem_used = [info["mem_used_percent"] for info in nvgpu.gpu_info()]
gpu = np.argmin(gpu_mem_used)
os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)

from src.experiments.Common import load_best_model

## Obtener los resultados en test del modelo (**todos los usuarios, solo conocidos y solo desconocidos**)

In [2]:
def obtain_results(datasets, models, users=0):
    """Evaluate the best saved configuration of each model on every test split.

    For every (dataset, subset, model) combination the best model is obtained
    with ``load_best_model``, ``train(dev=False, save_model=True)`` is called on
    it, the test split is optionally restricted by user, and the first row
    returned by ``evaluate(test=True, user_info=True)`` is collected.

    Args:
        datasets: Mapping from dataset name to an iterable of subset names.
        models: Iterable of model names accepted by ``load_best_model``.
        users: Which test users to keep. ``0`` leaves the test split untouched,
            ``1`` keeps only users that also appear in ``TRAIN_DEV`` and ``2``
            keeps only users that do not appear in ``TRAIN_DEV``. For ``1`` and
            ``2`` duplicated (``userId``, ``id_item``) pairs are collapsed to
            their last occurrence. Any other value behaves like ``0``.

    Returns:
        A DataFrame with one row per combination: the evaluation columns plus
        ``Model``, ``Set`` and ``Subset`` translated to the names used in the
        article. If there is nothing to evaluate, an empty DataFrame with only
        those three columns is returned.
    """
    # Internal identifiers -> display names used in the article.
    display_names = {
        "Set": {"restaurants": "TAV-RSTS", "pois": "TAV-POIS", "amazon": "AM"},
        "Subset": {"digital_music": "Music", "fashion": "Fashion", "gijon": "Gijón", "barcelona": "Barcelona", "madrid": "Madrid", "paris": "Paris", "newyorkcity": "New York", "london": "London"},
        "Model": {"ATT2ITM": "AITRecX", "ATT2ITM_2": "AITRecX (NT)"},
    }

    rows = []
    columns = None

    for dataset, subsets in datasets.items():
        for subset in subsets:
            for model in models:
                # Load the best configuration found for this model; `gpu` is the
                # module-level device index chosen at the top of the notebook.
                model_class = load_best_model(model=model, dataset=dataset, subset=subset, gpu=gpu)
                # Per the original note this loads the already-trained model
                # (the captured log shows weights being loaded, not a new fit).
                model_class.train(dev=False, save_model=True)

                if users in (1, 2):
                    # Split the test rows by whether their user was seen in
                    # train+dev, then keep the requested side.
                    train_dev_users = model_class.DATASET.DATA["TRAIN_DEV"].userId.unique()
                    test = model_class.DATASET.DATA["TEST"]
                    seen = test["userId"].isin(train_dev_users)
                    test = test[seen] if users == 1 else test[~seen]
                    model_class.DATASET.DATA["TEST"] = test.drop_duplicates(subset=["userId", "id_item"], keep="last")

                # Evaluate the final model and tag the row with its origin.
                result = model_class.evaluate(test=True, user_info=True)
                result["Model"] = model
                result["Set"] = dataset
                result["Subset"] = subset
                rows.append(result.values[0])
                columns = result.columns

    if not rows:
        # Nothing was evaluated: return an empty frame instead of failing on
        # the never-assigned per-iteration result.
        return pd.DataFrame(columns=["Model", "Set", "Subset"])

    results = pd.DataFrame(rows, columns=columns)
    # Assign the replaced column back: an inplace replace on `results[col]`
    # is chained assignment and does not update `results` under Copy-on-Write.
    for column, mapping in display_names.items():
        results[column] = results[column].replace(mapping)

    return results

# Test splits to evaluate, grouped by dataset family.
datasets = {
    "restaurants": ["gijon", "barcelona", "madrid", "paris", "newyorkcity"],
    "pois": ["barcelona", "madrid", "paris", "newyorkcity", "london"],
    "amazon": ["fashion", "digital_music"],
}

models = ["ATT2ITM", "ATT2ITM_2"]

# One full evaluation pass per user filter (0 = all, 1 = known, 2 = unknown).
results_all = obtain_results(datasets=datasets, models=models, users=0)
results_known = obtain_results(datasets=datasets, models=models, users=1)
results_unknown = obtain_results(datasets=datasets, models=models, users=2)

# Tag each frame with its user group and stack the rows into a single table.
stacked_rows = []
for frame, label in (
    (results_all, "all"),
    (results_known, "known"),
    (results_unknown, "unknown"),
):
    frame["users"] = label
    stacked_rows.extend(frame.values)
results = pd.DataFrame(stacked_rows, columns=results_all.columns)

[94mLoading best ATT2ITM model: 544d98b3ba0a0dfd7e3f0019f4d9675e[0m




2024-04-08 17:34:04.768409: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-04-08 17:34:04.768630: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-04-08 17:34:04.768804: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

[92m[INFO] Best epoch number: 161[0m
[92m[INFO] Model already trained. Loading weights...[0m
[92m[INFO] There are 3935 evaluation examples.[0m


2024-04-08 17:34:06.797576: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory


[92m      loss    NDCG@1   NDCG@10   NDCG@50  NDCG@100   NDCG@-1  Precision@-1  \
0  0.01088  0.428463  0.584273  0.625951  0.632571  0.634601         0.663   

   Precision@1  Precision@5  Precision@10  Recall@-1  Recall@1  Recall@5  \
0     0.428463     0.132554      0.076036   0.168488  0.428463   0.66277   

   Recall@10  Recall@20  Recall@50     F1@-1      F1@1      F1@5     F1@10  
0   0.760356   0.845489   0.945616  0.268693  0.428463  0.220923  0.138247  [0m
[94mLoading best ATT2ITM_2 model: 9eb0630de9c3dcbac1a6414ebf1fca60[0m
[92m[INFO] Best epoch number: 105[0m
[92m[INFO] Model already trained. Loading weights...[0m
[92m[INFO] There are 3935 evaluation examples.[0m
[92m       loss   NDCG@1   NDCG@10   NDCG@50  NDCG@100   NDCG@-1  Precision@-1  \
0  2.476752  0.43507  0.593718  0.633286  0.640417  0.642262      0.079683   

   Precision@1  Precision@5  Precision@10  Recall@-1  Recall@1  Recall@5  \
0      0.43507     0.136163      0.076671   0.738247   0.43507  0.68

## Crear tabla LaTeX

In [3]:
def latex_table(results, metrics=["NDCG@10"], models = ['AITRecX', 'AITRecX (NT)']):
    """Print and return a (Set, Subset, Metric) x (users, Model) summary table.

    For each metric, ``results`` is pivoted with ``Set``/``Subset`` as rows and
    ``users``/``Model`` as columns (using ``pivot_table``'s default
    aggregation), and the per-metric tables are stacked. The stacked table is
    then re-indexed by ``Set``, ``Subset`` and ``Metric`` in a fixed display
    order, printed as LaTeX with three decimals, and returned.

    Args:
        results: DataFrame containing ``Set``, ``Subset``, ``users`` and
            ``Model`` columns plus one column per requested metric.
        metrics: Names of the metric columns to include; must not be empty.
            The default list is never mutated here.
        models: Currently unused; kept so existing calls keep working.

    Returns:
        The pivoted DataFrame that was printed.

    Raises:
        ValueError: If ``metrics`` is empty.
    """
    metrics = list(metrics)
    if len(metrics) == 0:
        # Without this guard the code below fails on a never-assigned `mres`.
        raise ValueError("latex_table requires at least one metric")

    rows = []
    columns = None

    for metric in metrics:
        mres = results.pivot_table(index=[ "Set", "Subset"], columns=["users", "Model"])[metric].reset_index()
        mres["Metric"] = metric
        rows.extend(mres.values)
        columns = mres.columns

    result_df = pd.DataFrame(rows, columns=columns)

    # Fix the display order of the rows.
    result_df['Set'] = pd.Categorical(result_df['Set'], ["AM", "TAV-POIS", "TAV-RSTS"])
    result_df['Subset'] = pd.Categorical(result_df['Subset'], ["Music", "Fashion", "Gijón", "Barcelona", "Madrid", "New York", "Paris", "London"])
    result_df['Metric'] = pd.Categorical(result_df['Metric'], metrics)

    result_df = result_df.pivot_table(index=["Set", "Subset", "Metric"])
    print(result_df.to_latex(float_format="%.3f"))
    return result_df

# Metrics to report. `models` now holds the article display names rather
# than the internal identifiers used when the results were computed.
metrics = ["NDCG@10"]
models = ["AITRecX", "AITRecX (NT)"]

# Build (and print) the LaTeX table, then keep a spreadsheet copy of it.
table = latex_table(results=results, metrics=metrics, models=models)
table.to_excel(excel_writer="ablation_results.xlsx")

\begin{tabular}{lllrrrrrr}
\toprule
         &       & users & \multicolumn{2}{l}{all} & \multicolumn{2}{l}{known} & \multicolumn{2}{l}{unknown} \\
         &       & Model & AITRecX & AITRecX (NT) & AITRecX & AITRecX (NT) & AITRecX & AITRecX (NT) \\
Set & Subset & Metric &         &              &         &              &         &              \\
\midrule
AM & Music & NDCG@10 &   0.473 &        0.512 &   0.510 &        0.542 &   0.453 &        0.495 \\
         & Fashion & NDCG@10 &   0.496 &        0.514 &   0.558 &        0.575 &   0.478 &        0.496 \\
TAV-POIS & Barcelona & NDCG@10 &   0.851 &        0.858 &   0.846 &        0.854 &   0.865 &        0.870 \\
         & Madrid & NDCG@10 &   0.858 &        0.865 &   0.856 &        0.863 &   0.865 &        0.871 \\
         & New York & NDCG@10 &   0.875 &        0.879 &   0.877 &        0.882 &   0.860 &        0.862 \\
         & Paris & NDCG@10 &   0.887 &        0.894 &   0.885 &        0.892 &   0.900 &        0.904 \\
      

  result_df = result_df.pivot_table(index=["Set", "Subset", "Metric"])
  print(result_df.to_latex(float_format="%.3f"))


: 

In [60]:
# Plain-text rendering of the whole table at full float precision.
table_text = table.to_string()
print(table_text)

users                            all                  known                unknown             
Model                        AITRecX AITRecX (NT)   AITRecX AITRecX (NT)   AITRecX AITRecX (NT)
Set      Subset    Metric                                                                      
AM       Music     NDCG@10  0.473190     0.511576  0.509774     0.541622  0.452882     0.494715
         Fashion   NDCG@10  0.495916     0.514102  0.558292     0.575241  0.477773     0.496289
TAV-POIS Barcelona NDCG@10  0.851037     0.857648  0.846443     0.853607  0.865142     0.870095
         Madrid    NDCG@10  0.857720     0.864989  0.855511     0.862801  0.864532     0.871473
         New York  NDCG@10  0.874723     0.879449  0.876720     0.881818  0.860069     0.861913
         Paris     NDCG@10  0.887022     0.893569  0.884536     0.891588  0.900399     0.904179
         London    NDCG@10  0.876580     0.882557  0.875012     0.881900  0.884357     0.885942
TAV-RSTS Gijón     NDCG@10  0.584273    