In [1]:
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
import nvgpu
import os

# Pick the index of the GPU whose reported "mem_used_percent" is lowest, so the
# notebook runs on the least busy device.
# NOTE(review): np.argmin raises ValueError on an empty sequence, so this
# presumably assumes nvgpu.gpu_info() reports at least one GPU — confirm.
gpu = np.argmin([g["mem_used_percent"] for g in nvgpu.gpu_info()]) 
# Expose only the chosen device. This is done before the project import below,
# which appears to load TensorFlow (judging by the cell output), so that the
# restriction is already in place when CUDA is initialised.
os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)

from src.experiments.Common import load_best_model

2024-04-08 21:04:03.502194: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-08 21:04:03.502218: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-08 21:04:03.503243: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-04-08 21:04:03.508364: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-04-08 21:04:06.779140: I external/local_xla/xla/

Using TensorFlow backend



TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



## Obtener los resultados en test del modelo (**para todo tipo de usuarios**)

In [None]:
def _restrict_test_users(data, users):
    """Filter ``data["TEST"]`` in place according to the ``users`` mode.

    Args:
        data: mapping holding at least the "TRAIN_DEV" and "TEST" frames, each
            with a ``userId`` column ("TEST" also needs ``id_item``).
        users: 1 keeps only test rows whose user appears in "TRAIN_DEV",
            2 keeps only rows whose user does not. Any other value leaves
            the test split untouched.
    """
    if users not in (1, 2):
        return

    train_dev_users = data["TRAIN_DEV"].userId.unique()
    test = data["TEST"]
    is_known = test["userId"].isin(train_dev_users)
    selected = test[is_known] if users == 1 else test[~is_known]
    # Keep a single row per (user, item) pair, preferring the last occurrence.
    data["TEST"] = selected.drop_duplicates(subset=["userId", "id_item"], keep="last")


def obtain_results(datasets, models, users=0):
    """Evaluate the best configuration of every model on the test split.

    Args:
        datasets: mapping ``dataset name -> list of subset names``.
        models: iterable of model identifiers accepted by ``load_best_model``.
        users: which test users are evaluated: 0 = all of them, 1 = only
            users present in train+dev, 2 = only users absent from train+dev.
            Any other value behaves like 0.

    Returns:
        pandas.DataFrame with one row per (dataset, subset, model) holding the
        first row returned by ``evaluate`` plus the "Model", "Set" and
        "Subset" labels, renamed to the names used in the paper. When nothing
        is evaluated (empty ``datasets``/``models``) an empty frame with just
        those three label columns is returned.
    """
    rows = []
    columns = None

    for dataset, subsets in datasets.items():
        for subset in subsets:
            for model in models:
                # Load the best configuration found for this model
                model_class = load_best_model(model=model, dataset=dataset, subset=subset, gpu=gpu)
                # Obtain the trained model (presumably reloads stored weights
                # when it was already trained — confirm in the model class)
                model_class.train(dev=False, save_model=True)

                # Optionally narrow the test split to known / unknown users
                _restrict_test_users(model_class.DATASET.DATA, users)

                # Evaluate the final model
                result = model_class.evaluate(test=True, user_info=True)
                result["Model"] = model
                result["Set"] = dataset
                result["Subset"] = subset
                rows.append(result.values[0])
                columns = result.columns

    if columns is None:
        # Nothing was evaluated: return an empty table instead of failing on
        # an unbound ``result``.
        return pd.DataFrame(columns=["Model", "Set", "Subset"])

    results = pd.DataFrame(rows, columns=columns)

    # Use the display names of the paper. The replaced column is assigned back
    # explicitly: ``results[col].replace(..., inplace=True)`` acts on an
    # intermediate object and is not guaranteed to update ``results``
    # (it never does under pandas Copy-on-Write).
    results["Set"] = results["Set"].replace({"restaurants": "TAV-RSTS", "pois":"TAV-POIS", "amazon":"AM"})
    results["Subset"] = results["Subset"].replace({"digital_music": "Music", "fashion":"Fashion", "gijon": "Gijón", "barcelona":"Barcelona", "madrid": "Madrid", "paris": "Paris", "newyorkcity": "New York", "london": "London"})
    results["Model"] = results["Model"].replace({"ATT2ITM": "AITRecX", "ATT2ITM_2": "AITRecX (NT)",})

    return results

# Full experimental configuration: dataset -> evaluated subsets.
datasets = {"restaurants":["gijon", "barcelona", "madrid", "paris", "newyorkcity"],
            "pois":["barcelona", "madrid", "paris", "newyorkcity", "london"],
            "amazon":["fashion", "digital_music"]}

# NOTE(review): this single-city override replaces the full configuration
# above — it looks like a leftover from a quick run; remove it to evaluate
# every dataset.
datasets = {"restaurants":[ "newyorkcity"]}
models = ["ATT2ITM", "ATT2ITM_2"]

# One evaluation pass per user group (see the ``users`` argument of obtain_results).
results_all = obtain_results(datasets=datasets, models=models, users=0)
results_known = obtain_results(datasets=datasets, models=models, users=1)
results_unknown = obtain_results(datasets=datasets, models=models, users=2)

# Tag every table with its user group.
results_all["users"]="all"
results_known["users"]="known"
results_unknown["users"]="unknown"

# Stack the three tables. pd.concat aligns columns by name, whereas rebuilding
# the frame from raw ``.values`` rows would align them by position only.
results = pd.concat([results_all, results_known, results_unknown], ignore_index=True)

## Crear tabla Latex

In [None]:
def latex_table(results, metrics=["NDCG@10"], models = ['AITRecX', 'AITRecX (NT)']):
    """Build the per-metric comparison table, print it as LaTeX and return it.

    Args:
        results: DataFrame with the columns "Set", "Subset", "users" and
            "Model" plus one column per requested metric.
        metrics: names of the metric columns to include; also fixes the order
            of the "Metric" index level. Must be non-empty, because the
            column layout is taken from the pivot of the last metric.
        models: currently unused by the active code (it is only referenced in
            the commented-out Excel export below).

    Returns:
        DataFrame indexed by ("Set", "Subset", "Metric"); its columns come
        from the ("users", "Model") pivot of ``results``.
    """
    result_df = []

    for metric in metrics:
        # One row per (Set, Subset) and one column per (users, Model) pair
        # for this metric (pivot_table aggregates repeated entries).
        mres = results.pivot_table(index=[ "Set", "Subset"], columns=["users", "Model"])[metric].reset_index()
        mres["Metric"] = metric
        result_df.extend(mres.values)

    # Rebuild a single frame from the collected rows, reusing the column
    # layout of the last pivot.
    result_df = pd.DataFrame(result_df, columns=mres.columns)

    # Fix the display order of the rows through ordered category lists.
    # NOTE(review): pd.Categorical maps values missing from these lists to
    # NaN, so new datasets/subsets must be added here — confirm this is intended.
    result_df['Set'] = pd.Categorical(result_df['Set'], ["AM", "TAV-POIS", "TAV-RSTS"])
    result_df['Subset'] = pd.Categorical(result_df['Subset'], ["Music", "Fashion", "Gijón", "Barcelona", "Madrid", "New York", "Paris", "London"])
    result_df['Metric'] = pd.Categorical(result_df['Metric'], metrics)

    # Move the three descriptors into the index, leaving only metric values as columns.
    result_df = result_df.pivot_table(index=["Set", "Subset", "Metric"])
    print(result_df.to_latex(float_format="%.3f"))
    # result_df[models].to_excel("trecx_known.xlsx")
    return result_df

metrics = ["NDCG@10"]
models = ['AITRecX', 'AITRecX (NT)']

table = latex_table(results, metrics=metrics, models=models)

# to_excel does not create missing parent folders, so make sure the output
# directory exists before writing the spreadsheet.
os.makedirs("output", exist_ok=True)
table.to_excel("output/ablation_results.xlsx")

## Comparar gráficas y valores de "attention" para ambos modelos

In [5]:
def evaluate_samples(datasets, models, text="no"):
    """Run a sample text query through every dataset/subset/model combination.

    Args:
        datasets: mapping ``dataset name -> list of subset names``.
        models: iterable of model identifiers accepted by ``load_best_model``.
        text: query passed to ``evaluate_text``. Defaults to "no", the query
            that was previously hard-coded.

    Returns:
        None. Whatever ``evaluate_text`` reports is produced by the model
        class itself.
    """
    for dataset, subsets in datasets.items():
        for subset in subsets:
            for model in models:
                # Load the best configuration found for this model
                model_class = load_best_model(model=model, dataset=dataset, subset=subset, gpu=gpu)
                # Obtain the trained model (presumably reloads stored weights
                # when it was already trained — confirm in the model class)
                model_class.train(dev=False, save_model=True)
                # Evaluate the query text
                model_class.evaluate_text(text)

# Points-of-interest cities on which both model variants are queried.
datasets = {
    "pois": ["madrid", "paris", "newyorkcity"],
}
# Model variants whose attention output is compared.
models = [
    "ATT2ITM",
    "ATT2ITM_2",
]

evaluate_samples(models=models, datasets=datasets)

[94mLoading best ATT2ITM model: afe7526023b1827d88ecad6479de8971[0m
[92m[INFO] Best epoch number: 244[0m
[92m[INFO] Model already trained. Loading weights...[0m
[92m[QUERY] 'no'[0m
[PREPR]         [no]
[TXT2ID]        [11]
[WORD FREQ]     [88862]
El rango de valores de la matriz de atención para todas las palabras se mueve en el rango [-1.0,0.9999994039535522].
[91m[ERROR] Reparar y unificar la parte de selección de palabras relevantes[0m
	[0.53] Parque de El Capricho                        {'osuna': 0.99927163, 'bunker': 0.9990261, 'capricho': 0.9983824, 'duquesa': 0.9964534, 'retirado': 0.9932595, 'comunion': 0.992307, 'casita': 0.9911413, 'cisn': 0.9891181, 'duque': 0.9873863, 'canal': 0.98469764}
	[0.52] Catedral de Sta María la Real de la Almudena {'catedral': 0.9971976, 'neogotico': 0.99526435, 'religioso': 0.99112713, 'cripta': 0.990997, 'misa': 0.98993, 'vidriera': 0.98922557, 'altar': 0.9882856, 'virgen': 0.9812937, 'catolico': 0.966136, 'techo': 0.9652268}
	[0.52] F