# Evaluación de rendimiento de los modelos Usuario e Item

In [1]:
import numpy as np
import pandas as pd
import time
from surprise import Reader
from surprise import Dataset
from surprise.model_selection import train_test_split
from surprise import KNNBasic
from surprise import accuracy
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Both model folders live under the same dataset root; the resulting strings
# are used as prefixes for the pickle files read and written by this notebook.
_dataset_root = "D:/Dataset_1/lastfm-dataset-1K/"
PathUser = _dataset_root + "Modelo_Usuario_Usuario/"
PathItem = _dataset_root + "Modelo_Item_Item/"

In [3]:
# Ratings used by the user-user model; the rating column is rounded and
# stored as uint8.
df_ratings_user = pd.read_pickle(PathUser + "1.Ratings_Normalizacion_Log.pkl")
df_ratings_user["rating_lineal"] = (
    df_ratings_user["rating_lineal"].round().astype('uint8')
)

# This dataframe is produced by the item-item model script and has a reduced
# set of artists.
df_ratings_items = pd.read_pickle(PathItem + "1.Ratings_Normalizacion_Log.pkl")
df_ratings_items["rating_lineal"] = (
    df_ratings_items["rating_lineal"].round().astype('uint8')
)

In [21]:
# Number of distinct artists in the item-item ratings frame.
df_ratings_items["artname"].nunique()

63291

In [8]:
# Model evaluation helper
def modelSR_User_Eval(df, medida_similitud, k, umbrals, Flagmodel, gamma,
                      test_size=.2, random_state=None):
    """Fit a KNNBasic model on a random split of ``df`` and return its test RMSE.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'user_id', 'artname' and 'rating_lineal'.
        Ratings are declared to Surprise with a (1, 5) scale.
    medida_similitud : str
        Similarity name placed in ``sim_options['name']``
        (the notebook uses 'jaccard', 'cosine' and 'pearson').
    k : int
        Maximum neighbourhood size given to ``KNNBasic`` (``min_k`` is fixed to 2).
    umbrals
        Value placed in ``sim_options['min_support']``.
    Flagmodel : bool
        Value placed in ``sim_options['user_based']``: True builds a
        user-user model, False an item-item model.
    gamma
        Value placed in ``sim_options['gamma']``.
    test_size : float, optional
        Fraction of ratings held out for validation (default 0.2, as before).
    random_state : int or None, optional
        Seed forwarded to ``train_test_split``. The default None keeps the
        previous behaviour (a different random split on every call); pass an
        int to make the split, and therefore the RMSE, reproducible.

    Returns
    -------
    float
        RMSE of the predictions on the held-out set. The value is also
        printed because ``accuracy.rmse`` is called with ``verbose=True``.
    """
    reader = Reader(rating_scale=(1, 5))
    # Build the Surprise dataset from the dataframe.
    dataset_ratings = Dataset.load_from_df(
        df[['user_id', 'artname', 'rating_lineal']], reader
    )
    # Split into training and validation sets (80% / 20% by default).
    train_set, test_set = train_test_split(
        dataset_ratings, test_size=test_size, random_state=random_state
    )
    # Orientation (user-user vs item-item) and similarity measure both come
    # from the arguments.
    # NOTE(review): 'mclaughlin' and 'gamma' are not standard Surprise
    # sim_options keys; presumably a customized Surprise build consumes them.
    sim_options = {
        'name': medida_similitud,
        'user_based': Flagmodel,
        'min_support': umbrals,
        'mclaughlin': True,
        'gamma': gamma,
    }
    model = KNNBasic(k=k, min_k=2, sim_options=sim_options)
    # Fit on the training set.
    model.fit(trainset=train_set)
    # Predict every (user, item) pair of the validation set.
    test_predictions = model.test(test_set)
    metrica = accuracy.rmse(test_predictions, verbose=True)
    return metrica

# Evaluación de Hiperparámetros del Modelo Usuario - Usuario

In [12]:
# Hyperparameter tuning for the user-user model: similarity measure,
# neighbourhood size, similarity threshold and gamma.
medidas = ['jaccard', 'cosine', 'pearson']
umbrales = [0.4, 0.5, 0.6, 0.7]
lista_gamma = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
lista = [i*2 for i in range(1, 20)]  # neighbourhood sizes 2, 4, ..., 38
result = []

for medida in medidas:
    for tamanio_vecindario in lista:
        for umbral in umbrales:
            for valor_gamma in lista_gamma:
                # time.clock() was removed in Python 3.8; perf_counter() is the
                # replacement. Only Fin_seg - Inicio_seg is meaningful, since
                # the clock's reference point is undefined.
                start_time = time.perf_counter()
                valor = modelSR_User_Eval(df_ratings_user, medida, tamanio_vecindario, umbral, True, valor_gamma)
                end_time = time.perf_counter()
                result.append([tamanio_vecindario, umbral, medida, valor_gamma, start_time, end_time, valor])

# Build the results frame once, after the search, instead of on every iteration.
df_evaluacion0 = pd.DataFrame(result, columns = ["Tamanio_Vecindario", "Umbral_Similitud", "Indice", "Valor_Gamma", "Inicio_seg", "Fin_seg", "RMSE"])

df_evaluacion0.to_pickle(PathUser+"1.Evaluacion_Cos_Pearson_Jaccard_ModeloUser_Gamma.pkl")

Computing the jaccard similarity matrix...
Mclaughlin: Multiplyng matrix
Done computing similarity matrix.
RMSE: 0.6883
Computing the jaccard similarity matrix...
Mclaughlin: Multiplyng matrix
Done computing similarity matrix.
RMSE: 0.6865
Computing the jaccard similarity matrix...
Mclaughlin: Multiplyng matrix
Done computing similarity matrix.
RMSE: 0.6892
Computing the jaccard similarity matrix...
Mclaughlin: Multiplyng matrix
Done computing similarity matrix.
RMSE: 0.6870
Computing the jaccard similarity matrix...
Mclaughlin: Multiplyng matrix
Done computing similarity matrix.
RMSE: 0.6868
Computing the jaccard similarity matrix...
Mclaughlin: Multiplyng matrix
Done computing similarity matrix.
RMSE: 0.6863
Computing the jaccard similarity matrix...
Mclaughlin: Multiplyng matrix
Done computing similarity matrix.
RMSE: 0.6890
Computing the jaccard similarity matrix...
Mclaughlin: Multiplyng matrix
Done computing similarity matrix.
RMSE: 0.6879
Computing the jaccard similarity matrix.

# Evaluación de Hiperparámetros del Modelo Item - Item

La ejecución de esta evaluación toma alrededor de 4 a 5 días, dependiendo del hardware.

In [None]:
# Hyperparameter tuning for the item-item model: similarity measure,
# neighbourhood size, similarity threshold and gamma.

medidas = ['jaccard', 'cosine', 'pearson']
umbrales = [0.8, 0.9]
lista_gamma = [17, 19]
lista = [i*2 for i in range(18, 20)]  # neighbourhood sizes 36 and 38
result = []

for medida in medidas:
    for tamanio_vecindario in lista:
        for umbral in umbrales:
            for valor_gamma in lista_gamma:
                # time.clock() was removed in Python 3.8; perf_counter() is the
                # replacement. Only Fin_seg - Inicio_seg is meaningful, since
                # the clock's reference point is undefined.
                start_time = time.perf_counter()
                valor = modelSR_User_Eval(df_ratings_items, medida, tamanio_vecindario, umbral, False, valor_gamma)
                end_time = time.perf_counter()
                result.append([tamanio_vecindario, umbral, medida, valor_gamma, start_time, end_time, valor])

# Build the results frame once, after the search, instead of on every iteration.
df_evaluacion0 = pd.DataFrame(result, columns = ["Tamanio_Vecindario", "Umbral_Similitud", "Indice", "Valor_Gamma", "Inicio_seg", "Fin_seg", "RMSE"])

# Persist the frame built above. The original referenced `df_evaluacion`,
# which this cell never defines, so the save raised NameError on a fresh kernel.
df_evaluacion0.to_pickle(PathItem+"1.Evaluacion_Cos_Pearson_Jaccard_ModeloItem_Gamma.pkl")
    

Computing the jaccard similarity matrix...
Mclaughlin: Multiplyng matrix
Done computing similarity matrix.
RMSE: 0.6854
Computing the jaccard similarity matrix...
Mclaughlin: Multiplyng matrix
Done computing similarity matrix.
RMSE: 0.6829
Computing the jaccard similarity matrix...
Mclaughlin: Multiplyng matrix
Done computing similarity matrix.
RMSE: 0.6827
Computing the jaccard similarity matrix...
Mclaughlin: Multiplyng matrix
Done computing similarity matrix.
RMSE: 0.6816
Computing the jaccard similarity matrix...
Mclaughlin: Multiplyng matrix
Done computing similarity matrix.
RMSE: 0.6874
Computing the jaccard similarity matrix...
Mclaughlin: Multiplyng matrix
Done computing similarity matrix.
RMSE: 0.6820
Computing the jaccard similarity matrix...
Mclaughlin: Multiplyng matrix
Done computing similarity matrix.
RMSE: 0.6805
Computing the jaccard similarity matrix...
Mclaughlin: Multiplyng matrix
Done computing similarity matrix.
RMSE: 0.6816
Computing the cosine similarity matrix..