##**Código documentado**

In [1]:
#Instalamos la librería surprise
! pip install surprise

#Importamos las librerías a utilizar
import pandas as pd
import numpy as np
from google.colab import drive
from sklearn.metrics import mean_squared_error
from surprise import *
from surprise.model_selection import cross_validate
from collections import defaultdict

#Solicitamos permisos y montamos la carpeta de Google Drive  
drive.mount('/content/drive')

#Establecemos un máximo fijo en el ancho de las columnas
pd.options.display.max_colwidth = 150

#Leemos los archivos
puntuaciones = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/M8/DataSets/datos_ejercicios/ratings_filtratos.csv', low_memory=False)
movies = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/M8/DataSets/datos_ejercicios/medias.csv', low_memory=False)

#Eliminar columnas "time" y "Unnamed: 0"
puntuaciones = puntuaciones.drop(['Unnamed: 0','time'], axis=1)
movies = movies.drop(['Unnamed: 0','conteo','media_rating','genre'], axis=1)

#Definimos un objetivo reader para poder pasar el dataframe al formato que necesita surprise para hacer las recomendaciones 
reader = Reader()

#Creamos el dataset con los datos de los ratings
data = Dataset.load_from_df(puntuaciones[['user_id', 'imdb_id', 'rating']], reader)


Collecting surprise
  Downloading surprise-0.1-py2.py3-none-any.whl (1.8 kB)
Collecting scikit-surprise
  Downloading scikit-surprise-1.1.1.tar.gz (11.8 MB)
[K     |████████████████████████████████| 11.8 MB 4.4 MB/s 
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.1-cp37-cp37m-linux_x86_64.whl size=1630065 sha256=68232032e9499a9fa878635420f6acfc1ec7efff0f95ceeb79b025b09f024059
  Stored in directory: /root/.cache/pip/wheels/76/44/74/b498c42be47b2406bd27994e16c5188e337c657025ab400c1c
Successfully built scikit-surprise
Installing collected packages: scikit-surprise, surprise
Successfully installed scikit-surprise-1.1.1 surprise-0.1
Mounted at /content/drive


In [2]:
#Realizamos un benchmark para determinar cuál es el algoritmo más adecuado. 
benchmark = []
#Iteramos todos los algoritmos
for algoritmo in [SVD(), SlopeOne(), NormalPredictor(), KNNBaseline(), KNNBasic(), KNNWithMeans(), KNNWithZScore(), BaselineOnly(), CoClustering()]:
    #Realizamos la validación cruzada
    results = cross_validate(algoritmo, data, measures=['RMSE'], verbose=False)
    
    #Guardamos los resultados
    tmp = pd.DataFrame.from_dict(results).mean(axis=0)
    tmp = tmp.append(pd.Series([str(algoritmo).split(' ')[0].split('.')[-1]], index=['Algorithm']))
    benchmark.append(tmp)
    
pd.DataFrame(benchmark).set_index('Algorithm').sort_values('test_rmse') 

Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Don

Unnamed: 0_level_0,test_rmse,fit_time,test_time
Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
BaselineOnly,2.883014,0.129318,0.085025
SVD,2.883536,2.737154,0.110473
KNNBaseline,2.892984,0.190643,0.613617
KNNWithMeans,2.89581,0.104664,0.49573
KNNWithZScore,2.897193,0.166703,0.590919
KNNBasic,2.898945,0.075451,0.475988
CoClustering,2.901391,1.992135,0.134073
SlopeOne,2.904908,4.969942,1.052351
NormalPredictor,2.980912,0.068057,0.112839


In [3]:
#Definimos el algoritmo a utilizar, en este caso "BaselineOnly" fue el ganador
BLO = BaselineOnly()

#Dividimos el dataset en train y test
trainset = data.build_full_trainset()

#Ponemos a aprender al modelo
BLO.fit(trainset)

#Generamos las predicciones sobre el conjunto de test
testset = trainset.build_anti_testset()
predictions = BLO.test(testset)

Estimating biases using als...


In [4]:
#Generamos el proceso para obtener un top 10 de recomendaciones
def get_top_n(predictions, n=10):

    # First map the predictions to each user.
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    # Then sort the predictions for each user and retrieve the k highest ones.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n

top_n = get_top_n(predictions, n=10)

#Creamos un Dataframe vacío
reco_final = pd.DataFrame()

In [5]:
#Guardamos en el dataframe vacío las recomendaciones para cada usuario
for uid, user_ratings in top_n.items():
  usuario= uid
  recousuarios = [iid for (iid, _) in user_ratings]
  data = {'user_id':usuario,
        'imdb_id':recousuarios}
  reco_final = reco_final.append(pd.DataFrame(data))

  
reco_final['user_id'] = reco_final['user_id'].astype(int)

In [6]:
#Unimos los dataframes
reco_final = pd.merge(reco_final,movies, on = 'imdb_id')

#Mostramos las 10 mejores recomendaciones de X usuario
reco_final.head(10)

Unnamed: 0,user_id,imdb_id,imdb_title
0,116,tt0169858,Neon Genesis Evangelion: The End of Evangelion
1,116,tt1121794,Sword of the Stranger
2,116,tt12361974,Zack Snyder's Justice League
3,116,tt1950186,Ford v Ferrari
4,116,tt8332922,A Quiet Place Part II
5,116,tt8367814,The Gentlemen
6,116,tt8772262,Midsommar
7,116,tt9426210,Weathering with You
8,116,tt9541602,Extreme Job
9,116,tt0037515,And Then There Were None
