

# **Predicción con SVD++**


# Índice

>[Índice](#scrollTo=2j02H66f87eV)

>[Descargando la información](#scrollTo=9qKTQ2V5VKSX)

>[Preparar entorno](#scrollTo=P3K5VPZoGSYX)

>[UserKNN](#scrollTo=2e0ZeZXzMhU3)

>>[Generar una lista de recomendaciones para un usuario](#scrollTo=dIVXNsQ8CCJV)

>>>[Evaluar en base a error de prediccion (RMSE y MAE)](#scrollTo=g55SDLmqCpUo)



# Descargando la información


In [11]:
import pandas as pd

train_file = pd.read_csv('../data/training_set.csv', sep=',', header=0)
train_file = train_file[['userID', 'itemID', 'rating']]
train_file.head()

Unnamed: 0,userID,itemID,rating
0,401,32780,3.0
1,7548,21688,3.5
2,7663,1521,4.0
3,1357,3824,3.5
4,1361,727,4.5


In [12]:
validation_file = pd.read_csv('../data/validation_set.csv', sep=',', header=0)
validation_file = validation_file[['userID', 'itemID', 'rating']]
validation_file.head()

Unnamed: 0,userID,itemID,rating
0,1040,4920,4.0
1,1306,7348,4.0
2,932,4073,4.0
3,6735,16074,4.5
4,197,39299,4.0


## Preparando entorno

In [1]:
pip install scikit-surprise

Defaulting to user installation because normal site-packages is not writeable
Collecting scikit-surprise
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25ldone
[?25h  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp310-cp310-linux_x86_64.whl size=2812213 sha256=e53047c4d66c8e2da56d83b40f4e314b9286fef1318d1e5028aca67f3452589f
  Stored in directory: /home/conflictuada_ale/.cache/pip/wheels/4b/3f/df/6acbf0a40397d9bf3ff97f582cc22fb9ce66adde75bc71fd54
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed 

In [64]:
import surprise
import numpy as np
import os
import pandas as pd
from surprise import SVD
from surprise import accuracy
from surprise.model_selection import cross_validate
from collections import defaultdict
import seaborn as sns
import matplotlib.pyplot as plt

## SVD ++


In [31]:
from surprise.model_selection import PredefinedKFold
reader = surprise.Reader(line_format='user item rating', sep=',', rating_scale=(1,5))
train_data = surprise.Dataset.load_from_df(train_file[['userID', 'itemID', 'rating']], reader)
testset = list(zip(validation_file['userID'], validation_file['itemID'], validation_file['rating']))


In [29]:
from surprise import SVDpp
trainset = train_data.build_full_trainset()

In [30]:
# Definicion de objeto svd++
algorithm = SVDpp()
# Entrenamiento del modelo
algorithm.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVDpp at 0x7f7718520c10>

In [33]:
# Hacer predicciones sobre el conjunto de validación
predictions = algorithm.test(testset)

In [35]:
accuracy.rmse(predictions)
accuracy.mae(predictions)

RMSE: 0.5910
MAE:  0.4419


0.4419464730670412

## Análisis de sensibilidad 

## Rating

In [36]:
rating_test = pd.read_csv('../templates/rating_template_fill.csv', sep=';', header=0)
rating_test.head()

Unnamed: 0,itemID,styleID,Rating,userID
0,39289,14756,,715
1,3939,1417,,4944
2,47695,14879,,4890
3,7348,1199,,5324
4,37403,1199,,5544


In [47]:
# Función para obtener el promedio de ratings de un usuario en caso de que el item no esté en training
def user_average_rating(user_id, train_ratings):
    user_ratings = train_ratings[train_ratings['userID'] == user_id]['rating']
    if len(user_ratings) > 0:
        return user_ratings.mean()
    else:
        return train_ratings['rating'].mean()

# Lista de ítems en training
training_items = train_file['itemID'].unique()

In [61]:
for index, row in rating_test.iterrows(): 
    user_id = str(int(row.iloc[3]))
    item_id = str(int(row.iloc[0]))
    
    if int(item_id) not in training_items:
      rating = user_average_rating(user_id, train_file)
    else:
        prediccion = algorithm.predict(user_id, item_id)
        rating = prediccion.est
    rating_test.at[index, 'Rating'] = rating


In [62]:
rating_test.head()

Unnamed: 0,itemID,styleID,Rating,userID
0,39289,14756,3.868971,715
1,3939,1417,3.868971,4944
2,47695,14879,3.868971,4890
3,7348,1199,3.868971,5324
4,37403,1199,3.868971,5544


In [65]:
ruta = os.path.join('../predictions', 'rating_svd.csv')
rating_test.to_csv(ruta, index=False)

## Ranking

In [66]:
import json

ranking_test = json.load(open('../templates/ranking_template_fill.json'))

In [68]:
#Función para obtener las recomendaciones por cada usuario obtenida del práctico Surprise_FunSVD y modificada para SVD++
def get_top_n(predictions, n=10):
    """Devuelve las N-mejores recomendaciones para cada usuario de un set de predicción.

    Args:
        predictions(lista de objetos Prediction): La lista de predicción obtenida del método test.
        n(int): El número de recomendaciónes por usuario

    Returns:
    Un diccionario donde las llaves son ids de usuario y los valores son listas de tuplas:
        [(item id, rating estimation), ...] de tamaño n.
    """

    # First map the predictions to each user.
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    # Then sort the predictions for each user and retrieve the k highest ones.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n

In [70]:
#COMPLETAR
top_n = get_top_n(predictions, n=10)
print('Recommendation for user {}: {}'.format(user_id, top_n["2"]))


Recommendation for user 3728: []


In [None]:
ruta = os.path.join('../predictions', 'ranking_iknn.json')
rating_test.to_json(ruta, index=False)