In [None]:
# Run the model on the test set; the resulting predictions are what
# get_top_n consumes to build the per-user recommendations.
# NOTE(review): `testset` is not defined in any visible cell, and `algo` is only
# created/fitted in cells further down - confirm the intended cell order.
predictions = algo.test(testset)

In [None]:
top_n = get_top_n(predictions, n=10)

# For every user, show only the ids of the recommended items (the estimated
# ratings are left out of the printout).
for uid in top_n:
    recommended_ids = [iid for iid, _ in top_n[uid]]
    print(uid, recommended_ids)

In [None]:
# Install the packages listed in requirements.txt (the %pip magic installs into
# the environment of the running kernel).
# NOTE(review): this cell appears after cells that already use those packages;
# it probably belongs at the very top of the notebook.
%pip install -r requirements.txt

In [None]:
import pandas as pd
import numpy as np

import random

from itertools import chain
import seaborn as sns

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

from surprise import Dataset
from surprise import Reader

In [None]:
# Important to guarantee the reproducibility of the experiments: seed both
# Python's `random` module and NumPy's global random generator.

my_seed = 0
random.seed(my_seed)
np.random.seed(my_seed)


In [None]:
# Load the movies and ratings tables from the local ./datasets folder.
movies = pd.read_csv('./datasets/movies.csv', encoding='utf-8')
ratings = pd.read_csv('./datasets/ratings.csv', encoding='utf-8')
# The tags table is currently not loaded:
# tags = pd.read_csv('./datasets/tags.csv', encoding='utf-8')

In [None]:
# Join every rating with the row of the movie it refers to (matched on
# movieId) and discard the timestamp column.
data = movies.merge(ratings, on="movieId").drop(columns=["timestamp"])

# Turn the pipe-separated genre string into a list of genre names.
data['genres'] = data['genres'].str.split('|')

data.head()

In [None]:
# Bounds of the ratings present in the data (used elsewhere in the notebook as
# the Reader's rating scale).
rating_column = data["rating"]
min_rating, max_rating = rating_column.min(), rating_column.max()

print(f"Max movie rating: {max_rating}")
print(f"Min movie rating: {min_rating}")

In [None]:
# Count how many distinct movies belong to each genre.
# `data` is movies merged with ratings, i.e. one row per rating, so it is first
# reduced to one row per movieId; counting on the merged frame directly would
# plot the number of ratings per genre, not the number of movies the labels
# announce. Movies that have no rating at all are absent from `data` (the merge
# keeps only matching rows) and are therefore not counted.
unique_movies = data.drop_duplicates(subset='movieId')
genre_counts = unique_movies.explode('genres')['genres'].value_counts().to_dict()

genres = list(genre_counts.keys())
counts = list(genre_counts.values())

plt.figure(figsize=(10, 6))  # Set the figure size
bars = plt.bar(genres, counts)  # Draw the bar chart
plt.xlabel('Genre')  # x-axis label
plt.ylabel('Movie Count')  # y-axis label
plt.title('Movie Count by Genre')  # Chart title
plt.xticks(rotation=45)  # Rotate the x-axis labels by 45 degrees

# Write each bar's value just above it, using a reduced font size
for bar in bars:
    yval = bar.get_height()
    plt.text(bar.get_x() + bar.get_width() / 2, yval, yval, ha='center', va='bottom', fontsize=8)

plt.show()  # Render the chart

In [None]:
# Histogram of all ratings.
plt.hist(data['rating'], bins=20, rwidth=0.8)

plt.title('Distribution of Ratings')
plt.xlabel('Movie Ratings')
plt.ylabel('Total Ratings')

# Occurrences of each distinct rating value.
value_counts = data['rating'].value_counts()
unique_values = value_counts.index

# Keep only the rating values that actually occur, paired with their counts.
observed = [(value, count) for value, count in value_counts.items() if count > 0]

# Put an x tick on every observed rating value...
ticks = [value for value, _ in observed]
plt.xticks(ticks)

# ...and write the number of occurrences at that value, at the count's height.
for value, count in observed:
    plt.text(value, count, str(count), ha='center', va='bottom')

plt.show()


In [None]:
# Reference - Surprise GridSearchCV documentation:
# https://surprise.readthedocs.io/en/stable/model_selection.html#surprise.model_selection.search.GridSearchCV

In [24]:
from surprise import SVD
from surprise.model_selection import GridSearchCV
from surprise import accuracy
 
# Hyper-parameter values to explore for SVD; the grid below spans
# 3 * 6 * 3 * 2 = 108 parameter combinations.
param_grid = {
  'n_factors': [20, 50, 100],
  'n_epochs': [5, 10, 20, 30, 40, 50],
  "lr_all": [0.001,0.002, 0.005],
  "reg_all": [0.4, 0.6]
}
 
# Grid search over SVD with cv=3, measuring RMSE and MAE.
# NOTE(review): `data` must already be a surprise Dataset when gs.fit runs, but
# the cell that converts it (Dataset.load_from_df) appears further down in the
# notebook - confirm the intended cell order.
gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=3, n_jobs=-1)
gs.fit(data)
 
# Best RMSE found and the parameter combination that produced it.
print(gs.best_score['rmse'])
print(gs.best_params['rmse'])


# `algo` is the estimator configured with the best parameters (by RMSE);
# it is fitted in a separate cell via algo.fit(trainset).
algo = gs.best_estimator["rmse"]

In [None]:


# Train the selected estimator on the training set.
# NOTE(review): `trainset` is not defined in any visible cell - confirm where it
# is built (e.g. from a train/test split of the Dataset) before relying on this.
algo.fit(trainset)

In [None]:
# Build a surprise Dataset from the (userId, movieId, rating) columns; the
# Reader is given the rating scale observed in the data.
# NOTE(review): this rebinds `data` from the pandas DataFrame to the Dataset, so
# re-running this cell (or any earlier cell that treats `data` as a DataFrame)
# will likely fail afterwards - consider a distinct name for the Dataset.
reader = Reader(rating_scale=(min_rating, max_rating))
data = Dataset.load_from_df(data[['userId', 'movieId', 'rating']], reader)

In [None]:
from collections import defaultdict

def get_top_n(predictions, n=10):
    """Build the top-N recommendation list of every user.

    Args:
        predictions: Iterable of 5-field prediction records laid out as
            (user id, item id, true rating, estimated rating, details), such
            as the list returned by the test method of an algorithm.
        n(int): Maximum number of recommendations kept per user. Default
            is 10.

    Returns:
        A dict keyed by user (raw) id whose values are lists of
        (raw item id, estimated rating) tuples, ordered from the highest to
        the lowest estimate and holding at most n entries.
    """
    per_user = defaultdict(list)

    # Group the (item, estimate) pairs under the user they were predicted for.
    for prediction in predictions:
        uid, iid, _true_r, est, _details = prediction
        per_user[uid].append((iid, est))

    # Rank each user's pairs by estimate, best first, and keep the first n.
    # Only existing keys are reassigned, so the dict size is stable while looping.
    for uid in per_user:
        ranked = sorted(per_user[uid], key=lambda pair: pair[1], reverse=True)
        per_user[uid] = ranked[:n]

    return per_user
