# Books Recommender 

Surprise is a scikit-style Python library for building recommender systems from explicit rating data.

Surprise provides several algorithms for building collaborative filtering recommender systems, ranging from memory-based methods (e.g., K-Nearest Neighbors, or KNN) to model-based methods (e.g., Singular Value Decomposition, or SVD).

In [None]:
#!pip install scikit-surprise

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from surprise import Dataset
from surprise import Reader
from surprise import SVD
from surprise.model_selection import cross_validate
from surprise import SVD
from surprise.model_selection import GridSearchCV
from surprise.model_selection import train_test_split

In [None]:
# Load the three CSV exports: book listing, book descriptions and user ratings.
# All of them are read with the same latin-1 encoding.
read_options = {'encoding': 'latin-1'}
books = pd.read_csv('data/books/listing.csv', **read_options)
descriptions = pd.read_csv('data/books/description.csv', **read_options)
ratings = pd.read_csv('data/books/ratings.csv', **read_options)

In [None]:
# The reader is given the rating scale, taken here as the smallest and
# largest rating observed in the data.
user_ratings = ratings['user_rating']
min_rating, max_rating = user_ratings.min(), user_ratings.max()

# Wrap the user / book / rating columns (in that order) in a Surprise dataset.
rating_columns = ['user_id', 'book_id', 'user_rating']
reader = Reader(rating_scale=(min_rating, max_rating))
data = Dataset.load_from_df(ratings[rating_columns], reader)

### Matrix Factorization


In [None]:
# Evaluate an SVD model (10 training epochs) with 10-fold cross-validation,
# computing RMSE and MAE and printing the per-fold report.
svd = SVD(n_epochs=10)
results = cross_validate(
    algo=svd,
    data=data,
    measures=['RMSE', 'MAE'],
    cv=10,
    verbose=True,
)

In [None]:
# Average each test metric over the cross-validation folds and print it.
average_mae = np.average(results["test_mae"])
average_rmse = np.average(results["test_rmse"])
print("Average MAE: ", average_mae)
print("Average RMSE: ", average_rmse)

### Hyperparameter Tuning

We can also tune the model's hyperparameters to find the best-performing configuration. Here, we use grid search with cross-validation.

In [None]:
# Candidate values for each SVD hyperparameter; every combination is tried.
param_grid = {
    'n_factors': [20, 50, 100],
    'n_epochs': [5, 10, 20],
}

# Grid search over the SVD class, scored by RMSE and MAE with 10-fold
# cross-validation.
gs = GridSearchCV(
    algo_class=SVD,
    param_grid=param_grid,
    measures=['rmse', 'mae'],
    cv=10,
)
gs.fit(data)

# Report the best RMSE and the parameter combination that achieved it.
best_rmse = gs.best_score['rmse']
best_rmse_params = gs.best_params['rmse']
print(best_rmse)
print(best_rmse_params)

### Model Retraining

Once the best hyperparameters have been found, we can retrain the model using them.

In [None]:
# Pull the winning hyperparameters (selected by RMSE) out of the grid search.
tuned_params = gs.best_params['rmse']
best_factor = tuned_params['n_factors']
best_epoch = tuned_params['n_epochs']

# Randomly hold out 20% of the ratings as the testset; the rest is the trainset.
trainset, testset = train_test_split(data, test_size=0.20)

# Build an SVD model with the tuned settings and fit it on the trainset.
svd = SVD(n_epochs=best_epoch, n_factors=best_factor)
svd.fit(trainset)

### Recommendation

In [None]:
def generate_recommendation(model, user_id, ratings_df, books_df, n_items):
    """Print the top-N books a user has not rated yet, ranked by predicted rating.

    Args:
        model: Fitted recommender exposing ``test(test_set)``, which must
            return one prediction per ``[user, item, rating]`` row, each
            carrying the estimated rating in its ``est`` attribute.
        user_id: Identifier of the user to recommend for, matched against
            ``ratings_df["user_id"]``.
        ratings_df: DataFrame with at least ``user_id`` and ``book_id`` columns.
        books_df: DataFrame with at least ``book_id`` and ``name`` columns.
        n_items: Maximum number of recommendations to print.

    Returns:
        None. The recommendations are written to standard output, one
        ``<name> <predicted rating>`` line per book.
    """
    # Every distinct book that appears in the ratings data
    book_ids = ratings_df["book_id"].unique()

    # Books this user has already rated
    book_ids_user = ratings_df.loc[ratings_df["user_id"] == user_id, "book_id"]

    # Candidate books: those the user has not rated yet
    book_ids_to_pred = np.setdiff1d(book_ids, book_ids_user)

    # The rating of 4 is only a placeholder so that every row has the
    # (user, item, rating) shape; only the estimate (``est``) is read back.
    test_set = [[user_id, book_id, 4] for book_id in book_ids_to_pred]

    # Predict a rating for every candidate book
    predictions = model.test(test_set)
    pred_ratings = np.array([pred.est for pred in predictions])
    print("Top {0} item recommendations for user {1}:\n".format(n_items, user_id))

    # Negating the estimates makes argsort rank the highest rating first
    index_max = (-pred_ratings).argsort()[:n_items]
    for i in index_max:
        book_id = book_ids_to_pred[i]
        names = books_df.loc[books_df["book_id"] == book_id, "name"].values
        # A rated book may be missing from the listing; print a placeholder
        # instead of failing with IndexError and losing the remaining output.
        if len(names):
            name = names[0]
        else:
            name = "<unknown title, book_id={0}>".format(book_id)
        print(name, pred_ratings[i])
 

### Recommendations

In [None]:
# User to generate recommendations for
user_id = 3472

# Number of top-ranked books to show
n_items = 10

# Print the recommendations using the model trained above
generate_recommendation(
    model=svd,
    user_id=user_id,
    ratings_df=ratings,
    books_df=books,
    n_items=n_items,
)