## Preliminary

In [23]:
import pandas as pd

from surprise import SVDpp
from surprise import SVD
from surprise import Dataset, Reader
from surprise import accuracy
from surprise.model_selection import train_test_split
from surprise.model_selection import GridSearchCV
from surprise.model_selection import cross_validate

In [22]:
# Location of the ratings export, relative to the notebook's working directory.
raw_path = '../data/BX-Book-Ratings.csv'

# The file is read as ';'-delimited text decoded with ISO-8859-1.
raw = pd.read_csv(
    raw_path,
    sep=';',
    encoding='ISO-8859-1',
)

# Preview the first rows to confirm the columns were parsed as expected.
raw.head()

Unnamed: 0,User-ID,ISBN,Book-Rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6


In [25]:
# Ratings are declared to Surprise as living on a 0-10 scale.
# NOTE(review): the Book-Crossing dataset description states that a rating of 0
# denotes implicit feedback rather than an explicit score of zero -- confirm
# whether those rows should be filtered out before modelling.
reader = Reader(rating_scale=(0, 10))

# load_from_df expects exactly three columns, in this order: user, item, rating.
data = Dataset.load_from_df(raw[['User-ID', 'ISBN', 'Book-Rating']], reader)

# Hold out 15% of the ratings for the final evaluation. The seed is fixed so
# the split (and every metric computed on it) is the same on every run.
trainset, testset = train_test_split(data, test_size=.15, random_state=42)


In [27]:
# Baseline SVD with hand-picked hyperparameters, trained on the training split.
# random_state fixes the random initialisation of the factors so the reported
# test RMSE can be reproduced across runs.
algo = SVD(
    n_factors=10,
    n_epochs=100,
    lr_all=0.005,
    reg_all=0.1,
    random_state=42,
)
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x11fb97160>

In [28]:
# Predict every (user, item) pair in the held-out split with the fitted model.
test_pred = algo.test(testset)

# Label the output, then report RMSE; rmse() both prints and returns the value,
# so the number also appears as the cell result.
print("SVD : Test Set")
accuracy.rmse(predictions=test_pred, verbose=True)

SVD : Test Set
RMSE: 3.7059


3.7058904629533096

In [10]:
# ----- SVD ----- #

# Hyperparameter grid: 4 * 3 * 4 * 3 = 144 combinations, each evaluated with
# 3-fold cross-validation (432 fits in total), so this cell is expensive.
param_grid = {
    'n_factors': [110, 120, 140, 160],
    'n_epochs': [90, 100, 110],
    'lr_all': [0.001, 0.003, 0.005, 0.008],
    'reg_all': [0.08, 0.1, 0.15],
}

# n_jobs=-1 runs the fits in parallel on all available CPU cores instead of
# serially, which is what made the original run impractically slow.
gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=3, n_jobs=-1)
gs.fit(data)

# Estimator built from the parameter combination with the best mean RMSE.
algo = gs.best_estimator['rmse']



KeyboardInterrupt: 

In [None]:
# Report the best cross-validated RMSE and the parameters that achieved it.
print(gs.best_score['rmse'])
print(gs.best_params['rmse'])

# Re-check the selected estimator with 5-fold CV; verbose=True prints the
# per-fold RMSE / MAE table.
cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

# Refit on the training split using the parameters chosen by the grid search.
# They are read from gs.best_params rather than copied by hand so this cell
# cannot go stale when the grid or the data changes.
best_svd_params = gs.best_params['rmse']
algo = SVD(**best_svd_params)
algo.fit(trainset)

# Evaluate the tuned model on the held-out split.
test_pred = algo.test(testset)
print("SVD : Test Set")
accuracy.rmse(test_pred, verbose=True)


In [None]:
# ----- SVD++ ----- #

# Hyperparameter grid: 3 * 3 * 4 * 3 = 108 combinations, each evaluated with
# 3-fold cross-validation (324 fits in total).
param_grid = {
    'n_factors': [20, 30, 40],
    'n_epochs': [20, 30, 40],
    'lr_all': [0.001, 0.003, 0.005, 0.008],
    'reg_all': [0.08, 0.1, 0.15],
}

# n_jobs=-1 runs the fits in parallel on all available CPU cores.
gs = GridSearchCV(SVDpp, param_grid, measures=['rmse', 'mae'], cv=3, n_jobs=-1)
gs.fit(data)

# Estimator built from the parameter combination with the best mean RMSE.
algo = gs.best_estimator['rmse']

# Report the best cross-validated RMSE and the parameters that achieved it.
print(gs.best_score['rmse'])
print(gs.best_params['rmse'])

# Re-check the selected estimator with 5-fold CV; verbose=True prints the
# per-fold RMSE / MAE table.
cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)



In [None]:
# Train SVD++ on the training split with the tuned hyperparameters.
# (A following `algo = SVDpp()` used to overwrite this model with library
# defaults, silently discarding the tuned settings; it has been removed.)
algo = SVDpp(n_factors=40, n_epochs=40, lr_all=0.008, reg_all=0.1)
algo.fit(trainset)

# Evaluate the tuned model on the held-out split.
test_pred = algo.test(testset)
print("SVD++ : Test Set")
accuracy.rmse(test_pred, verbose=True)