In [34]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from surprise import Reader, Dataset, SVD
import time
from surprise.accuracy import rmse, mae
from surprise.model_selection import cross_validate
from surprise.model_selection import train_test_split
from surprise.model_selection import GridSearchCV


In [29]:
### read ratings.csv file
ratings = pd.read_csv("./Ratings-New.csv")
ratings.head()

Unnamed: 0,User-ID,unique_isbn,Book-Rating
0,276726,0155061224,5
1,276729,052165615X,3
2,276729,0521795028,6
3,276744,038550120X,7
4,276747,0060517794,9


In [30]:
reader = Reader(rating_scale=(1, 10))
data  = Dataset.load_from_df(ratings, reader)



In [31]:
trainset, testset = train_test_split(data, test_size=0.4)


In [32]:
model = SVD()


In [33]:
## Use SVD to train
model = SVD()
cross_validate(model, data, measures=['RMSE'], cv=3, verbose=True)

Evaluating RMSE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.6389  1.6338  1.6319  1.6361  1.6393  1.6360  0.0028  
Fit time          17.09   19.11   18.39   18.46   17.87   18.18   0.67    
Test time         0.49    0.72    0.48    0.65    0.49    0.57    0.10    


{'test_rmse': array([1.63885822, 1.6337785 , 1.63193906, 1.6360792 , 1.63931481]),
 'fit_time': (17.091822147369385,
  19.105512619018555,
  18.391079425811768,
  18.455477237701416,
  17.86833906173706),
 'test_time': (0.4938468933105469,
  0.7240447998046875,
  0.4788837432861328,
  0.653252363204956,
  0.49462103843688965)}

In [35]:
param_grid = {'n_factors': [80, 100, 120], 'lr_all': [0.001, 0.005, 0.01], 'reg_all': [0.01, 0.02, 0.04]}

In [36]:
gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=3)

In [37]:
gs.fit(data)

In [38]:
# Return the best version of the SVD algorithm
model = gs.best_estimator['rmse']

print(gs.best_score['rmse'])
print(gs.best_params['rmse'])

1.6309949109422797
{'n_factors': 80, 'lr_all': 0.005, 'reg_all': 0.04}


In [40]:
model = SVD(n_factors=80, lr_all=0.005, reg_all=0.04)
model.fit(trainset) # re-fit on only the training data using the best hyperparameters
test_pred = model.test(testset)
print("SVD : Test Set")
rmse(test_pred, verbose=True)

SVD : Test Set
RMSE: 1.6425


1.6425317460704485