In [16]:
from surprise import SVDpp
from surprise import Dataset
from surprise import evaluate, print_perf
from surprise import GridSearch
import pandas as pd
from surprise import Reader

In [6]:
ratings_file = "../datasets/movielens/ml-latest-small/ratings.csv"

In [7]:
df = pd.read_csv(ratings_file)

In [8]:
reader = Reader(rating_scale=(1, 5))

In [9]:
df.describe()

Unnamed: 0,userId,movieId,rating,timestamp
count,100004.0,100004.0,100004.0,100004.0
mean,347.01131,12548.664363,3.543608,1129639000.0
std,195.163838,26369.198969,1.058064,191685800.0
min,1.0,1.0,0.5,789652000.0
25%,182.0,1028.0,3.0,965847800.0
50%,367.0,2406.5,4.0,1110422000.0
75%,520.0,5418.0,4.0,1296192000.0
max,671.0,163949.0,5.0,1476641000.0


In [10]:
data = Dataset.load_from_df(df[["userId", "movieId", "rating"]], reader)

In [11]:
data.split(n_folds=3)

algo = SVD()

perf = evaluate(algo, data, measures=['RMSE', 'MAE'])

print_perf(perf)

Evaluating RMSE, MAE of algorithm SVD.

------------
Fold 1
RMSE: 0.9101
MAE:  0.7012
------------
Fold 2
RMSE: 0.8957
MAE:  0.6910
------------
Fold 3
RMSE: 0.9028
MAE:  0.6952
------------
------------
Mean RMSE: 0.9029
Mean MAE : 0.6958
------------
------------
        Fold 1  Fold 2  Fold 3  Mean    
RMSE    0.9101  0.8957  0.9028  0.9029  
MAE     0.7012  0.6910  0.6952  0.6958  


In [12]:
factors = [ 50, 100 ]
reg_all = [ 10, 1, 0.05 , 0.001 ]
lr_all = [ 10, 1, 0.05, 0.001 ]
epochs = [ 20, 30 ]

gridsearch_params = { "n_factors": factors, "reg_all": reg_all, "lr_all": lr_all, "n_epochs": epochs }

In [17]:
grid_search = GridSearch(SVDpp, gridsearch_params, measures=['RMSE', 'MAE'])


[{'n_factors': 50, 'reg_all': 10, 'lr_all': 10, 'n_epochs': 20}, {'n_factors': 50, 'reg_all': 10, 'lr_all': 10, 'n_epochs': 30}, {'n_factors': 50, 'reg_all': 10, 'lr_all': 1, 'n_epochs': 20}, {'n_factors': 50, 'reg_all': 10, 'lr_all': 1, 'n_epochs': 30}, {'n_factors': 50, 'reg_all': 10, 'lr_all': 0.05, 'n_epochs': 20}, {'n_factors': 50, 'reg_all': 10, 'lr_all': 0.05, 'n_epochs': 30}, {'n_factors': 50, 'reg_all': 10, 'lr_all': 0.001, 'n_epochs': 20}, {'n_factors': 50, 'reg_all': 10, 'lr_all': 0.001, 'n_epochs': 30}, {'n_factors': 50, 'reg_all': 1, 'lr_all': 10, 'n_epochs': 20}, {'n_factors': 50, 'reg_all': 1, 'lr_all': 10, 'n_epochs': 30}, {'n_factors': 50, 'reg_all': 1, 'lr_all': 1, 'n_epochs': 20}, {'n_factors': 50, 'reg_all': 1, 'lr_all': 1, 'n_epochs': 30}, {'n_factors': 50, 'reg_all': 1, 'lr_all': 0.05, 'n_epochs': 20}, {'n_factors': 50, 'reg_all': 1, 'lr_all': 0.05, 'n_epochs': 30}, {'n_factors': 50, 'reg_all': 1, 'lr_all': 0.001, 'n_epochs': 20}, {'n_factors': 50, 'reg_all': 1, '

In [None]:
grid_search.evaluate(data)

------------
Parameters combination 1 of 64
params:  {'n_factors': 50, 'reg_all': 10, 'lr_all': 10, 'n_epochs': 20}
------------
Mean RMSE: 1.8001
Mean MAE : 1.4564
------------
------------
Parameters combination 2 of 64
params:  {'n_factors': 50, 'reg_all': 10, 'lr_all': 10, 'n_epochs': 30}
