In [9]:
import pandas as pd

from surprise import Reader, Dataset, SVD
from surprise.model_selection import cross_validate, GridSearchCV, KFold

In [3]:
# Read the ratings CSV and drop the timestamp column in one chained
# expression, so `ratings` is bound once and never mutated in place.
ratings = pd.read_csv('../ml-latest-small/ratings.csv').drop(columns='timestamp')
ratings.head()

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0
3,1,47,5.0
4,1,50,5.0


In [15]:
# Tell Surprise the bounds of the rating scale used by this dataset.
reader = Reader(rating_scale=(0.5, 5.0))

# load_from_df interprets the frame's columns positionally as
# (user id, item id, rating). Select them by name so the dataset is built
# correctly regardless of the column order or extra columns in `ratings`.
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']],
                            reader=reader)

In [8]:
# Baseline: SVD with default hyper-parameters and a fixed seed.
algo = SVD(random_state=0)

# Passing an integer for `cv` builds a shuffling KFold with no seed, so the
# fold assignment (and therefore every reported metric) would differ between
# runs even though the algorithm itself is seeded. Use an explicitly seeded
# splitter so the 5-fold evaluation is reproducible.
cross_validate(algo, data, measures=['rmse', 'mae'],
               cv=KFold(n_splits=5, random_state=0), verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8810  0.8701  0.8727  0.8657  0.8785  0.8736  0.0056  
MAE (testset)     0.6772  0.6696  0.6710  0.6660  0.6744  0.6716  0.0039  
Fit time          0.78    0.81    0.79    0.81    0.81    0.80    0.01    
Test time         0.09    0.14    0.08    0.13    0.08    0.10    0.03    


{'test_rmse': array([0.88101394, 0.87008873, 0.87270296, 0.86568025, 0.87854217]),
 'test_mae': array([0.6772234 , 0.66955739, 0.67096017, 0.66598104, 0.67437083]),
 'fit_time': (0.779766321182251,
  0.8068943023681641,
  0.7918815612792969,
  0.8078737258911133,
  0.8068482875823975),
 'test_time': (0.08872818946838379,
  0.13662147521972656,
  0.08278107643127441,
  0.13460564613342285,
  0.08174777030944824)}

In [11]:
# Hyper-parameter grid for SVD. `random_state` has a single value: it is
# passed through to each SVD instance so every candidate trains with the
# same seed.
params = {'random_state': [0],
          'n_epochs': [5, 10, 20, 40],
          'lr_all': [0.002, 0.005, 0.010, 0.020],
          'reg_all': [0.02, 0.05, 0.10, 0.50]}

# An integer `cv` would shuffle the folds with an unseeded RNG, making
# best_score / best_params vary from run to run. A seeded KFold keeps the
# 5-fold splits fixed so the search result is reproducible.
grid_search_svd = GridSearchCV(SVD, params, measures=['rmse', 'mae'],
                               cv=KFold(n_splits=5, random_state=0))
grid_search_svd.fit(data)

In [12]:
# Best score the grid search recorded for the 'mae' measure it was asked to track.
grid_search_svd.best_score['mae']

0.651777368125526

In [13]:
# Parameter combination from the grid that the search recorded as best for the 'mae' measure.
grid_search_svd.best_params['mae']

{'random_state': 0, 'n_epochs': 40, 'lr_all': 0.02, 'reg_all': 0.1}