In [None]:
!pip install hyperopt

In [1]:
from surprise import SVD
from surprise import KNNBasic
from surprise import Dataset
from surprise.model_selection import cross_validate

data = Dataset.load_builtin('ml-100k')

## Manual usage

In [2]:
algo = SVD()

# Run 5-fold cross-validation and print results
cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9426  0.9364  0.9374  0.9362  0.9396  0.9384  0.0024  
MAE (testset)     0.7407  0.7368  0.7379  0.7385  0.7426  0.7393  0.0021  
Fit time          9.91    10.02   7.53    5.81    6.09    7.87    1.80    
Test time         0.31    0.31    0.26    0.13    0.18    0.24    0.07    


{'test_rmse': array([0.94257595, 0.93635922, 0.93741969, 0.93622632, 0.93964228]),
 'test_mae': array([0.74068396, 0.73680666, 0.73794365, 0.73848225, 0.7426143 ]),
 'fit_time': (9.909649133682251,
  10.018833637237549,
  7.5276265144348145,
  5.814746141433716,
  6.092000961303711),
 'test_time': (0.31347203254699707,
  0.31253695487976074,
  0.26464223861694336,
  0.1318800449371338,
  0.18134117126464844)}

In [3]:
algo = KNNBasic()

cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)['test_rmse'].mean()

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBasic on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9699  0.9883  0.9783  0.9847  0.9719  0.9786  0.0071  
MAE (testset)     0.7659  0.7817  0.7707  0.7789  0.7675  0.7729  0.0063  
Fit time          0.76    1.00    1.05    0.98    1.00    0.96    0.10    
Test time         4.64    7.08    7.03    6.96    6.81    6.51    0.94    


0.978622233312796

## Hyper parameter tuning

In [4]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials

In [14]:
similarity_options_space = {
    'name': hp.choice('name', ['cosine', 'msd', 'pearson', 'pearson_baseline']),
    'user_based': hp.choice('user_based', [False, True]),
    'shrinkage': hp.choice('shrinkage', range(1, 300))
}

svd_space = {
    'n_factors': hp.choice('n_factors', range(1, 100)),
    'n_epochs': hp.choice('n_epochs', range(1, 20)),
    'lr_all': hp.uniform('lr_all', 0.001, 0.3),
    'reg_all': hp.uniform('reg_all', 0.001, 0.3),
}

knn_space = {
    'k': hp.choice('k', range(1,100))
}

In [15]:
def hyperopt_svd(params):
    print(params)
    algo = SVD(n_factors=params['n_factors'], n_epochs=params['n_epochs'], lr_all=params['lr_all'], reg_all=params['reg_all'])
    return cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)['test_rmse'].mean()

def objective_svd(params):
    rmse = hyperopt_svd(params)
    return {'loss': rmse, 'status': STATUS_OK}

space = svd_space 

trials = Trials()
svd_best = fmin(objective_svd, space, algo=tpe.suggest, max_evals=20, trials=trials)
print('best:')
print(svd_best)

{'lr_all': 0.2512500575841037, 'n_epochs': 14, 'n_factors': 54, 'reg_all': 0.1898828330768992}                         
Evaluating RMSE, MAE of algorithm SVD on 5 split(s).                                                                   
                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std                                                  
RMSE (testset)    1.0723  1.0641  1.0692  1.0629  1.0661  1.0669  0.0034  
MAE (testset)     0.8502  0.8410  0.8468  0.8433  0.8445  0.8452  0.0031  
Fit time          4.72    4.54    4.43    4.77    4.90    4.67    0.17    
Test time         0.38    0.21    0.32    0.30    0.37    0.31    0.06    
{'lr_all': 0.19896086075616254, 'n_epochs': 2, 'n_factors': 80, 'reg_all': 0.1988757966861109}                         
Evaluating RMSE, MAE of algorithm SVD on 5 split(s).                                                                   
                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std                               

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std                                                  
RMSE (testset)    1.0141  1.0290  1.0200  1.0266  1.0323  1.0244  0.0065  
MAE (testset)     0.8052  0.8198  0.8135  0.8180  0.8220  0.8157  0.0059  
Fit time          0.60    0.61    0.58    0.56    0.57    0.58    0.02    
Test time         0.38    0.34    0.35    0.28    0.37    0.34    0.04    
{'lr_all': 0.23593460503423527, 'n_epochs': 8, 'n_factors': 75, 'reg_all': 0.23224372851309458}                        
Evaluating RMSE, MAE of algorithm SVD on 5 split(s).                                                                   
                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std                                                  
RMSE (testset)    1.0450  1.0333  1.0462  1.0312  1.0450  1.0402  0.0065  
MAE (testset)     0.8270  0.8186  0.8319  0.8186  0.8306  0.8253  0.0057  
Fit time          3.23    3.49    3.50    3.38    3.16    3.35    0.13

In [16]:
def hyperopt_knn_basic(params):
    print(params)
    sim_options = {
        'name': params['name'],
        'user_based': params['user_based'],
        'shrinkage': params['shrinkage']
    }
    
    algo = KNNBasic(k=params['k'], sim_options=sim_options)
    return cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)['test_rmse'].mean()

def objective_knn_basic(params):
    rmse = hyperopt_knn_basic(params)
    return {'loss': rmse, 'status': STATUS_OK}

space = {**similarity_options_space, **knn_space} 

trials = Trials()
knn_best = fmin(objective_knn_basic, space, algo=tpe.suggest, max_evals=20, trials=trials)
print('best:')
print(knn_best)

{'k': 37, 'name': 'msd', 'shrinkage': 246, 'user_based': False}                                                        
Computing the msd similarity matrix...                                                                                 
Done computing similarity matrix.                                                                                      
Computing the msd similarity matrix...                                                                                 
Done computing similarity matrix.                                                                                      
Computing the msd similarity matrix...                                                                                 
Done computing similarity matrix.                                                                                      
Computing the msd similarity matrix...                                                                                 
Done computing similarity matrix.       

als...                                                                                                                 
Computing the pearson_baseline similarity matrix...                                                                    
Done computing similarity matrix.                                                                                      
Estimating biases using                                                                                                
als...                                                                                                                 
Computing the pearson_baseline similarity matrix...                                                                    
Done computing similarity matrix.                                                                                      
Estimating biases using                                                                                                
als...                                  

Computing the pearson similarity matrix...                                                                             
Done computing similarity matrix.                                                                                      
Computing the pearson similarity matrix...                                                                             
Done computing similarity matrix.                                                                                      
Evaluating RMSE, MAE of algorithm KNNBasic on 5 split(s).                                                              
                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std                                                  
RMSE (testset)    1.0460  1.0508  1.0429  1.0475  1.0448  1.0464  0.0026  
MAE (testset)     0.8356  0.8422  0.8356  0.8422  0.8373  0.8386  0.0030  
Fit time          5.48    5.70    6.20    6.44    5.71    5.91    0.36    
Test time         7.38    8.19    7.70    7.93    7.77 

Computing the pearson_baseline similarity matrix...                                                                    
Done computing similarity matrix.                                                                                      
Estimating biases using                                                                                                
als...                                                                                                                 
Computing the pearson_baseline similarity matrix...                                                                    
Done computing similarity matrix.                                                                                      
Estimating biases using                                                                                                
als...                                                                                                                 
Computing the pearson_baseline similarit

Test time         9.19    9.63    10.10   10.36   10.21   9.90    0.43    
{'k': 75, 'name': 'pearson', 'shrinkage': 36, 'user_based': True}                                                      
Computing the pearson similarity matrix...                                                                             
Done computing similarity matrix.                                                                                      
Computing the pearson similarity matrix...                                                                             
Done computing similarity matrix.                                                                                      
Computing the pearson similarity matrix...                                                                             
Done computing similarity matrix.                                                                                      
Computing the pearson similarity matrix...                                           

Evaluating RMSE, MAE of algorithm KNNBasic on 5 split(s).                                                              
                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std                                                  
RMSE (testset)    1.0222  1.0273  1.0223  1.0216  1.0280  1.0243  0.0028  
MAE (testset)     0.8097  0.8132  0.8114  0.8092  0.8116  0.8110  0.0014  
Fit time          4.98    4.47    3.34    4.69    5.14    4.53    0.64    
Test time         9.85    7.68    9.07    9.26    12.56   9.69    1.60    
{'k': 65, 'name': 'pearson', 'shrinkage': 274, 'user_based': False}                                                    
Computing the pearson similarity matrix...                                                                             
Done computing similarity matrix.                                                                                      
Computing the pearson similarity matrix...                                                          

Using the best parameters

In [19]:
algo = SVD(n_factors=svd_best['n_factors'], n_epochs=svd_best['n_epochs'], lr_all=svd_best['lr_all'], reg_all=svd_best['reg_all'])
cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9299  0.9291  0.9296  0.9331  0.9291  0.9302  0.0015  
MAE (testset)     0.7334  0.7357  0.7340  0.7364  0.7352  0.7349  0.0011  
Fit time          7.07    7.56    7.65    7.51    7.66    7.49    0.22    
Test time         0.19    0.39    0.28    0.32    0.31    0.30    0.06    


{'test_rmse': array([0.92988825, 0.92907645, 0.92955652, 0.93312181, 0.92913651]),
 'test_mae': array([0.73337068, 0.73567309, 0.73397269, 0.73643192, 0.73524481]),
 'fit_time': (7.073363542556763,
  7.560258388519287,
  7.650027275085449,
  7.509051084518433,
  7.656406402587891),
 'test_time': (0.1905984878540039,
  0.38620615005493164,
  0.2801854610443115,
  0.318373441696167,
  0.31432676315307617)}

In [24]:
best_sim_options = {
    'name': 'msd',
    'user_based': False,
    'shrinkage': knn_best['shrinkage']
}

algo = KNNBasic(k=knn_best['k'], sim_options=best_sim_options)
cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBasic on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9726  0.9865  0.9776  0.9635  0.9733  0.9747  0.0075  
MAE (testset)     0.7700  0.7766  0.7707  0.7620  0.7689  0.7696  0.0047  
Fit time          1.20    1.32    1.21    1.00    1.37    1.22    0.13    
Test time         8.26    6.99    5.70    6.83    9.22    7.40    1.22    


{'test_rmse': array([0.97258777, 0.9864923 , 0.97763001, 0.96348447, 0.97332189]),
 'test_mae': array([0.76999454, 0.77655068, 0.77073785, 0.76197556, 0.76885721]),
 'fit_time': (1.202993392944336,
  1.3241088390350342,
  1.2092125415802002,
  0.9988794326782227,
  1.3740673065185547),
 'test_time': (8.262562274932861,
  6.9908294677734375,
  5.6955389976501465,
  6.833252429962158,
  9.215123176574707)}