# Collaborative filtering using `surprise`

In [21]:
from surprise import SVD
from surprise import SVDpp
from surprise import Dataset
from surprise import accuracy
from surprise.model_selection import cross_validate
from surprise.model_selection import train_test_split
from surprise.prediction_algorithms.random_pred import NormalPredictor

In [17]:
# Load the built-in MovieLens 100k ratings dataset; every later cell works on `data`.
data = Dataset.load_builtin(name='ml-100k')

In [11]:
# Build the full trainset a single time and read every summary statistic from
# that one object, instead of rebuilding the trainset for each printed line.
full_data = data.build_full_trainset()
print("Number of users: ", full_data.n_users)
print("Number of movies: ", full_data.n_items)
print("Number of ratings: ", full_data.n_ratings)
print("Rating scale: ", full_data.rating_scale)

Number of users:  943
Number of movies:  1682
Number of ratings:  100000
Rating scale:  (1, 5)


In [22]:
# One instance of each algorithm under comparison, all with default settings.
# `rand` (NormalPredictor, from surprise's random_pred module) is the third
# contender alongside the two SVD variants.
svd, svdpp, rand = SVD(), SVDpp(), NormalPredictor()

## Cross-validation

In [25]:
# Run the same 5-fold cross-validation, scored by MAE, for each algorithm in
# turn. verbose=True prints the per-fold table; the returned dict is discarded.
for algo in (svd, svdpp, rand):
    cross_validate(algo, data, measures=['MAE'], cv=5, verbose=True)

Evaluating MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
MAE (testset)     0.7385  0.7373  0.7375  0.7405  0.7425  0.7392  0.0020  
Fit time          5.04    4.87    4.80    5.45    5.88    5.21    0.40    
Test time         0.18    0.13    0.27    0.13    0.15    0.17    0.05    
Evaluating MAE of algorithm SVDpp on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
MAE (testset)     0.7206  0.7252  0.7210  0.7167  0.7243  0.7216  0.0030  
Fit time          191.19  229.83  204.98  211.68  203.86  208.31  12.64   
Test time         3.03    3.54    2.97    3.19    3.67    3.28    0.28    
Evaluating MAE of algorithm NormalPredictor on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
MAE (testset)     1.2258  1.2243  1.2168  1.2190  1.2153  1.2202  0.0041  
Fit time          0.10    0.28    0.12    0.12    0.16    0.15    0.06    
Test time        

{'test_mae': array([1.22578528, 1.22432402, 1.2168179 , 1.21899935, 1.215309  ]),
 'fit_time': (0.09746003150939941,
  0.27556514739990234,
  0.11823701858520508,
  0.11758995056152344,
  0.15800189971923828),
 'test_time': (0.6158480644226074,
  0.3149087429046631,
  0.12633109092712402,
  0.13491082191467285,
  0.40221405029296875)}

## Train-test split

In [24]:
# Hold out 30% of the ratings for testing. No random_state is passed here,
# so the split is not pinned to a fixed seed.
trainset, testset = train_test_split(data, test_size=0.3)

models = (svd, svdpp, rand)

# Fit every model on the same training portion before any of them predicts.
for model in models:
    model.fit(trainset)

# Predict the held-out ratings with each fitted model, in the same order.
svd_predictions, svdpp_predictions, rand_predictions = [
    model.test(testset) for model in models
]

# accuracy.mae prints the MAE for each set of predictions.
for predictions in (svd_predictions, svdpp_predictions, rand_predictions):
    accuracy.mae(predictions)

MAE:  0.7420
MAE:  0.7239
MAE:  1.2216


1.221561021946232