In [1]:
!pip install scikit-surprise



In [2]:
from surprise import KNNBasic
from surprise import KNNWithMeans
from surprise import KNNWithZScore
from surprise import KNNBaseline
from surprise import Dataset
from surprise import accuracy
from surprise.model_selection import KFold
from surprise.model_selection import cross_validate
from surprise.model_selection import train_test_split
from surprise.prediction_algorithms.random_pred import NormalPredictor

In [3]:
# Load the built-in 'ml-100k' ratings dataset shipped with / fetched by Surprise.
data = Dataset.load_builtin(name='ml-100k')

In [4]:
# Build the full trainset once and read every summary statistic from it,
# instead of rebuilding the trainset for each printed line.
full_data = data.build_full_trainset()
print("Number of users: ", full_data.n_users)
print("Number of movies: ", full_data.n_items)
print("Number of ratings: ", full_data.n_ratings)
print("Rating scale: ", full_data.rating_scale)

Number of users:  943
Number of movies:  1682
Number of ratings:  100000
Rating scale:  (1, 5)


In [5]:
# One default-configured instance of each algorithm under comparison,
# keyed by the algorithm's class name (insertion order is preserved).
algorithms = {
    algo_class.__name__: algo_class()
    for algo_class in (
        KNNBasic,
        KNNWithMeans,
        KNNWithZScore,
        KNNBaseline,
        NormalPredictor,
    )
}

In [6]:
# Share one seeded fold iterator across all algorithms so every model is scored
# on the same five train/test splits and the splits are repeatable across runs.
# NOTE(review): NormalPredictor presumably still samples its predictions at
# random, so its scores may vary between runs unless the global RNG is seeded.
cv_folds = KFold(n_splits=5, random_state=42, shuffle=True)

for k, algorithm in algorithms.items():
    print(f'\nAlgorithm {k}')
    # verbose=True makes cross_validate print the per-fold MAE table itself.
    cross_validate(algorithm, data, measures=['MAE'], cv=cv_folds, verbose=True)


Algorithm KNNBasic
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating MAE of algorithm KNNBasic on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
MAE (testset)     0.7717  0.7777  0.7757  0.7743  0.7672  0.7733  0.0036  
Fit time          0.30    0.36    0.32    0.32    0.32    0.32    0.02    
Test time         3.37    3.40    3.42    3.40    3.25    3.37    0.06    

Algorithm KNNWithMeans
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd s

In [7]:
# Hold out 30% of the ratings for testing. A fixed random_state keeps the split
# (and therefore the reported MAE values) stable across re-runs.
trainset, testset = train_test_split(data, test_size=.3, random_state=42)

# Fit every algorithm on the same training split and score it on the same test split.
for k, algorithm in algorithms.items():
    print(f'\nAlgorithm {k}')
    algorithm.fit(trainset)
    predictions = algorithm.test(testset)
    accuracy.mae(predictions)  # prints the "MAE:" line for this algorithm



Algorithm KNNBasic
Computing the msd similarity matrix...
Done computing similarity matrix.
MAE:  0.7789

Algorithm KNNWithMeans
Computing the msd similarity matrix...
Done computing similarity matrix.
MAE:  0.7525

Algorithm KNNWithZScore
Computing the msd similarity matrix...
Done computing similarity matrix.
MAE:  0.7500

Algorithm KNNBaseline
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
MAE:  0.7383

Algorithm NormalPredictor
MAE:  1.2176
