## Import libraries and data

In [8]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from surprise import Dataset
from surprise import Reader

In [9]:
#Import the movielens dataset from the Surprise built-in datasets
movielens = Dataset.load_builtin('ml-100k')

### Memory based approach

#### With fixed parameter values

In [4]:
from surprise import KNNWithMeans

# To use item-based cosine similarity
sim_options = {
    "name": "cosine",
    "user_based": False,  # Compute  similarities between items
}
algo = KNNWithMeans(sim_options=sim_options)

In [20]:
trainingSet = movielens.build_full_trainset()

In [11]:
algo.fit(trainingSet)

Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithMeans at 0x12af016c470>

In [21]:
testingSet = trainingSet.build_anti_testset()
#predictions = algo.test(testingSet)

#### Using Grid Search

In [13]:
from surprise.model_selection import GridSearchCV

In [14]:
sim_options_grid = {
    "name": ["msd", "cosine"],
    "min_support": [3, 4, 5],
    "user_based": [False, True],
}

param_grid = {"sim_options": sim_options_grid}

gs = GridSearchCV(KNNWithMeans, param_grid, measures=["rmse", "mae"], cv=3)
gs.fit(movielens)

print(gs.best_score["rmse"])
print(gs.best_params["rmse"])

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computi

### Model based approach

In [15]:
from surprise import SVD

In [17]:
param_grid = {
    "n_epochs": [5, 10],
    "lr_all": [0.002, 0.005],
    "reg_all": [0.4, 0.6]
}
gs = GridSearchCV(SVD, param_grid, measures=["rmse", "mae"], cv=3)

gs.fit(movielens)

print(gs.best_score["rmse"])
print(gs.best_params["rmse"])

0.9642573605088672
{'n_epochs': 10, 'lr_all': 0.005, 'reg_all': 0.4}


### Get top-10 recommendations for each user

In [18]:
from collections import defaultdict

In [19]:
def get_top_n(predictions, n=10):
    '''Return the top-N recommendation for each user from a set of predictions.

    Args:
        predictions(list of Prediction objects): The list of predictions, as
            returned by the test method of an algorithm.
        n(int): The number of recommendation to output for each user. Default
            is 10.

    Returns:
    A dict where keys are user (raw) ids and values are lists of tuples:
        [(raw item id, rating estimation), ...] of size n.
    '''

    # First map the predictions to each user.
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    # Then sort the predictions for each user and retrieve the k highest ones.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n

In [23]:
#Initialize SVD() object using the parameter values returned by grid search
algo = SVD(n_epochs=10, lr_all=0.005, reg_all=0.4)
algo.fit(trainingSet)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x12af12192e8>

In [25]:
#Get predictions
predictions = algo.test(testingSet)

In [26]:
#Get top 10 recommendations for each user
top_n = get_top_n(predictions, n=10)

In [27]:
# Print the recommended items for each user
for uid, user_ratings in top_n.items():
    print(uid, [iid for (iid, _) in user_ratings])

196 ['408', '169', '318', '64', '114', '483', '12', '603', '178', '50']
186 ['408', '169', '318', '64', '483', '114', '603', '178', '50', '513']
22 ['408', '169', '318', '64', '114', '483', '12', '603', '178', '513']
244 ['408', '483', '12', '603', '178', '513', '480', '127', '272', '657']
166 ['408', '169', '318', '64', '114', '483', '12', '603', '178', '50']
298 ['408', '169', '64', '114', '12', '513', '480', '272', '657', '515']
115 ['408', '169', '318', '64', '114', '483', '603', '513', '480', '272']
253 ['408', '169', '114', '603', '178', '513', '480', '357', '272', '174']
305 ['114', '603', '513', '657', '515', '313', '316', '488', '22', '641']
6 ['114', '603', '657', '251', '313', '963', '172', '316', '923', '190']
62 ['408', '169', '603', '178', '513', '480', '272', '657', '515', '427']
286 ['318', '64', '114', '12', '603', '178', '513', '480', '657', '98']
200 ['408', '64', '114', '12', '603', '178', '513', '480', '127', '272']
210 ['408', '169', '318', '64', '12', '603', '178

In [28]:
predictions

[Prediction(uid='196', iid='302', r_ui=3.52986, est=4.015942610261904, details={'was_impossible': False}),
 Prediction(uid='196', iid='377', r_ui=3.52986, est=2.990534557898229, details={'was_impossible': False}),
 Prediction(uid='196', iid='51', r_ui=3.52986, est=3.4777022466265555, details={'was_impossible': False}),
 Prediction(uid='196', iid='346', r_ui=3.52986, est=3.653539997560345, details={'was_impossible': False}),
 Prediction(uid='196', iid='474', r_ui=3.52986, est=4.045346047770921, details={'was_impossible': False}),
 Prediction(uid='196', iid='265', r_ui=3.52986, est=3.8131026860878854, details={'was_impossible': False}),
 Prediction(uid='196', iid='465', r_ui=3.52986, est=3.5308273835020487, details={'was_impossible': False}),
 Prediction(uid='196', iid='451', r_ui=3.52986, est=3.3710081758417894, details={'was_impossible': False}),
 Prediction(uid='196', iid='86', r_ui=3.52986, est=3.8170843353716677, details={'was_impossible': False}),
 Prediction(uid='196', iid='1014',

In [30]:
movielens

<surprise.reader.Reader at 0x12adc4ca2e8>