In [1]:
from surprise import SVD
from surprise import Dataset
from surprise.model_selection import cross_validate


# Fetch the built-in MovieLens 100k ratings (they are downloaded if not already present).
data = Dataset.load_builtin("ml-100k")

# Recommender used throughout the notebook: SVD built with its default settings.
algo = SVD()

In [2]:
# Run 5-fold cross-validation and print results
# cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True, n_jobs = -1)

In [3]:
from surprise import accuracy
from surprise.model_selection import train_test_split

# Sample a random trainset and testset.
# With test_size=.0001 the test set holds only 0.01% of the ratings
# (10 ratings for ml-100k); everything else goes to the trainset.
# NOTE(review): no random_state is passed, so the split presumably differs between runs — confirm.
trainset, testset = train_test_split(data, test_size=.0001)

# We'll use the famous SVD algorithm.
# algo = SVD()

# Train the algorithm on the trainset, and predict ratings for the testset
# algo.fit(trainset)
# predictions = algo.test(testset)

# Then compute RMSE
# accuracy.rmse(predictions)

In [4]:
# Inspect the held-out ratings; each entry is a (raw user id, raw item id, rating) tuple.
testset

[('305', '550', 3.0),
 ('889', '1428', 3.0),
 ('851', '1094', 1.0),
 ('255', '840', 1.0),
 ('301', '503', 3.0),
 ('378', '66', 3.0),
 ('222', '712', 3.0),
 ('326', '131', 2.0),
 ('374', '222', 4.0),
 ('655', '578', 2.0)]

In [11]:
# testset is a plain Python list, which has no .shape attribute; len() reports how many ratings it holds.
len(testset)

In [5]:
# Note that you can train and test an algorithm with the following one-liner:
# fit() is chained straight into test(), so the predictions come from the model just trained on trainset.
predictions = algo.fit(trainset).test(testset)

In [6]:
# from surprise import KNNBasic
# # Retrieve the trainset.
# trainset = data.build_full_trainset()

# # Build an algorithm, and train it.
# algo = KNNBasic()
# algo.fit(trainset)

In [7]:
# uid = str(196)  # raw user id (as in the ratings file). They are **strings**!
# iid = str(302)  # raw item id (as in the ratings file). They are **strings**!

# # get a prediction for specific users and items.
# pred = algo.predict(uid, iid, r_ui=4, verbose=True)

In [8]:
from collections import defaultdict

def get_top_n(predictions, n=10):
    """Build per-user top-N recommendation lists from a batch of predictions.

    Args:
        predictions(list of Prediction objects): Predictions as returned by
            the test method of an algorithm. Each one is unpacked into five
            fields; only the user id, the item id and the estimated rating
            are used.
        n(int): Number of recommendations to keep for each user. Default
            is 10.

    Returns:
    A defaultdict(list) whose keys are user (raw) ids and whose values are
        lists of tuples [(raw item id, rating estimation), ...], ordered from
        the highest to the lowest estimation and cut to the first n entries.
    """

    # Bucket every (item, estimation) pair under the user it was predicted for.
    per_user = defaultdict(list)
    for prediction in predictions:
        user, item, _actual, estimation, _details = prediction
        per_user[user].append((item, estimation))

    # Rank each user's bucket, best estimation first, and keep the first n.
    # sorted() is stable, so equal estimations keep their original order.
    ranked = defaultdict(list)
    for user, candidates in per_user.items():
        by_estimation = sorted(candidates, key=lambda pair: pair[1], reverse=True)
        ranked[user] = by_estimation[:n]

    return ranked

In [9]:
# Rank the test-set predictions per user, keeping the 10 highest-estimated items for each.
top_n = get_top_n(predictions, n=10)

In [10]:
# Show every user followed by the raw ids of the items recommended to them.
for uid, user_ratings in top_n.items():
    recommended_ids = [item_id for item_id, _ in user_ratings]
    print(uid, recommended_ids)

305 ['550']
889 ['1428']
851 ['1094']
255 ['840']
301 ['503']
378 ['66']
222 ['712']
326 ['131']
374 ['222']
655 ['578']
