In [97]:
import os
import surprise
from surprise import Reader, Dataset

file_path = os.path.expanduser('/Users/drakaris/Desktop/personalisation projects/personalization-theory-master/sampled_data.csv')
reader = Reader(line_format='user item rating', sep=',')
data = Dataset.load_from_file(file_path, reader=reader)
data.split(n_folds=5) # data can now be used normally

In [98]:
from surprise import SVD, evaluate, GridSearch
param_grid = {'n_epochs': [20, 25, 30], 'lr_all': [0.01, 0.012],
              'reg_all': [0.04, 0.06]}
grid_search = GridSearch(SVD, param_grid, measures=['RMSE', 'MAE'])
grid_search.evaluate(data)

import pandas as pd  # noqa

results_svd = pd.DataFrame.from_dict(grid_search.cv_results)
print(results_svd)

# best RMSE score
print(grid_search.best_score['RMSE'])
# >>> 1.17046722149

# combination of parameters that gave the best RMSE score
print(grid_search.best_params['RMSE'])
# >>> {'lr_all': 0.012, 'reg_all': 0.06, 'n_epochs': 25}

# best MAE score
print(grid_search.best_score['MAE'])
# >>> 0.909007988848

# combination of parameters that gave the best MAE score
print(grid_search.best_params['MAE'])
# >>> {'lr_all': 0.012, 'reg_all': 0.06, 'n_epochs': 30}



[{u'lr_all': 0.01, u'reg_all': 0.04, u'n_epochs': 20}, {u'lr_all': 0.01, u'reg_all': 0.04, u'n_epochs': 25}, {u'lr_all': 0.01, u'reg_all': 0.04, u'n_epochs': 30}, {u'lr_all': 0.01, u'reg_all': 0.06, u'n_epochs': 20}, {u'lr_all': 0.01, u'reg_all': 0.06, u'n_epochs': 25}, {u'lr_all': 0.01, u'reg_all': 0.06, u'n_epochs': 30}, {u'lr_all': 0.012, u'reg_all': 0.04, u'n_epochs': 20}, {u'lr_all': 0.012, u'reg_all': 0.04, u'n_epochs': 25}, {u'lr_all': 0.012, u'reg_all': 0.04, u'n_epochs': 30}, {u'lr_all': 0.012, u'reg_all': 0.06, u'n_epochs': 20}, {u'lr_all': 0.012, u'reg_all': 0.06, u'n_epochs': 25}, {u'lr_all': 0.012, u'reg_all': 0.06, u'n_epochs': 30}]
------------
Parameters combination 1 of 12
params:  {u'lr_all': 0.01, u'reg_all': 0.04, u'n_epochs': 20}
------------
Mean RMSE: 1.1846
Mean MAE : 0.9228
------------
------------
Parameters combination 2 of 12
params:  {u'lr_all': 0.01, u'reg_all': 0.04, u'n_epochs': 25}
------------
Mean RMSE: 1.1803
Mean MAE : 0.9182
------------
---------

In [99]:
from surprise import NMF, evaluate
param_grid = {'n_factors': [20, 25, 30],'n_epochs': [30, 40, 50] }
grid_search = GridSearch(NMF, param_grid, measures=['RMSE', 'MAE'])
grid_search.evaluate(data)

import pandas as pd  # noqa

results_nmf = pd.DataFrame.from_dict(grid_search.cv_results)
print(results_nmf)

# best RMSE score
print(grid_search.best_score['RMSE'])
# >>> 1.17046722149

# combination of parameters that gave the best RMSE score
print(grid_search.best_params['RMSE'])
# >>> {'lr_all': 0.012, 'reg_all': 0.06, 'n_epochs': 25}

# best MAE score
print(grid_search.best_score['MAE'])
# >>> 0.909007988848

# combination of parameters that gave the best MAE score
print(grid_search.best_params['MAE'])
# >>> {'lr_all': 0.012, 'reg_all': 0.06, 'n_epochs': 30}



[{u'n_factors': 20, u'n_epochs': 30}, {u'n_factors': 20, u'n_epochs': 40}, {u'n_factors': 20, u'n_epochs': 50}, {u'n_factors': 25, u'n_epochs': 30}, {u'n_factors': 25, u'n_epochs': 40}, {u'n_factors': 25, u'n_epochs': 50}, {u'n_factors': 30, u'n_epochs': 30}, {u'n_factors': 30, u'n_epochs': 40}, {u'n_factors': 30, u'n_epochs': 50}]
------------
Parameters combination 1 of 9
params:  {u'n_factors': 20, u'n_epochs': 30}
------------
Mean RMSE: 1.2750
Mean MAE : 0.9909
------------
------------
Parameters combination 2 of 9
params:  {u'n_factors': 20, u'n_epochs': 40}
------------
Mean RMSE: 1.2859
Mean MAE : 1.0060
------------
------------
Parameters combination 3 of 9
params:  {u'n_factors': 20, u'n_epochs': 50}
------------
Mean RMSE: 1.2832
Mean MAE : 1.0064
------------
------------
Parameters combination 4 of 9
params:  {u'n_factors': 25, u'n_epochs': 30}
------------
Mean RMSE: 1.2768
Mean MAE : 0.9794
------------
------------
Parameters combination 5 of 9
params:  {u'n_factors':

In [134]:
"""
This module illustrates how to retrieve the top-10 items with highest rating
prediction. We first train an SVD algorithm on the MovieLens dataset, and then
predict all the ratings for the pairs (user, item) that are not in the training
set. We then retrieve the top-10 prediction for each user.
"""

from __future__ import (absolute_import, division, print_function,
                        unicode_literals)
from collections import defaultdict



def get_top_n(predictions, n=10):
    '''Return the top-N recommendation for each user from a set of predictions.

    Args:
        predictions(list of Prediction objects): The list of predictions, as
            returned by the test method of an algorithm.
        n(int): The number of recommendation to output for each user. Default
            is 10.

    Returns:
    A dict where keys are user (raw) ids and values are lists of tuples:
        [(raw item id, rating estimation), ...] of size n.
    '''
    i=0
    # First map the predictions to each user.
    top_n = defaultdict(list)
    for i in range(0,5):
        for uid, iid, true_r, est, _ in predictions[i]:
            top_n[uid].append([iid, est])
            
            

    # Then sort the predictions for each user and retrieve the k highest ones.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(reverse=True)
        top_n[uid] = user_ratings[:n]
    return top_n

algo = SVD()
for trainset, testset in data.folds():
    # train and test algorithm.
    algo.train(trainset)
    predictions[i] = algo.test(testset)
    #print(len(predictions[i]))
    i=i+1
    

top_n = get_top_n(predictions, n=10)

i=0
#Print the recommended items for each user
for uid, user_ratings in top_n.items():
    print(uid, [iid for (iid, _) in user_ratings])

A2QHVQSB7O0OTE [u'B0050SZ836']
A2WO7CURD15SXG [u'B00D7NQP9M']
A16WKPOSEGQC3Y [u'B006P5RW3M', u'B0050SZ836', u'B003ICGL7I']
A2XTHQ9DKIUZXZ [u'B006WQR3OM', u'B0050SWS8O']
A36UKFV79879MD [u'B002I092MM', u'B000FQBPDU']
AKHSZLEPUTTY [u'B002BSC54I']
A2IXIL77D51CAZ [u'B004FYEZMQ']
A26LHI94KJNO [u'B0050SZ836']
A32ZGK4SX98400 [u'B002I0K3M0']
A2BXZM08OG625Z [u'B004FYEZMQ']
A1SRDTY3LVYMOM [u'B0029LJIFG']
AMXD8CPSN32GS [u'B002BRZ9G0']
AHSFXNTBJYNK4 [u'B007SRM5U6']
A1WJW2OIR5BZ58 [u'B00GOOSTFE']
AW4SW0UXMRHAE [u'B002I0J82G']
A2CACYNRZ337HQ [u'B006WQR3OM']
A18NQZS6ETRFKX [u'B002BSC54I', u'B00004Y57G']
A2PPEFFIDXGCQU [u'B001K7HV3Q']
AAPZ1OVFQQIDO [u'B004FYEZMQ']
A1ESCF1CWAOUVK [u'B002BRZ9G0']
A2NOW4U7W3F7RI [u'B00CMQTVUA']
A3SDGRDVZZFUZK [u'B0029LJIFG']
A11A9AVEM5EVU4 [u'B006WQR3OM', u'B0047THYWC', u'B003ICGL7I']
A1JCVVDJAMWHR3 [u'B00B67ZTUW']
AYKOEZFVI4TGP [u'B0053BCMAC', u'B002I0K3M0', u'B002I092MM', u'B002ELCUUG']
A2WUS3SV2I2VLG [u'B0029LJIFG']
A3A9X2PYPAE0Z4 [u'B0053B5RGI']
AUFQ3KOT6IF5S [u'B007X

A3GAQ8EHA5RNXZ [u'B00BMFIXOW']
AZY1N2YRSROMQ [u'B00D7NQP9M']
A3CO1P0AI9AEWR [u'B005EQE0YM']
A26JPPFJ1WLTGY [u'B007XVTR5S', u'B007XVTR3K', u'B003JVKHEQ', u'B002I0J82G']
A130D9ULBKIXL2 [u'B00CMQTVUA']
A2WU2WW034MN0C [u'B00BMFIXOW']
A22TDUGDO0LBT4 [u'B00B29S1JK']
ANE93KIVSC21S [u'B002I0H1DY']
AIZS7SM0PTT2B [u'B004FYEZMQ', u'B0029LJIFG']
A3DI3261LBFACQ [u'B004FYEZMQ']
A1FSB0ABKLVCPD [u'B005EQE0YM']
A1WLYBOGZNC1V8 [u'B0029LJIFG']
A3AVJ9W2VK15YA [u'B00003OTI3']
A20XZQNHVICSE0 [u'B007XVTR3K']
A1EAJX5PH6TLE3 [u'B0053BCMAC']
A1SQL0AN5ZLOZK [u'B005EQE0YM']
A20DRRKAN5Z9Q [u'B006JEE05W', u'B005C2D2H4', u'B0050SZ836']
A28IBOLVCSQRLQ [u'B00CMQTVUA']
ABSWT9Y9UUQ6W [u'B0053BCMAC']
A3GSMD3LG5RMSV [u'B00CMQTVUA']
ALIDGWFTNIUMV [u'B0029LJIFG']
A1MVXNVH1RVMN7 [u'B007XVTR5S']
A28CCN6VYHRUKS [u'B007XVTR3K']
ASAH67W5LWK3C [u'B007XVTR5S']
A3GKJYVADM7NGF [u'B003OPX802']
A2IJ54FX1L83WK [u'B0088MVPRY']
ANZ2TQZPNB5KL [u'B004FYEZMQ']
A360QXKTMHPXNS [u'B0037LTTRO']
A11JEFCFYWYV4W [u'B003JVKHEQ']
A1NAIEOE8B59TG [u'B