In [14]:
import os
from surprise import SVD
from surprise import Dataset, Reader, evaluate, print_perf, GridSearch,accuracy
import pandas as pd 

In [2]:
# Ratings file of the MovieLens-100k dataset ('~' expands to the user's home).
file_path = os.path.expanduser('~/.surprise_data/ml-100k/ml-100k/u.data')

# A custom file needs an explicit Reader describing its layout: every line of
# the movielens-100k file reads 'user item rating timestamp', with the fields
# separated by tab characters.
reader = Reader(sep='\t', line_format='user item rating timestamp')

# Load the ratings, then split them into 5 folds for cross-validation.
data = Dataset.load_from_file(file_path, reader=reader)
data.split(n_folds=5)

In [4]:
# Candidate values per SVD hyper-parameter: 2 x 2 x 2 = 8 combinations.
param_grid = {
    'n_epochs': [5, 10],
    'lr_all': [0.002, 0.005],
    'reg_all': [0.4, 0.6],
}

# Grid search over SVD, scoring every combination with both RMSE and FCP.
grid_search = GridSearch(SVD, param_grid, measures=['RMSE', 'FCP'])

In [5]:
# Run the grid search on the dataset that was split into 5 folds above; the
# mean RMSE / FCP of each parameter combination is printed as it completes.
grid_search.evaluate(data)

------------
Parameters combination 1 of 8
params:  {'lr_all': 0.002, 'n_epochs': 5, 'reg_all': 0.4}
------------
Mean RMSE: 0.9908
Mean FCP : 0.6829
------------
------------
Parameters combination 2 of 8
params:  {'lr_all': 0.002, 'n_epochs': 5, 'reg_all': 0.6}
------------
Mean RMSE: 0.9977
Mean FCP : 0.6844
------------
------------
Parameters combination 3 of 8
params:  {'lr_all': 0.002, 'n_epochs': 10, 'reg_all': 0.4}
------------
Mean RMSE: 0.9735
Mean FCP : 0.6896
------------
------------
Parameters combination 4 of 8
params:  {'lr_all': 0.002, 'n_epochs': 10, 'reg_all': 0.6}
------------
Mean RMSE: 0.9821
Mean FCP : 0.6903
------------
------------
Parameters combination 5 of 8
params:  {'lr_all': 0.005, 'n_epochs': 5, 'reg_all': 0.4}
------------
Mean RMSE: 0.9698
Mean FCP : 0.6911
------------
------------
Parameters combination 6 of 8
params:  {'lr_all': 0.005, 'n_epochs': 5, 'reg_all': 0.6}
------------
Mean RMSE: 0.9790
Mean FCP : 0.6912
------------
------------
Paramet

In [6]:
# Parameter combination recorded as best for the RMSE measure.
best_rmse_params = grid_search.best_params['RMSE']
print(best_rmse_params)

{'lr_all': 0.005, 'n_epochs': 10, 'reg_all': 0.4}


In [8]:
# Tabulate the grid-search results (one row per parameter combination).
cv_results = grid_search.cv_results
results_df = pd.DataFrame.from_dict(cv_results)
print(results_df)

        FCP      RMSE  lr_all  n_epochs  \
0  0.682871  0.990792   0.002         5   
1  0.684391  0.997680   0.002         5   
2  0.689575  0.973467   0.002        10   
3  0.690325  0.982126   0.002        10   
4  0.691055  0.969812   0.005         5   
5  0.691210  0.979038   0.005         5   
6  0.693900  0.961165   0.005        10   
7  0.694274  0.971369   0.005        10   

                                              params  \
0   {'lr_all': 0.002, 'n_epochs': 5, 'reg_all': 0.4}   
1   {'lr_all': 0.002, 'n_epochs': 5, 'reg_all': 0.6}   
2  {'lr_all': 0.002, 'n_epochs': 10, 'reg_all': 0.4}   
3  {'lr_all': 0.002, 'n_epochs': 10, 'reg_all': 0.6}   
4   {'lr_all': 0.005, 'n_epochs': 5, 'reg_all': 0.4}   
5   {'lr_all': 0.005, 'n_epochs': 5, 'reg_all': 0.6}   
6  {'lr_all': 0.005, 'n_epochs': 10, 'reg_all': 0.4}   
7  {'lr_all': 0.005, 'n_epochs': 10, 'reg_all': 0.6}   

                                            scores  
0  {'FCP': 0.682871181939, 'RMSE': 0.990791924177}  
1

In [15]:
# SVD with its default hyper-parameters; the same instance is re-trained on
# every fold below.
algo = SVD()

# Manual cross-validation over the folds created by data.split().
for trainset, testset in data.folds():

    # Fit on this fold's training part, then predict its held-out part.
    algo.train(trainset)
    predictions = algo.test(testset)

    # Compute the fold's Root Mean Squared Error; verbose=True makes
    # accuracy.rmse print it. `rmse` is overwritten on each iteration, so
    # only the last fold's value remains after the loop.
    rmse = accuracy.rmse(predictions, verbose=True)

RMSE: 0.9312
RMSE: 0.9422
RMSE: 0.9337
RMSE: 0.9385
RMSE: 0.9366


In [17]:
# Start from a fresh, untrained SVD instance.
algo = SVD()

# Load the built-in 'ml-100k' dataset (this rebinds `data`) and build the
# full trainset from it.
data = Dataset.load_builtin('ml-100k')
trainset = data.build_full_trainset()

# Fit the algorithm on that trainset.
algo.train(trainset)

In [18]:
# Raw user / item ids exactly as they appear in the ratings file.
# They are **strings**, not integers!
uid, iid = '196', '302'

# Predict the rating for this single (user, item) pair. With verbose=True the
# prediction is printed, showing the supplied r_ui next to the estimate.
pred = algo.predict(uid, iid, r_ui=4, verbose=True)

user: 196        item: 302        r_ui = 4.00   est = 3.92   {'was_impossible': False}
