In [1]:
import pandas as pd
from surprise import Dataset, Reader
from surprise.model_selection import cross_validate
from surprise import NormalPredictor, BaselineOnly, KNNBaseline, SVD, SVDpp, SlopeOne

import matplotlib.pyplot as plt


In [2]:
# Load the book-ratings CSV into a Surprise dataset.
file_path = "./BX-CSV/BookRatings.csv"
# The file starts with a header row, so the reader skips one line.
# Each remaining line is parsed as comma-separated "user,item,rating",
# with ratings declared on a 0-10 scale.
reader = Reader(
    line_format='user item rating',
    sep=',',
    rating_scale=(0, 10),
    skip_lines=1,
)
data = Dataset.load_from_file(file_path, reader=reader)


In [3]:
# Benchmark several Surprise algorithms with 3-fold cross-validation and
# rank them by mean test RMSE (lower is better).
# NOTE(review): no random seed is set in this cell, so fold splits and the
# stochastic algorithms presumably give slightly different numbers per run.
benchmark_rows = []
for algorithm in [SVD(), SVDpp(), NormalPredictor(), KNNBaseline(), BaselineOnly(), SlopeOne()]:
    # Perform cross validation; `results` maps each metric name to its per-fold values.
    results = cross_validate(algorithm, data, measures=['RMSE'], cv=3, verbose=False)

    # Average every metric (test_rmse, fit_time, test_time) across the folds.
    row = pd.DataFrame.from_dict(results).mean(axis=0).to_dict()
    # Label the row with the algorithm's class name. Built as a plain dict
    # record instead of Series.append, which was removed in pandas 2.0.
    row['Algorithm'] = type(algorithm).__name__
    benchmark_rows.append(row)

# Build the frame once from the collected records; metric columns stay numeric.
benchmark = pd.DataFrame(benchmark_rows).set_index('Algorithm').sort_values('test_rmse')
benchmark

Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...


Unnamed: 0_level_0,test_rmse,fit_time,test_time
Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
BaselineOnly,3.37825,0.14658,0.265521
SlopeOne,3.475073,0.604402,3.159538
KNNBaseline,3.49436,0.730095,4.974712
SVD,3.542505,5.208315,0.365527
SVDpp,3.814625,89.366484,4.00034
NormalPredictor,4.73365,0.127529,0.350858
