In [0]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns
import random
import scipy.spatial.distance
from surprise import BaselineOnly, KNNBasic
from surprise import Dataset
from surprise import Reader
from surprise.model_selection import cross_validate
from surprise import similarities
from surprise import SVD, SVDpp, SlopeOne, NMF, NormalPredictor, KNNBaseline, KNNBasic, KNNWithMeans, KNNWithZScore, BaselineOnly, CoClustering

In [0]:
!pip install surprise

Collecting surprise
  Downloading https://files.pythonhosted.org/packages/61/de/e5cba8682201fcf9c3719a6fdda95693468ed061945493dea2dd37c5618b/surprise-0.1-py2.py3-none-any.whl
Collecting scikit-surprise
[?25l  Downloading https://files.pythonhosted.org/packages/f5/da/b5700d96495fb4f092be497f02492768a3d96a3f4fa2ae7dea46d4081cfa/scikit-surprise-1.1.0.tar.gz (6.4MB)
[K     |████████████████████████████████| 6.5MB 3.0MB/s 
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.0-cp36-cp36m-linux_x86_64.whl size=1673644 sha256=3132e462d6b817955ddb63a10e94bf96982432efac8d73b0616d03ed1eb606bc
  Stored in directory: /root/.cache/pip/wheels/cc/fa/8c/16c93fccce688ae1bde7d979ff102f7bee980d9cfeb8641bcf
Successfully built scikit-surprise
Installing collected packages: scikit-surprise, surprise
Successfully installed scikit-surprise-1.1.0 surprise-0.1


In [0]:
# Read the gzip-compressed ratings extract, then downcast the two integer
# columns to compact dtypes.
df = (
    pd.read_csv('df_sub.csv.gz', compression='gzip')
    .astype({'rating': 'int8', 'total_votes': 'int32'})
)

In [0]:
# Declare the rating scale, then wrap the user / item / rating columns
# (in that order) as a Surprise Dataset.
reader = Reader(rating_scale=(0, 5))
data = Dataset.load_from_df(
    df=df.loc[:, ['reviewerId', 'asin', 'rating']],
    reader=reader,
)

# Benchmark Surprise algorithms with 3-fold cross-validation

In [0]:
 # Seed both RNGs so the benchmark is repeatable across runs (the approach
 # described in the Surprise FAQ on reproducible experiments).
 random.seed(0)
 np.random.seed(0)

 # One default-parameter instance of every algorithm under comparison.
 algorithms = [
     SVD(), SVDpp(), SlopeOne(), NMF(), NormalPredictor(), KNNBaseline(),
     KNNBasic(), KNNWithMeans(), KNNWithZScore(), BaselineOnly(), CoClustering(),
 ]

 benchmark = []
 for algorithm in algorithms:
     # `results` holds one value per fold for test_rmse, test_mae, fit_time
     # and test_time.
     results = cross_validate(algorithm, data, measures=['RMSE', 'MAE'], cv=3, verbose=True)
     # Average each measure over the folds. A plain dict record replaces the
     # old `Series.append` call (removed in pandas 2.0) and keeps the metric
     # columns numeric instead of object dtype.
     record = pd.DataFrame.from_dict(results).mean(axis=0).to_dict()
     # Use the class name directly rather than parsing it out of repr() text.
     record['Algorithm'] = type(algorithm).__name__
     benchmark.append(record)

 # One row per algorithm, best (lowest) test RMSE first.
 final_results = pd.DataFrame(benchmark).set_index('Algorithm').sort_values('test_rmse')
 final_results

Evaluating RMSE, MAE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.8601  0.8606  0.8656  0.8621  0.0025  
MAE (testset)     0.6644  0.6657  0.6693  0.6665  0.0021  
Fit time          12.54   12.99   12.83   12.79   0.19    
Test time         0.95    1.29    1.09    1.11    0.14    
Evaluating RMSE, MAE of algorithm SVDpp on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.8599  0.8588  0.8585  0.8591  0.0006  
MAE (testset)     0.6624  0.6599  0.6613  0.6612  0.0010  
Fit time          363.73  345.33  351.35  353.47  7.66    
Test time         17.77   18.45   19.33   18.52   0.64    
Evaluating RMSE, MAE of algorithm SlopeOne on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.8916  0.8952  0.8892  0.8920  0.0025  
MAE (testset)     0.6822  0.6823  0.6826  0.6824  0.0002  
Fit time          7.21    8.16    7.53    7.63    0.39    
Test 

Unnamed: 0_level_0,test_rmse,test_mae,fit_time,test_time
Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
BaselineOnly,0.855957,0.666846,1.05112,1.013781
SVDpp,0.859079,0.661183,353.472611,18.518866
SVD,0.862076,0.666452,12.785394,1.107035
KNNBaseline,0.868869,0.666314,1.774138,7.893475
KNNWithMeans,0.871516,0.666048,0.896398,7.516442
KNNWithZScore,0.875444,0.665424,1.186114,7.462321
CoClustering,0.891596,0.681144,6.491434,1.251335
SlopeOne,0.891981,0.682352,7.632689,13.265806
NMF,0.924378,0.716844,17.285174,0.939333
KNNBasic,0.94435,0.735248,0.768897,5.848725


In [0]:
# Persist the benchmark table, including the header row, as a CSV file in
# the working directory.
final_results.to_csv(path_or_buf='results_cv_recommnder.csv', header=True)