In [38]:
from surprise import SVD, Dataset, accuracy
from surprise.model_selection import train_test_split, cross_validate
from sklearn.pipeline import Pipeline

## Data Import

In [None]:
data_raw = Dataset.load_builtin('ml-100k')

In [67]:
from surprise import Reader
from pathlib import Path

path = Path(Path.cwd(), 'data', 'movielens', 'ml-latest-small')
reader = Reader(line_format = 'user item rating timestamp', sep=',', skip_lines=1)
rating_data = Dataset.load_from_file(Path(path, 'ratings.csv'), reader)
rating_data

<surprise.dataset.DatasetAutoFolds at 0x168d9e6b2b0>

In [68]:
train, test = train_test_split(rating_data, test_size=.2)

## Pipeline 1

In [69]:
alg = SVD()

In [70]:
alg.fit(train)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x168d05fb4c0>

In [71]:
preds = alg.test(test)

In [72]:
from surprise import accuracy
print(f'rmse: {accuracy.rmse(preds)}')
print(f'mae: {accuracy.mae(preds)}')

RMSE: 0.8790
rmse: 0.8790195558866146
MAE:  0.6733
mae: 0.6733243143490691


In [73]:
cross_validate(algo, data_raw, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9263  0.9345  0.9423  0.9441  0.9340  0.9362  0.0064  
MAE (testset)     0.7324  0.7356  0.7412  0.7424  0.7383  0.7380  0.0037  
Fit time          3.88    3.73    3.73    3.80    3.74    3.78    0.06    
Test time         0.20    0.12    0.19    0.11    0.19    0.16    0.04    


{'test_rmse': array([0.92626608, 0.93446096, 0.94233422, 0.94414915, 0.93402244]),
 'test_mae': array([0.73240561, 0.73559968, 0.7412094 , 0.74238036, 0.73829496]),
 'fit_time': (3.879788637161255,
  3.733160972595215,
  3.726440668106079,
  3.8010663986206055,
  3.7395434379577637),
 'test_time': (0.19653606414794922,
  0.11800026893615723,
  0.19199919700622559,
  0.1139686107635498,
  0.1889801025390625)}

## Pipeline 2

In [57]:
from surprise.prediction_algorithms.knns import KNNBasic

opts = {'name': 'pearson_baseline', 'user_based':True}
user_model = KNNBasic(sim_options=opts)

In [58]:
user_model.fit(train)

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x168d05fb460>

In [60]:
preds1 = user_model.test(test)

display(accuracy.rmse(preds1))
display(accuracy.mae(preds1))

RMSE: 1.0033


1.003272615449386

MAE:  0.7948


0.7947790716816

In [61]:
opts = {'name': 'pearson_baseline', 'user_based': False} #other similarity options: pearson, cosine
item_model = KNNBasic(sim_options=opts)

In [62]:
item_model.fit(train)

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.


In [63]:
preds2 = item_model.test(test)

display(accuracy.rmse(preds))
display(accuracy.mae(preds))

RMSE: 1.0033


1.003272615449386

MAE:  0.7948


0.7947790716816