# Pipeline 1

## Load the data

In [2]:
from surprise import Dataset

data = Dataset.load_builtin('ml-100k')

## Train-Test Split Data

In [3]:
from surprise.model_selection import train_test_split

train, test = train_test_split(data, test_size=0.2, random_state=42)

In [4]:
test

[('907', '143', 5.0),
 ('371', '210', 4.0),
 ('218', '42', 4.0),
 ('829', '170', 4.0),
 ('733', '277', 1.0),
 ('363', '1512', 1.0),
 ('193', '487', 5.0),
 ('808', '313', 5.0),
 ('557', '682', 2.0),
 ('774', '196', 3.0),
 ('638', '118', 3.0),
 ('632', '81', 5.0),
 ('417', '200', 4.0),
 ('580', '471', 3.0),
 ('640', '91', 4.0),
 ('450', '328', 4.0),
 ('596', '13', 2.0),
 ('586', '467', 4.0),
 ('653', '502', 2.0),
 ('378', '517', 3.0),
 ('405', '65', 1.0),
 ('279', '399', 4.0),
 ('327', '293', 3.0),
 ('346', '276', 1.0),
 ('59', '928', 4.0),
 ('514', '22', 4.0),
 ('807', '402', 5.0),
 ('473', '327', 3.0),
 ('342', '324', 1.0),
 ('269', '136', 4.0),
 ('654', '1', 4.0),
 ('250', '28', 4.0),
 ('282', '689', 2.0),
 ('534', '619', 4.0),
 ('194', '481', 3.0),
 ('184', '118', 2.0),
 ('291', '739', 3.0),
 ('293', '31', 2.0),
 ('943', '1028', 2.0),
 ('65', '69', 3.0),
 ('562', '135', 5.0),
 ('466', '62', 3.0),
 ('847', '317', 3.0),
 ('650', '521', 3.0),
 ('656', '326', 1.0),
 ('366', '53', 5.0),
 

In [5]:
train.n_users, train.n_items

(943, 1651)

## Train Model

In [6]:
from surprise import SVD

model = SVD()

In [7]:
model.fit(train)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x106e584c0>

## Make Prediction

In [8]:
predictions = model.test(test)
predictions

[Prediction(uid='907', iid='143', r_ui=5.0, est=4.65114137009985, details={'was_impossible': False}),
 Prediction(uid='371', iid='210', r_ui=4.0, est=4.379585585310621, details={'was_impossible': False}),
 Prediction(uid='218', iid='42', r_ui=4.0, est=3.363727017768004, details={'was_impossible': False}),
 Prediction(uid='829', iid='170', r_ui=4.0, est=3.8388790953134317, details={'was_impossible': False}),
 Prediction(uid='733', iid='277', r_ui=1.0, est=3.0960659603928105, details={'was_impossible': False}),
 Prediction(uid='363', iid='1512', r_ui=1.0, est=3.935863627549409, details={'was_impossible': False}),
 Prediction(uid='193', iid='487', r_ui=5.0, est=3.612314987681681, details={'was_impossible': False}),
 Prediction(uid='808', iid='313', r_ui=5.0, est=4.8866438045339615, details={'was_impossible': False}),
 Prediction(uid='557', iid='682', r_ui=2.0, est=3.3406227263114956, details={'was_impossible': False}),
 Prediction(uid='774', iid='196', r_ui=3.0, est=2.525871948891094, det

## Evaluation

In [9]:
from surprise import accuracy

accuracy.rmse(predictions=predictions)

RMSE: 0.9371


0.9371383363912988

In [10]:
accuracy.mae(predictions=predictions)

MAE:  0.7392


0.739243774025801

# Pipeline 2

## Cross Validation

In [17]:
from surprise.model_selection import cross_validate

cross_validate(model, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9362  0.9383  0.9329  0.9344  0.9432  0.9370  0.0036  
MAE (testset)     0.7370  0.7385  0.7326  0.7378  0.7446  0.7381  0.0039  
Fit time          7.73    7.53    8.82    6.96    7.39    7.69    0.62    
Test time         0.20    0.18    0.27    0.27    0.23    0.23    0.04    


{'test_rmse': array([0.93624719, 0.93828004, 0.93288294, 0.93436359, 0.94322712]),
 'test_mae': array([0.73695227, 0.73846043, 0.73259751, 0.73777265, 0.74462474]),
 'fit_time': (7.730445146560669,
  7.532963752746582,
  8.817882776260376,
  6.963747024536133,
  7.3922600746154785),
 'test_time': (0.19904184341430664,
  0.1802382469177246,
  0.26851511001586914,
  0.2726452350616455,
  0.23450398445129395)}