# Pipeline 1

## Load the data

In [11]:
!pip install surprise



In [12]:
from surprise import SVD, Dataset, accuracy
from surprise.model_selection import train_test_split, cross_validate
from sklearn.pipeline import Pipeline

In [13]:
from surprise import Dataset

data = Dataset.load_builtin('ml-100k')

## Train-Test Split Data

In [14]:
from surprise.model_selection import train_test_split

train, test = train_test_split(data, test_size=0.2, random_state=42)

In [15]:
test

[('907', '143', 5.0),
 ('371', '210', 4.0),
 ('218', '42', 4.0),
 ('829', '170', 4.0),
 ('733', '277', 1.0),
 ('363', '1512', 1.0),
 ('193', '487', 5.0),
 ('808', '313', 5.0),
 ('557', '682', 2.0),
 ('774', '196', 3.0),
 ('638', '118', 3.0),
 ('632', '81', 5.0),
 ('417', '200', 4.0),
 ('580', '471', 3.0),
 ('640', '91', 4.0),
 ('450', '328', 4.0),
 ('596', '13', 2.0),
 ('586', '467', 4.0),
 ('653', '502', 2.0),
 ('378', '517', 3.0),
 ('405', '65', 1.0),
 ('279', '399', 4.0),
 ('327', '293', 3.0),
 ('346', '276', 1.0),
 ('59', '928', 4.0),
 ('514', '22', 4.0),
 ('807', '402', 5.0),
 ('473', '327', 3.0),
 ('342', '324', 1.0),
 ('269', '136', 4.0),
 ('654', '1', 4.0),
 ('250', '28', 4.0),
 ('282', '689', 2.0),
 ('534', '619', 4.0),
 ('194', '481', 3.0),
 ('184', '118', 2.0),
 ('291', '739', 3.0),
 ('293', '31', 2.0),
 ('943', '1028', 2.0),
 ('65', '69', 3.0),
 ('562', '135', 5.0),
 ('466', '62', 3.0),
 ('847', '317', 3.0),
 ('650', '521', 3.0),
 ('656', '326', 1.0),
 ('366', '53', 5.0),
 

In [16]:
train.n_users, train.n_items

(943, 1651)

## Train Model

In [17]:
from surprise import SVD

model = SVD()

In [18]:
model.fit(train)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x122a2ff10>

## Make Prediction

In [19]:
predictions = model.test(test)
predictions

[Prediction(uid='907', iid='143', r_ui=5.0, est=4.6457752235454945, details={'was_impossible': False}),
 Prediction(uid='371', iid='210', r_ui=4.0, est=4.25044967536775, details={'was_impossible': False}),
 Prediction(uid='218', iid='42', r_ui=4.0, est=3.084523565047874, details={'was_impossible': False}),
 Prediction(uid='829', iid='170', r_ui=4.0, est=3.9854461058348973, details={'was_impossible': False}),
 Prediction(uid='733', iid='277', r_ui=1.0, est=3.1109788244843166, details={'was_impossible': False}),
 Prediction(uid='363', iid='1512', r_ui=1.0, est=3.654295878059658, details={'was_impossible': False}),
 Prediction(uid='193', iid='487', r_ui=5.0, est=3.556507566231389, details={'was_impossible': False}),
 Prediction(uid='808', iid='313', r_ui=5.0, est=4.887637586918707, details={'was_impossible': False}),
 Prediction(uid='557', iid='682', r_ui=2.0, est=3.4925805240867653, details={'was_impossible': False}),
 Prediction(uid='774', iid='196', r_ui=3.0, est=2.713974095049815, det

## Evaluation

In [20]:
from surprise import accuracy

accuracy.rmse(predictions=predictions)

RMSE: 0.9358


0.935762702593739

In [21]:
accuracy.mae(predictions=predictions)

MAE:  0.7366


0.7366306579359756

# Pipeline 2

## Cross Validation

In [22]:
from surprise.model_selection import cross_validate

cross_validate(model, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9297  0.9415  0.9288  0.9367  0.9400  0.9353  0.0053  
MAE (testset)     0.7310  0.7420  0.7335  0.7374  0.7424  0.7373  0.0045  
Fit time          7.22    7.10    6.69    6.77    6.34    6.82    0.31    
Test time         0.28    0.27    0.21    0.25    0.31    0.26    0.03    


{'test_rmse': array([0.92966822, 0.94154155, 0.9287507 , 0.93669324, 0.94004026]),
 'test_mae': array([0.73100493, 0.74201722, 0.73348479, 0.73735109, 0.74244411]),
 'fit_time': (7.218548059463501,
  7.097961187362671,
  6.687563896179199,
  6.771727800369263,
  6.339550971984863),
 'test_time': (0.2825958728790283,
  0.271899938583374,
  0.21038198471069336,
  0.2540149688720703,
  0.30500102043151855)}

In [23]:
from surprise.prediction_algorithms.knns import KNNBasic

opts = {'name': 'pearson_baseline', 'user_based':True}
model = KNNBasic(sim_options=opts)

In [24]:
model.fit(train)

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x125fee8e0>

In [27]:
pred = model.test(test)

display(accuracy.rmse(pred))
display(accuracy.mae(pred))

RMSE: 1.0008


1.0007997542372822

MAE:  0.7897


0.7897024955421307

In [28]:
opts = {'name': 'pearson_baseline', 'user_based': False} 
item_model = KNNBasic(sim_options=opts)

In [29]:
item_model.fit(train)

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x110b65ee0>

In [31]:
pred1 = item_model.test(test)

display(accuracy.rmse(pred))
display(accuracy.mae(pred))

RMSE: 1.0008


1.0007997542372822

MAE:  0.7897


0.7897024955421307

In [32]:
import os
import sys
os.path.dirname(sys.executable)

'/Library/Frameworks/Python.framework/Versions/3.9/bin'