
# From surprise

In [7]:
from surprise import Dataset

ratings = Dataset.load_builtin('ml-100k')
ratings

<surprise.dataset.DatasetAutoFolds at 0x10d5492b0>

In [8]:
from surprise.dataset import DatasetAutoFolds

def load_ratings_from_surprise() -> DatasetAutoFolds:
    ratings = Dataset.load_builtin('ml-100k')
    return ratings

load_ratings_from_surprise()

<surprise.dataset.DatasetAutoFolds at 0x10d549c40>


# From file

In [12]:
from surprise import Reader
from pathlib import Path
import pandas as pd


reader = Reader(line_format = 'user item rating timestamp', sep=',', skip_lines=1)
rating_data = Dataset.load_from_file('ratings.csv', reader)
rating_data

<surprise.dataset.DatasetAutoFolds at 0x103e5f4c0>

# Modular function

In [14]:
def get_data(from_surprise : bool = True) -> DatasetAutoFolds:
    data = load_ratings_from_surprise() if from_surprise else load_ratings_from_file()
    return data

data = get_data(from_surprise=True)
data

<surprise.dataset.DatasetAutoFolds at 0x10d3bea30>

# Manual pipeline


# Split data in train and test

In [15]:
from surprise.model_selection import train_test_split

train, test = train_test_split(data, test_size=0.2, random_state=42)
train 

<surprise.trainset.Trainset at 0x103ed56d0>

In [16]:
train.n_users, train.n_items

(943, 1651)

# Train model

In [18]:
from surprise import SVD

model = SVD()

In [19]:
model.fit(train)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x103ed5250>

In [20]:
from surprise.trainset import Trainset
from  surprise.prediction_algorithms.algo_base import AlgoBase

from surprise.prediction_algorithms.knns import KNNBasic


def get_trained_model(model_class: AlgoBase, model_kwargs: dict, train_set: Trainset) -> AlgoBase:
    model = model_class(sim_options = model_kwargs)
    model.fit(train_set)
    return model

model_kwargs = {'sim_options': {'user_based': False, 'name': 'pearson'}}
get_trained_model(KNNBasic, {'user_based': False, 'name': 'pearson'}, train)

Computing the pearson similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x10d4776d0>

# Make predictions

In [21]:
predictions = model.test(test)
predictions[:10]

[Prediction(uid='907', iid='143', r_ui=5.0, est=4.972875011006883, details={'was_impossible': False}),
 Prediction(uid='371', iid='210', r_ui=4.0, est=4.311637716684491, details={'was_impossible': False}),
 Prediction(uid='218', iid='42', r_ui=4.0, est=3.2653922837229876, details={'was_impossible': False}),
 Prediction(uid='829', iid='170', r_ui=4.0, est=3.945460496452226, details={'was_impossible': False}),
 Prediction(uid='733', iid='277', r_ui=1.0, est=3.0020211542011297, details={'was_impossible': False}),
 Prediction(uid='363', iid='1512', r_ui=1.0, est=3.0604995636621077, details={'was_impossible': False}),
 Prediction(uid='193', iid='487', r_ui=5.0, est=3.7043226721326517, details={'was_impossible': False}),
 Prediction(uid='808', iid='313', r_ui=5.0, est=4.751743865630858, details={'was_impossible': False}),
 Prediction(uid='557', iid='682', r_ui=2.0, est=3.060165137476779, details={'was_impossible': False}),
 Prediction(uid='774', iid='196', r_ui=3.0, est=2.6485242952388544, d

# Evaluation

In [22]:
from surprise import accuracy

accuracy.rmse(predictions=predictions)

RMSE: 0.9356


0.9356351043753047

In [23]:
accuracy.mae(predictions=predictions)

MAE:  0.7381


0.7380753896138278

In [24]:
from surprise import accuracy

def evaluate_model(model: AlgoBase, test_set: [(int, int, float)]) -> dict:
    predictions = model.test(test_set)
    metrics_dict = {}
    metrics_dict['RMSE'] = accuracy.rmse(predictions, verbose=False)
    metrics_dict['MAE'] = accuracy.rmse(predictions, verbose=False)
    return metrics_dict

# Modular code¶

In [25]:
from surprise.model_selection import train_test_split


from surprise.prediction_algorithms.knns import KNNBasic

def train_and_evalute_model_pipeline(model_class: AlgoBase, model_kwargs: dict = {},
                                     from_surprise: bool = True,
                                     test_size: float = 0.2) -> (AlgoBase, dict):
    data = get_data(from_surprise)
    train_set, test_set = train_test_split(data, test_size, random_state=42)
    model = get_trained_model(model_class, model_kwargs, train_set)
    metrics_dict = evaluate_model(model, test_set)
    return model, metrics_dict

my_model, metrics_dict = train_and_evalute_model_pipeline(KNNBasic)
metrics_dict

Computing the msd similarity matrix...
Done computing similarity matrix.


{'RMSE': 0.980150596704479, 'MAE': 0.980150596704479}

In [27]:
get_trained_model(KNNBasic, {'user_based': False, 'name': 'pearson'}, train)


Computing the pearson similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x1170dbd30>

# Benchmarking

In [28]:
from surprise.prediction_algorithms.knns import KNNBasic

benchmark_dict = {}

model_kwargs = {'user_based': True, 'name': 'cosine'}
knn, metrics_dict = train_and_evalute_model_pipeline(KNNBasic, model_kwargs)
benchmark_dict['KNN user based cosine'] = metrics_dict

model_kwargs = {'user_based': True, 'name': 'pearson'}
knn, metrics_dict = train_and_evalute_model_pipeline(KNNBasic, model_kwargs)
benchmark_dict['KNN user based pearson'] = metrics_dict

model_kwargs = {'user_based': False, 'name': 'cosine'}
knn, metrics_dict = train_and_evalute_model_pipeline(KNNBasic, model_kwargs)
benchmark_dict['KNN item based cosine'] = metrics_dict

model_kwargs = {'user_based': False, 'name': 'pearson'}
knn, metrics_dict = train_and_evalute_model_pipeline(KNNBasic, model_kwargs)
benchmark_dict['KNN item based pearson'] = metrics_dict


benchmark_dict

Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.


{'KNN user based cosine': {'RMSE': 1.0193536815834319,
  'MAE': 1.0193536815834319},
 'KNN user based pearson': {'RMSE': 1.0150350905205965,
  'MAE': 1.0150350905205965},
 'KNN item based cosine': {'RMSE': 1.0264295933767333,
  'MAE': 1.0264295933767333},
 'KNN item based pearson': {'RMSE': 1.041104054968961,
  'MAE': 1.041104054968961}}

In [29]:
benchmark_dict = {}

model_dict_list = [
    {
        'model_name' : 'KNN user based with cosine similarity',
        'model_class' : KNNBasic,
        'model_kwargs' : {'user_based': True, 'name': 'cosine'}
    },
    {
        'model_name' : 'KNN user based with pearson similarity',
        'model_class' : KNNBasic,
        'model_kwargs' : {'user_based': True, 'name': 'pearson'}
    },
]

for model_dict in model_dict_list:
    model, metrics_dict = train_and_evalute_model_pipeline(
        model_dict['model_class'], model_dict['model_kwargs'])
    benchmark_dict[model_dict['model_name']] = metrics_dict
    model_dict['fitted_model'] = model
    
benchmark_dict

Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.


{'KNN user based with cosine similarity': {'RMSE': 1.0193536815834319,
  'MAE': 1.0193536815834319},
 'KNN user based with pearson similarity': {'RMSE': 1.0150350905205965,
  'MAE': 1.0150350905205965}}

# Cross validation

In [30]:
from surprise.model_selection import cross_validate

cross_validate(model, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBasic on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.0167  1.0030  1.0096  1.0155  1.0185  1.0126  0.0057  
MAE (testset)     0.8036  0.7969  0.8024  0.8065  0.8088  0.8036  0.0040  
Fit time          1.94    1.78    1.87    1.83    1.78    1.84    0.06    
Test time         4.64    4.23    4.68    4.37    4.38    4.46    0.17    


{'test_rmse': array([1.01666025, 1.002967  , 1.00956277, 1.01546667, 1.01852717]),
 'test_mae': array([0.8035897 , 0.79691282, 0.80239156, 0.80647929, 0.80876319]),
 'fit_time': (1.9350700378417969,
  1.7831077575683594,
  1.8685150146484375,
  1.8335139751434326,
  1.7765541076660156),
 'test_time': (4.640166997909546,
  4.229506015777588,
  4.677316904067993,
  4.371628999710083,
  4.377537965774536)}

# NMF

In [47]:
from surprise import NMF
from surprise.model_selection import cross_validate
from surprise import NormalPredictor
algo = NormalPredictor()
perf = cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=3)
print(perf)
 

{'test_rmse': array([1.52341906, 1.52687755, 1.52257202]), 'test_mae': array([1.2254386 , 1.22571624, 1.22455314]), 'fit_time': (0.1397261619567871, 0.16882991790771484, 0.13615703582763672), 'test_time': (0.6412389278411865, 0.4476642608642578, 0.27816009521484375)}


In [48]:
from surprise import NMF
algo = NMF()
perf = cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=3)
print(perf)

{'test_rmse': array([0.9774521, 0.9769264, 0.972586 ]), 'test_mae': array([0.76688009, 0.76871545, 0.76305713]), 'fit_time': (6.107399940490723, 5.762816905975342, 6.271279811859131), 'test_time': (0.3113729953765869, 0.4848639965057373, 0.48643994331359863)}


# SVD

In [50]:
from surprise import SVD
algo = SVD()
perf = cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=3)
print(perf)

{'test_rmse': array([0.95240962, 0.94403124, 0.94761104]), 'test_mae': array([0.75129077, 0.74587081, 0.74796527]), 'fit_time': (5.432853937149048, 5.1817307472229, 5.17937707901001), 'test_time': (0.31631994247436523, 0.43987035751342773, 0.44788479804992676)}


# SVD++

In [52]:
from surprise import SVDpp
algo = SVDpp()
perf = cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=3)
print(perf)


{'test_rmse': array([0.9337957 , 0.92472426, 0.92509491]), 'test_mae': array([0.73243991, 0.72842871, 0.72799848]), 'fit_time': (170.6865348815918, 158.2787880897522, 173.03230786323547), 'test_time': (6.3825578689575195, 6.7504048347473145, 6.134533882141113)}


# User recommendation

In [None]:
import pandas

def get_user_recommendation

pass