In [1]:
import utils.data_loader as data_loader
from pathlib import Path
import utils.model_loader as model_loader

In [3]:
data_raw = data_loader.get_data('ml-100k')

In [2]:
# data_dir = Path(Path.cwd().parents[0], 'data', 'movielens', 'ml-latest-small')
data_raw = data_loader.get_data('ratings.csv', False)

## Model Pipeline

In [2]:
from surprise.model_selection import train_test_split
from surprise import SVD, NMF
from surprise.prediction_algorithms.knns import KNNBasic
from surprise.prediction_algorithms.algo_base import AlgoBase

In [6]:
def train_and_evalute_model_pipeline(model_class: AlgoBase, dataset: str = 'ml-100k', 
                                     from_surprise: bool = True, 
                                     test_size: float = 0.2,
                                     model_kwargs: dict = {}) -> (AlgoBase, dict):
    data = data_loader.get_data(dataset, from_surprise)
    train_set, test_set = train_test_split(data, test_size, random_state=42)
    model = model_loader.get_trained_model(model_class, train_set, model_kwargs)
    metrics_dict = model_loader.evaluate_model(model, test_set)
    return model, metrics_dict

In [7]:
my_model, metrics_dict = train_and_evalute_model_pipeline(KNNBasic)
metrics_dict

Computing the msd similarity matrix...
Done computing similarity matrix.


{'RMSE': 0.980150596704479, 'MAE': 0.980150596704479}

In [21]:
model_kwargs = {'sim_options': {'user_based': False, 'name': 'pearson'}}
my_model, metrics_dict = train_and_evalute_model_pipeline(KNNBasic, model_kwargs=model_kwargs)
metrics_dict

Computing the pearson similarity matrix...
Done computing similarity matrix.


{'RMSE': 1.041104054968961, 'MAE': 1.041104054968961}

In [18]:
my_model, metrics_dict = train_and_evalute_model_pipeline(SVD)
metrics_dict

{'RMSE': 0.9360802939362804, 'MAE': 0.9360802939362804}

## Benchmarking

In [24]:
benchmark_dict = {}


model_dict_list = [
    {
        'model_name' : 'KNN user based with cosine similarity',
        'model_class' : KNNBasic,
        'model_kwargs' : {'user_based': True, 'name': 'cosine'}
    },
    {
        'model_name' : 'KNN user based with pearson similarity',
        'model_class' : KNNBasic,
        'model_kwargs' : {'user_based': True, 'name': 'pearson'}
    },
    {
        'model_name' : 'KNN item based with cosine similarity',
        'model_class' : KNNBasic,
        'model_kwargs' : {'user_based': False, 'name': 'cosine'}
    },
    {
        'model_name' : 'KNN item based with pearson similarity',
        'model_class' : KNNBasic,
        'model_kwargs' : {'user_based': False, 'name': 'pearson'}
    },
    {
        'model_name' : 'SVD',
        'model_class' : SVD
    },
    {
        'model_name' : 'NMF',
        'model_class' : NMF
    },
    
]

for model_dict in model_dict_list:
    model, metrics_dict = train_and_evalute_model_pipeline(
        model_dict['model_class'], model_kwargs = model_dict.get('model_kwargs', {}))
    benchmark_dict[model_dict['model_name']] = metrics_dict
    model_dict['fitted_model'] = model
    

benchmark_dict

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.


{'KNN user based with cosine similarity': {'RMSE': 0.980150596704479,
  'MAE': 0.980150596704479},
 'KNN user based with pearson similarity': {'RMSE': 0.980150596704479,
  'MAE': 0.980150596704479},
 'KNN item based with cosine similarity': {'RMSE': 0.980150596704479,
  'MAE': 0.980150596704479},
 'KNN item based with pearson similarity': {'RMSE': 0.980150596704479,
  'MAE': 0.980150596704479},
 'SVD': {'RMSE': 0.9345987396157499, 'MAE': 0.9345987396157499},
 'NMF': {'RMSE': 0.9624799306524018, 'MAE': 0.9624799306524018}}