In [1]:
import utils.data_loader as data_loader
from pathlib import Path
import utils.model_loader as model_loader

In [2]:
data_raw = data_loader.get_data('ml-100k')

In [3]:
data_dir = Path(Path.cwd().parents[0], 'data', 'movielens', 'ml-latest-small')
data_raw = data_loader.get_data('ratings.csv', False)

## Model Pipeline

In [4]:
from surprise.model_selection import train_test_split
from surprise import SVD, NMF
from surprise.prediction_algorithms.knns import KNNBasic
from surprise.prediction_algorithms.algo_base import AlgoBase

In [9]:
def train_and_evalute_model_pipeline(model_class: AlgoBase, dataset: str = 'ml-100k', 
                                     from_surprise: bool = True, 
                                     test_size: float = 0.2,
                                     model_kwargs: dict = {}) -> (AlgoBase, dict):
    data = data_loader.get_data(dataset, from_surprise)
    train_set, test_set = train_test_split(data, test_size, random_state=42)
    model = model_loader.get_trained_model(model_class, train_set, model_kwargs)
    metrics_dict = model_loader.evaluate_model(model, test_set)
    return model, metrics_dict

In [10]:
my_model, metrics_dict = train_and_evalute_model_pipeline(KNNBasic)
metrics_dict

Computing the msd similarity matrix...
Done computing similarity matrix.


{'RMSE': 0.980150596704479, 'MAE': 0.980150596704479}

In [21]:
model_kwargs = {'sim_options': {'user_based': False, 'name': 'pearson'}}
my_model, metrics_dict = train_and_evalute_model_pipeline(KNNBasic, model_kwargs=model_kwargs)
metrics_dict

Computing the pearson similarity matrix...
Done computing similarity matrix.


{'RMSE': 1.041104054968961, 'MAE': 1.041104054968961}

In [18]:
my_model, metrics_dict = train_and_evalute_model_pipeline(SVD)
metrics_dict

{'RMSE': 0.9360802939362804, 'MAE': 0.9360802939362804}

## Benchmarking

In [24]:
benchmark_dict = {}


model_dict_list = [
    {
        'model_name' : 'KNN user based with cosine similarity',
        'model_class' : KNNBasic,
        'model_kwargs' : {'user_based': True, 'name': 'cosine'}
    },
    {
        'model_name' : 'KNN user based with pearson similarity',
        'model_class' : KNNBasic,
        'model_kwargs' : {'user_based': True, 'name': 'pearson'}
    },
    {
        'model_name' : 'KNN item based with cosine similarity',
        'model_class' : KNNBasic,
        'model_kwargs' : {'user_based': False, 'name': 'cosine'}
    },
    {
        'model_name' : 'KNN item based with pearson similarity',
        'model_class' : KNNBasic,
        'model_kwargs' : {'user_based': False, 'name': 'pearson'}
    },
    {
        'model_name' : 'SVD',
        'model_class' : SVD
    },
    {
        'model_name' : 'NMF',
        'model_class' : NMF
    },
    
]

for model_dict in model_dict_list:
    model, metrics_dict = train_and_evalute_model_pipeline(
        model_dict['model_class'], model_kwargs = model_dict.get('model_kwargs', {}))
    benchmark_dict[model_dict['model_name']] = metrics_dict
    model_dict['fitted_model'] = model
    

benchmark_dict

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.


{'KNN user based with cosine similarity': {'RMSE': 0.980150596704479,
  'MAE': 0.980150596704479},
 'KNN user based with pearson similarity': {'RMSE': 0.980150596704479,
  'MAE': 0.980150596704479},
 'KNN item based with cosine similarity': {'RMSE': 0.980150596704479,
  'MAE': 0.980150596704479},
 'KNN item based with pearson similarity': {'RMSE': 0.980150596704479,
  'MAE': 0.980150596704479},
 'SVD': {'RMSE': 0.9345987396157499, 'MAE': 0.9345987396157499},
 'NMF': {'RMSE': 0.9624799306524018, 'MAE': 0.9624799306524018}}

## User Recommendation

In [97]:
import pandas as pd

def get_movies_data():
    movies = pd.read_csv(Path(data_dir, 'movies.csv'))
    return movies

def get_predictions(model, user, movies, k):
    movies['user'] = user
    preds = movies.apply(lambda x: model.predict(x[0], x[-1]), 1, result_type='expand')
    idx = preds[3].argsort()[:k]
    ids = preds.iloc[idx, 0]
    mvs = movies.movieId.isin(ids)
    return movies.loc[mvs, ['title', 'genres']]
    

    
def get_user_recommendation(model: AlgoBase, user_id: int, k: int, data, movies : pandas.DataFrame
                           ) -> pandas.DataFrame:
    """Makes movie recommendations a user.
    
    Parameters
    ----------
        model : AlgoBase
            A trained surprise model
        user_id : int
            The user for whom the recommendation will be done.
        k : int
            The number of items to recommend.
        data : FIXME
            The data needed to do the recommendation.
        movies : pandas.DataFrame
            The dataframe containing the movies metadata (title, genre, etc)
        
    Returns
    -------
    pandas.Dataframe
        A dataframe with the k movies that will be recommended the user. The dataframe should have the following
        columns (movie_name : str, movie_genre : str, predicted_rating : float, true_rating : float)
        
    Notes
    -----
    - You should create other functions that are used in this one and not put all the code in the same function.
        For example to create the final dataframe, instead of implemented all the code
        in this function (get_user_recommendation), you can create a new one (create_recommendation_dataframe)
        that will be called in this function.
    - You can add other arguments to the function if you need to.
    """
    
    model = model_loader.get_trained_model(model, data.build_full_trainset())
    predictions = get_predictions(model, user_id, movies, k)
    return predictions

recommendations = get_user_recommendation(SVD, 196, 10, data_raw, get_movies_data())
recommendations[['title', 'genres']]

Unnamed: 0,title,genres
0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
6490,Mr. Brooks (2007),Crime|Drama|Thriller
6491,"Librarian: Return to King Solomon's Mines, The...",Action|Adventure|Fantasy
6492,"Librarian: Quest for the Spear, The (2004)",Action|Adventure|Comedy|Fantasy|Romance
6493,Fay Grim (2006),Action|Thriller
6494,"I'm a Cyborg, But That's OK (Saibogujiman kwen...",Comedy|Drama|Romance|Sci-Fi
6495,"Breed, The (2006)",Horror|Thriller
6496,Cashback (2006),Comedy|Drama|Romance
6497,Ocean's Thirteen (2007),Crime|Thriller
6498,Them (Ils) (2006),Horror
