In [27]:
import pandas as pd

from pathlib import Path
from surprise import SVD, NMF
from surprise.prediction_algorithms.knns import KNNBasic
from surprise.prediction_algorithms.algo_base import AlgoBase
from surprise.dump import dump, load


In [28]:
from surprise import Dataset

ratings = Dataset.load_builtin('ml-100k')
ratings

<surprise.dataset.DatasetAutoFolds at 0x11c4ebac0>

In [29]:
from surprise.dataset import DatasetAutoFolds

def load_ratings_from_surprise() -> DatasetAutoFolds:
    ratings = Dataset.load_builtin('ml-100k')
    return ratings

load_ratings_from_surprise()

<surprise.dataset.DatasetAutoFolds at 0x11c4eb820>

In [30]:
from surprise import Reader
from pathlib import Path
import pandas as pd


reader = Reader(line_format = 'user item rating timestamp', sep=',', skip_lines=1)
rating_data = Dataset.load_from_file('ratings.csv', reader)
rating_data

<surprise.dataset.DatasetAutoFolds at 0x11c4eb730>

In [31]:
def get_data(from_surprise : bool = True) -> DatasetAutoFolds:
    data = load_ratings_from_surprise() if from_surprise else load_ratings_from_file()
    return data

data = get_data(from_surprise=True)
data

<surprise.dataset.DatasetAutoFolds at 0x10f1dbf70>

In [32]:
from surprise.model_selection import train_test_split

train, test = train_test_split(data, test_size=0.2, random_state=42)
train

<surprise.trainset.Trainset at 0x10e9c5400>

In [33]:
from surprise import SVD

model = SVD()

In [34]:
model.fit(train)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x11c4ebdf0>

In [35]:
from surprise.trainset import Trainset
from  surprise.prediction_algorithms.algo_base import AlgoBase

from surprise.prediction_algorithms.knns import KNNBasic


def get_trained_model(model_class: AlgoBase, model_kwargs: dict, train_set: Trainset) -> AlgoBase:
    model = model_class(sim_options = model_kwargs)
    model.fit(train_set)
    return model

model_kwargs = {'sim_options': {'user_based': False, 'name': 'pearson'}}
get_trained_model(KNNBasic, {'user_based': False, 'name': 'pearson'}, train)

Computing the pearson similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x1179795b0>

In [36]:
from surprise import accuracy

def evaluate_model(model: AlgoBase, test_set: [(int, int, float)]) -> dict:
    predictions = model.test(test_set)
    metrics_dict = {}
    metrics_dict['RMSE'] = accuracy.rmse(predictions, verbose=False)
    metrics_dict['MAE'] = accuracy.rmse(predictions, verbose=False)
    return metrics_dict


In [37]:
from surprise.model_selection import train_test_split


from surprise.prediction_algorithms.knns import KNNBasic

def train_and_evalute_model_pipeline(model_class: AlgoBase, model_kwargs: dict = {},
                                     from_surprise: bool = True,
                                     test_size: float = 0.2) -> (AlgoBase, dict):
    data = get_data(from_surprise)
    train_set, test_set = train_test_split(data, test_size, random_state=42)
    model = get_trained_model(model_class, model_kwargs, train_set)
    metrics_dict = evaluate_model(model, test_set)
    return model, metrics_dict

my_model, metrics_dict = train_and_evalute_model_pipeline(KNNBasic)
metrics_dict

Computing the msd similarity matrix...
Done computing similarity matrix.


{'RMSE': 0.980150596704479, 'MAE': 0.980150596704479}

In [38]:
from surprise.prediction_algorithms.algo_base import AlgoBase
def get_user_recommendation(model: AlgoBase,user_id: int, k: int) -> pd.DataFrame:
    data_dir = Path(Path.cwd().parents[0], 'data', 'movielens', 'ml-latest-small')
    model_dir = Path(Path.cwd().parents[0], 'models')
    
    
    
    try:
        preds, model = load(Path(model_dir, str(model)[8:-2]))
    except:
        data = Dataset.load_builtin('ml-100k')
        trainset = data.build_full_trainset()
        
        testset = trainset.build_anti_testset()
        trained_model = get_trained_model(model,trainset)
        preds = trained_model.test(testset)
        dump(Path(model_dir, str(model)[8:-2]), preds, trained_model)
    top_n = predictions.get_top_n(preds, n=k)
    movies = Dataset.load_builtin_item_data('movies.csv', data_dir)

    
    return predictions.get_item_details(top_n[user_id], movies, 'movieId', ['movieId', 'title', 'genres'])

recommendations = get_user_recommendation(SVD, '196', 10)
recommendations.reset_index(drop=True, inplace=True)
display(recommendations)
    
    

TypeError: get_trained_model() missing 1 required positional argument: 'train_set'