### Define function to calculate precision@k and recall@k

In [None]:
from collections import defaultdict

def precision_recall_at_k(predictions, k=10, threshold=3.5):
    """Compute precision@k and recall@k separately for every user.

    Args:
        predictions: Iterable of 5-item records that unpack as
            ``(uid, _, true_r, est, _)``; only the user id, the true rating
            and the estimated rating are used.
        k: Number of top-ranked items (ranked by estimated rating, highest
            first) that are considered per user.
        threshold: A true rating >= threshold marks an item as relevant; an
            estimated rating >= threshold marks it as recommended.

    Returns:
        Tuple ``(precisions, recalls)`` of dicts keyed by user id. Precision
        is 0 for a user without any recommended item in the top k, recall is
        0 for a user without any relevant item.
    """
    # Group the (estimated, true) rating pairs by user.
    ratings_by_user = defaultdict(list)
    for uid, _, true_r, est, _ in predictions:
        ratings_by_user[uid].append((est, true_r))

    precisions, recalls = {}, {}
    for uid, pairs in ratings_by_user.items():
        # Rank the user's items from highest to lowest estimate and cut at k.
        ranked = sorted(pairs, key=lambda pair: pair[0], reverse=True)
        top_k = ranked[:k]

        # Relevant items are counted over ALL of the user's items ...
        n_rel = sum(true_r >= threshold for _, true_r in ranked)
        # ... whereas recommended items and hits are counted in the top k only.
        n_rec_k = sum(est >= threshold for est, _ in top_k)
        n_hits_k = sum(
            (true_r >= threshold) and (est >= threshold) for est, true_r in top_k
        )

        # Precision@k: share of recommended items that are relevant.
        # Undefined when nothing was recommended; reported as 0 in that case.
        if n_rec_k != 0:
            precisions[uid] = n_hits_k / n_rec_k
        else:
            precisions[uid] = 0

        # Recall@k: share of relevant items that were recommended.
        # Undefined when the user has no relevant item; reported as 0 then.
        if n_rel != 0:
            recalls[uid] = n_hits_k / n_rel
        else:
            recalls[uid] = 0

    return precisions, recalls

### Import data

In [None]:
import send_status_mail as ssm
import joblib
from surprise import Dataset, Reader

# Load the preprocessed MovieLens ratings (a pickled DataFrame)
df = joblib.load('../data/processed/preprocessed_data_movielens.pkl')

# Keep only the three columns Surprise needs, in the order it requires
# (raw user id, raw item id, rating). Selecting them directly replaces the
# former in-place drop of the other columns, which failed with a KeyError
# as soon as one of the listed columns was absent.
# reset_index(drop=True) discards the index that came with the import and
# installs a fresh 0..n-1 index, regardless of how the old index was named.
df = df[['userId', 'movieId', 'rating']].reset_index(drop=True)

# Load the data into Surprise format; the rating scale given to the Reader is 0.5 to 5.0
reader = Reader(rating_scale=(0.5, 5.0))
data = Dataset.load_from_df(df, reader)

### Define evaluation parameters

In [None]:
from surprise.model_selection import KFold

# 5-fold cross-validation splitter; the fixed random_state keeps the splits reproducible
kf = KFold(random_state=42, n_splits=5)

# GridSearch results are retrieved for this measure
measure = 'mae'

# Evaluation settings
threshold = 3.5  # relevance cut-off: real rating >= threshold => relevant recommendation
n_rec = [3, 5, 10, 20]  # numbers of recommendations to evaluate => the k in "top k"

### Retrieve precision@k and recall@k for all models using k-fold prediction

#### knn-models

In [None]:
# from surprise import KNNBasic

# # import results from parameter tuning
# gs_result = joblib.load('../models/surp_gridsearchcv_knnBasic.pkl')

# # instantiate model with winner parameters from GridSearch according to desired measure, e.g. MAE
# algo = KNNBasic(sim_options=gs_result.best_params[measure]['sim_options'], k= gs_result.best_params[measure]['k'], min_k=gs_result.best_params[measure]['min_k'])


# # initiate dicts holding lists of average precision/recall for different k (n_rec) respectively
# # empty lists will be filled in the next loop, iteration over splits
# precisions_knnBasic_dict, recalls_knnBasic_dict = {}, {}
# for k in n_rec:
#     precisions_knnBasic_dict[k] = []
#     recalls_knnBasic_dict[k] = []

# # iterate over all splits, for each split: train model, predict, retrieve precision/recall for top k recommendations
# for trainset, testset in kf.split(data):
#     algo.fit(trainset)
#     predictions = algo.test(testset)
#     # iterate over all n_rec (number of recommendations): retrieve precision/recall for top k recommendations
#     for k in n_rec:
#         precisions, recalls = precision_recall_at_k(predictions, k, threshold)

#         # Precision and recall can then be averaged over all users
#         precisions_knnBasic_dict[k].append(sum(prec for prec in precisions.values()) / len(precisions))
#         recalls_knnBasic_dict[k].append(sum(rec for rec in recalls.values()) / len(recalls))

# # send completion message via email (server, sender, recipient according to .env)
# ssm.sendstatus("knnBasic precision/recall@k")

In [None]:
# from surprise import KNNWithMeans

# # import results from parameter tuning
# gs_result = joblib.load('../models/surp_gridsearchcv_knnMeans.pkl')

# # instantiate model with winner parameters from GridSearch according to desired measure, e.g. MAE
# algo = KNNWithMeans(sim_options=gs_result.best_params[measure]['sim_options'], k= gs_result.best_params[measure]['k'], min_k=gs_result.best_params[measure]['min_k'])


# # initiate dicts holding lists of average precision/recall for different k (n_rec) respectively
# # empty lists will be filled in the next loop, iteration over splits
# precisions_knnMean_dict, recalls_knnMean_dict = {}, {}
# for k in n_rec:
#     precisions_knnMean_dict[k] = []
#     recalls_knnMean_dict[k] = []

# # iterate over all splits, for each split: train model, predict, retrieve precision/recall for top k recommendations
# for trainset, testset in kf.split(data):
#     algo.fit(trainset)
#     predictions = algo.test(testset)
#     # iterate over all n_rec (number of recommendations): retrieve precision/recall for top k recommendations
#     for k in n_rec:
#         precisions, recalls = precision_recall_at_k(predictions, k, threshold)

#         # Precision and recall can then be averaged over all users
#         precisions_knnMean_dict[k].append(sum(prec for prec in precisions.values()) / len(precisions))
#         recalls_knnMean_dict[k].append(sum(rec for rec in recalls.values()) / len(recalls))

# # send completion message via email (server, sender, recipient according to .env)
# ssm.sendstatus("knnMeans precision/recall@k")

In [None]:
# from surprise import KNNBaseline

# # import results from parameter tuning
# gs_result = joblib.load('../models/surp_gridsearchcv_knnBaseline.pkl')

# # instantiate model with winner parameters from GridSearch according to desired measure, e.g. MAE
# algo = KNNBaseline(sim_options=gs_result.best_params[measure]['sim_options'], k= gs_result.best_params[measure]['k'], min_k=gs_result.best_params[measure]['min_k'])


# # initiate dicts holding lists of average precision/recall for different k (n_rec) respectively
# # empty lists will be filled in the next loop, iteration over splits
# precisions_knnBaseline_dict, recalls_knnBaseline_dict = {}, {}
# for k in n_rec:
#     precisions_knnBaseline_dict[k] = []
#     recalls_knnBaseline_dict[k] = []

# # iterate over all splits, for each split: train model, predict, retrieve precision/recall for top k recommendations
# for trainset, testset in kf.split(data):
#     algo.fit(trainset)
#     predictions = algo.test(testset)
#     # iterate over all n_rec (number of recommendations): retrieve precision/recall for top k recommendations
#     for k in n_rec:
#         precisions, recalls = precision_recall_at_k(predictions, k, threshold)

#         # Precision and recall can then be averaged over all users
#         precisions_knnBaseline_dict[k].append(sum(prec for prec in precisions.values()) / len(precisions))
#         recalls_knnBaseline_dict[k].append(sum(rec for rec in recalls.values()) / len(recalls))

# # send completion message via email (server, sender, recipient according to .env)
# ssm.sendstatus("knnBaseline precision/recall@k")

In [None]:
# from surprise import KNNWithZScore

# # import results from parameter tuning
# gs_result = joblib.load('../models/surp_gridsearchcv_knnZScore.pkl')

# # instantiate model with winner parameters from GridSearch according to desired measure, e.g. MAE
# algo = KNNWithZScore(sim_options=gs_result.best_params[measure]['sim_options'], k= gs_result.best_params[measure]['k'], min_k=gs_result.best_params[measure]['min_k'])


# # initiate dicts holding lists of average precision/recall for different k (n_rec) respectively
# # empty lists will be filled in the next loop, iteration over splits
# precisions_knnZScore_dict, recalls_knnZScore_dict = {}, {}
# for k in n_rec:
#     precisions_knnZScore_dict[k] = []
#     recalls_knnZScore_dict[k] = []

# # iterate over all splits, for each split: train model, predict, retrieve precision/recall for top k recommendations
# for trainset, testset in kf.split(data):
#     algo.fit(trainset)
#     predictions = algo.test(testset)
#     # iterate over all n_rec (number of recommendations): retrieve precision/recall for top k recommendations
#     for k in n_rec:
#         precisions, recalls = precision_recall_at_k(predictions, k, threshold)

#         # Precision and recall can then be averaged over all users
#         precisions_knnZScore_dict[k].append(sum(prec for prec in precisions.values()) / len(precisions))
#         recalls_knnZScore_dict[k].append(sum(rec for rec in recalls.values()) / len(recalls))

# # send completion message via email (server, sender, recipient according to .env)
# ssm.sendstatus("knnZScore precision/recall@k")

#### matrix factorization models

In [None]:
# from surprise import SVD

# # import results from parameter tuning
# gs_result = joblib.load('../models/surp_gridsearchcv_SVD.pkl')

# # instantiate model with winner parameters from GridSearch according to desired measure, e.g. MAE
# algo = SVD(n_factors=gs_result.best_params[measure]['n_factors'],
#            n_epochs=gs_result.best_params[measure]['n_epochs'],
#            biased=gs_result.best_params[measure]['biased'],
#            lr_all=gs_result.best_params[measure]['lr_all'],
#            reg_all=gs_result.best_params[measure]['reg_all'],
#            random_state=42)

# # initiate dicts holding lists of average precision/recall for different k (n_rec) respectively
# # empty lists will be filled in the next loop, iteration over splits
# precisions_SVD_dict, recalls_SVD_dict = {}, {}
# for k in n_rec:
#     precisions_SVD_dict[k] = []
#     recalls_SVD_dict[k] = []

# # iterate over all splits, for each split: train model, predict, retrieve precision/recall for top k recommendations
# for trainset, testset in kf.split(data):
#     algo.fit(trainset)
#     predictions = algo.test(testset)
#     # iterate over all n_rec (number of recommendations): retrieve precision/recall for top k recommendations
#     for k in n_rec:
#         precisions, recalls = precision_recall_at_k(predictions, k, threshold)

#         # Precision and recall can then be averaged over all users
#         precisions_SVD_dict[k].append(sum(prec for prec in precisions.values()) / len(precisions))
#         recalls_SVD_dict[k].append(sum(rec for rec in recalls.values()) / len(recalls))

# # send completion message via email (server, sender, recipient according to .env)
# ssm.sendstatus("SVD precision/recall@k")

In [None]:
# from surprise import NMF

# # import results from parameter tuning
# gs_result = joblib.load('../models/surp_gridsearchcv_NMF.pkl')

# # instantiate model with winner parameters from GridSearch according to desired measure, e.g. MAE
# algo = NMF(n_factors=gs_result.best_params[measure]['n_factors'],
#            n_epochs=gs_result.best_params[measure]['n_epochs'],
#            biased=gs_result.best_params[measure]['biased'],
#            reg_pu=gs_result.best_params[measure]['reg_pu'],
#            reg_qi=gs_result.best_params[measure]['reg_qi'],
#            random_state=42)

# # initiate dicts holding lists of average precision/recall for different k (n_rec) respectively
# # empty lists will be filled in the next loop, iteration over splits
# precisions_NMF_dict, recalls_NMF_dict = {}, {}
# for k in n_rec:
#     precisions_NMF_dict[k] = []
#     recalls_NMF_dict[k] = []

# # iterate over all splits, for each split: train model, predict, retrieve precision/recall for top k recommendations
# for trainset, testset in kf.split(data):
#     algo.fit(trainset)
#     predictions = algo.test(testset)
#     # iterate over all n_rec (number of recommendations): retrieve precision/recall for top k recommendations
#     for k in n_rec:
#         precisions, recalls = precision_recall_at_k(predictions, k, threshold)

#         # Precision and recall can then be averaged over all users
#         precisions_NMF_dict[k].append(sum(prec for prec in precisions.values()) / len(precisions))
#         recalls_NMF_dict[k].append(sum(rec for rec in recalls.values()) / len(recalls))

# # send completion message via email (server, sender, recipient according to .env)
# ssm.sendstatus("NMF precision/recall@k")

#### other models

In [None]:
"""Some algorithms randomly initialize their parameters (sometimes with numpy), and the cross-validation folds are also randomly generated. 
If you need to reproduce your experiments multiple times, you just have to set the seed of the RNG at the beginning of your program:"""

import random

import numpy as np

# Seed both global RNGs (Python's and numpy's) with the same value so that
# repeated runs of the experiments draw the same random numbers
my_seed = 42
np.random.seed(my_seed)
random.seed(my_seed)

In [None]:
from surprise import BaselineOnly

# Load the parameter-tuning results and rebuild the model with the parameters
# that won the GridSearch for the desired measure (e.g. MAE)
gs_result = joblib.load('../models/surp_gridsearchcv_BaselineOnly.pkl')
algo = BaselineOnly(bsl_options=gs_result.best_params[measure]['bsl_options'])

# One (initially empty) list per k in n_rec for each metric;
# every split appends its user-averaged precision/recall below
precisions_Baseline_dict = {k: [] for k in n_rec}
recalls_Baseline_dict = {k: [] for k in n_rec}

# For each split: train the model, predict the test part, then score every k
for trainset, testset in kf.split(data):
    algo.fit(trainset)
    predictions = algo.test(testset)
    for k in n_rec:
        precisions, recalls = precision_recall_at_k(predictions, k, threshold)

        # Average the per-user values into one number per split and k
        precisions_Baseline_dict[k].append(sum(precisions.values()) / len(precisions))
        recalls_Baseline_dict[k].append(sum(recalls.values()) / len(recalls))

# send completion message via email (server, sender, recipient according to .env)
ssm.sendstatus("BaselineOnly precision/recall@k")

In [None]:
from surprise import CoClustering

# Load the parameter-tuning results
gs_result = joblib.load('../models/surp_gridsearchcv_CoClustering.pkl')

# Rebuild the model from the GridSearch winner for the desired measure (e.g. MAE);
# only the three tuned parameters are taken over, the seed is fixed to 42
cc_best_params = gs_result.best_params[measure]
algo = CoClustering(
    random_state=42,
    **{name: cc_best_params[name] for name in ('n_cltr_u', 'n_cltr_i', 'n_epochs')},
)

# One (initially empty) list per k in n_rec for each metric;
# every split appends its user-averaged precision/recall below
precisions_CC_dict = {k: [] for k in n_rec}
recalls_CC_dict = {k: [] for k in n_rec}

# For each split: train the model, predict the test part, then score every k
for trainset, testset in kf.split(data):
    algo.fit(trainset)
    predictions = algo.test(testset)
    for k in n_rec:
        precisions, recalls = precision_recall_at_k(predictions, k, threshold)

        # Average the per-user values into one number per split and k
        precisions_CC_dict[k].append(sum(precisions.values()) / len(precisions))
        recalls_CC_dict[k].append(sum(recalls.values()) / len(recalls))

# send completion message via email (server, sender, recipient according to .env)
ssm.sendstatus("CC precision/recall@k")

In [None]:
from surprise import NormalPredictor

# No GridSearch was performed for this algorithm (it takes no arguments),
# so the model is instantiated directly
algo = NormalPredictor()

# One (initially empty) list per k in n_rec for each metric;
# every split appends its user-averaged precision/recall below
precisions_rand_dict = {k: [] for k in n_rec}
recalls_rand_dict = {k: [] for k in n_rec}

# For each split: train the model, predict the test part, then score every k
for trainset, testset in kf.split(data):
    algo.fit(trainset)
    predictions = algo.test(testset)
    for k in n_rec:
        precisions, recalls = precision_recall_at_k(predictions, k, threshold)

        # Average the per-user values into one number per split and k
        precisions_rand_dict[k].append(sum(precisions.values()) / len(precisions))
        recalls_rand_dict[k].append(sum(recalls.values()) / len(recalls))

# send completion message via email (server, sender, recipient according to .env)
ssm.sendstatus("NormalPredictor precision/recall@k")

In [None]:
from surprise import SlopeOne

# No GridSearch was performed for this algorithm (it takes no arguments),
# so the model is instantiated directly
algo = SlopeOne()

# One (initially empty) list per k in n_rec for each metric;
# every split appends its user-averaged precision/recall below
precisions_SlopeOne_dict = {k: [] for k in n_rec}
recalls_SlopeOne_dict = {k: [] for k in n_rec}

# For each split: train the model, predict the test part, then score every k
for trainset, testset in kf.split(data):
    algo.fit(trainset)
    predictions = algo.test(testset)
    for k in n_rec:
        precisions, recalls = precision_recall_at_k(predictions, k, threshold)

        # Average the per-user values into one number per split and k
        precisions_SlopeOne_dict[k].append(sum(precisions.values()) / len(precisions))
        recalls_SlopeOne_dict[k].append(sum(recalls.values()) / len(recalls))

# send completion message via email (server, sender, recipient according to .env)
ssm.sendstatus("SlopeOne precision/recall@k")

#### Testing reproducibility of splits

In [None]:
"""
kf = KFold(n_splits=5, random_state=42)
for trainset, testset in kf.split(data):
    print(trainset.global_mean)

# result #1
# 3.473827167636637
# 3.4742678511810365
# 3.4738866842197114
# 3.4744854068183715
# 3.4738730499427124

for trainset, testset in kf.split(data):
    print(trainset.global_mean)

# result #2
# 3.473827167636637
# 3.4742678511810365
# 3.4738866842197114
# 3.4744854068183715
# 3.4738730499427124

# => reproducibility is given
"""

### Collect all results in one dict and save it

In [None]:
# Collect all dicts with precisions and recalls in one dict, keyed by the
# name of the variable that holds them (e.g. 'precisions_SVD_dict').
# Order: for every model first its precisions, then its recalls.
result_suffixes = ['knnBasic', 'knnMean', 'knnBaseline', 'knnZScore',
                   'SVD', 'NMF', 'Baseline', 'CC', 'rand', 'SlopeOne']

surp_precision_at_k_recall_at_k = {}
missing_results = []
for result_suffix in result_suffixes:
    for metric_prefix in ('precisions', 'recalls'):
        result_name = '{}_{}_dict'.format(metric_prefix, result_suffix)
        # The result dicts are plain notebook-level variables, so they are
        # looked up by name; a result is missing whenever the cell that
        # computes it has not been run in this session (e.g. because it is
        # commented out).
        if result_name in globals():
            surp_precision_at_k_recall_at_k[result_name] = globals()[result_name]
        else:
            missing_results.append(result_name)

# Fail before anything is written, and name every missing result at once
# instead of stopping at the first undefined variable.
if missing_results:
    raise NameError(
        'precision/recall results not available: {}. '
        'Run the corresponding model cells before saving.'.format(', '.join(missing_results))
    )

# Save the dict with joblib.
# NOTE(review): despite the '.json' extension the file is written by joblib.dump,
# so it must be read back with joblib.load, not with a JSON parser.
# The path is left unchanged so that existing readers keep working.
joblib.dump(surp_precision_at_k_recall_at_k, '../models/surp_precision_at_k_recall_at_k.json')


### Visualization attempt (deactivated)

In [None]:
# import seaborn as sns
# import numpy as np
# sns.boxplot([precisions_knnZScore_dict[3],precisions_knnZScore_dict[5],
#              precisions_knnZScore_dict[10],precisions_knnZScore_dict[20]]);
# sns.relplot(data=precisions_knnZScore_dict,x=[1,2,3,4,5],y=precisions_knnZScore_dict[3], label='k=3')
# sns.relplot(data=precisions_knnZScore_dict,x=[1,2,3,4,5],y=precisions_knnZScore_dict[5])
# #sns.relplot(data=precisions_knnZScore_dict,x=[3,5,10,20],y=(np.mean(precisions_knnZScore_dict[3])))