In [None]:
# Read the GitHub access token stored in the Kaggle secret named "RecsysToken".
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("RecsysToken")

# Clone the project repository, authenticating with the token embedded in the URL.
# NOTE(review): git normally records the clone URL (token included) in the clone's
# .git/config, which here lives under /kaggle/working -- confirm that this directory
# is not published together with the notebook output.
!git clone https://{secret_value_0}@github.com/lucapetrh-dev/Recommender-Systems-Challenge-2022.git
    
# Put the cloned repository on the import path; presumably this is where the
# Evaluation / Data_manager / Utils / Recommenders packages imported below live.
import sys
sys.path.append("/kaggle/working/Recommender-Systems-Challenge-2022")

import pandas as pd
import numpy as np
import scipy.sparse as sps
import matplotlib.pyplot as pyplot

from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Utils.DataReader import load_urm, load_icm, load_target

In [None]:
# Value passed as `train_percentage` to both holdout splits below.
TRAIN_PERCENTAGE = 0.85
# Single cutoff handed to both evaluators.
EVALUATION_CUTOFF = 10

URM_all = load_urm()

# First split: separate a test matrix from all the interactions.
# NOTE(review): no seed is set in this notebook, so the splits presumably change
# between runs -- confirm whether the split function is deterministic.
URM_train, URM_test = split_train_in_two_percentage_global_sample(
    URM_all,
    train_percentage=TRAIN_PERCENTAGE,
)

# Second split: carve the validation matrix out of the remaining training data.
URM_train, URM_validation = split_train_in_two_percentage_global_sample(
    URM_train,
    train_percentage=TRAIN_PERCENTAGE,
)

evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[EVALUATION_CUTOFF])
evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[EVALUATION_CUTOFF])

In [None]:
from Recommenders.SLIM.SLIMElasticNetRecommender import SLIMElasticNetRecommender

# Hyperparameters forwarded verbatim to SLIMElasticNetRecommender.fit.
slim_elasticnet_fit_params = {
    "topK": 799,
    "l1_ratio": 0.006782112530625445,
    "alpha": 0.0023657508503917664,
}

# The model is built on the full interaction matrix (URM_all), not on URM_train.
recommender_SLIMElasticNet = SLIMElasticNetRecommender(URM_all)
recommender_SLIMElasticNet.fit(**slim_elasticnet_fit_params)

In [None]:
from Recommenders.EASE_R.EASE_R_Recommender import EASE_R_Recommender

# Hyperparameters forwarded verbatim to EASE_R_Recommender.fit.
ease_r_fit_params = {
    "topK": None,
    "normalize_matrix": False,
    "l2_norm": 157.86876317814773,
}

# The model is built on the full interaction matrix (URM_all), not on URM_train.
recommender_EaseR = EASE_R_Recommender(URM_all)
recommender_EaseR.fit(**ease_r_fit_params)

In [None]:
from numpy import linalg as LA
from Recommenders.BaseRecommender import BaseRecommender

class DifferentLossScoresHybridRecommender(BaseRecommender):
    """Score-level hybrid of two recommenders trained on different loss functions.

    For every user, the score vector of each wrapped recommender is divided by its
    own vector norm and the two normalised vectors are blended:

        R = alpha * R1 / ||R1|| + (1 - alpha) * R2 / ||R2||

    The norm is computed per user (per row of the score matrix), so the result for
    a user does not depend on which other users are scored in the same call.
    """
    RECOMMENDER_NAME = "DifferentLossScoresHybridRecommender"

    def __init__(self, URM_train, recommender_1, recommender_2):
        """Store the interaction matrix and the two recommenders to blend.

        :param URM_train: interaction matrix; forwarded to the base class and also
            kept on the instance in CSR format.
        :param recommender_1: object exposing `_compute_item_score(user_id_array)`;
            its scores are weighted by `alpha`.
        :param recommender_2: object exposing `_compute_item_score(user_id_array)`;
            its scores are weighted by `1 - alpha`.
        """
        super(DifferentLossScoresHybridRecommender, self).__init__(URM_train)

        self.URM_train = sps.csr_matrix(URM_train)
        self.recommender_1 = recommender_1
        self.recommender_2 = recommender_2

    def fit(self, norm, alpha = 0.5):
        """Set the blending hyperparameters; the wrapped recommenders are not trained here.

        :param norm: order of the vector norm, passed as `ord` to `numpy.linalg.norm`.
        :param alpha: weight of `recommender_1`; `recommender_2` receives `1 - alpha`.
        """
        self.alpha = alpha
        self.norm = norm

    @staticmethod
    def _normalize_rows(item_weights, norm_order):
        """Divide each row of `item_weights` by its vector norm of order `norm_order`."""
        # axis=1 requests one vector norm per row. Without it, numpy treats a 2-D
        # input as a matrix and returns a single matrix norm for the whole batch.
        row_norms = LA.norm(item_weights, norm_order, axis=1, keepdims=True)

        # A row with zero norm holds only zeros: dividing by 1 keeps it at zero and
        # avoids a division by zero for users without any score.
        row_norms[row_norms == 0] = 1

        return item_weights / row_norms

    def _compute_item_score(self, user_id_array, items_to_compute = None):
        """Return the blended, per-user normalised scores of the two recommenders.

        :param user_id_array: user ids, forwarded unchanged to both recommenders.
        :param items_to_compute: accepted for interface compatibility; it is not
            forwarded to the wrapped recommenders, so scores are always computed
            for all items.
        :return: `alpha * normalised scores 1 + (1 - alpha) * normalised scores 2`.
        """
        item_weights_1 = self.recommender_1._compute_item_score(user_id_array)
        item_weights_2 = self.recommender_2._compute_item_score(user_id_array)

        normalized_weights_1 = self._normalize_rows(item_weights_1, self.norm)
        normalized_weights_2 = self._normalize_rows(item_weights_2, self.norm)

        item_weights = normalized_weights_1 * self.alpha + normalized_weights_2 * (1-self.alpha)

        return item_weights

In [None]:
# Grid search over the hybrid's two hyperparameters (norm order and alpha),
# scored with MAP at cutoff 10 on the validation evaluator.
# NOTE(review): both base recommenders were fitted on URM_all, which also contains
# the validation interactions, so the MAP values printed here are likely optimistic --
# confirm whether the base models should be refitted on URM_train for tuning.
recommender_object = DifferentLossScoresHybridRecommender(URM_train, recommender_SLIMElasticNet, recommender_EaseR)

# Best configuration seen so far; a candidate must have MAP strictly above the stored one.
best_model = dict(MAP=0, alpha=0, norm=0)

for norm in (1, 2):
    for alpha in np.arange(0.0, 1.1, 0.1):
        # np.arange can yield values such as 0.30000000000000004, so keep one decimal.
        alpha = round(alpha, 1)

        recommender_object.fit(norm, alpha)
        result_df, _ = evaluator_validation.evaluateRecommender(recommender_object)

        # Row 10 of the result frame is the cutoff requested when the evaluator was built.
        current_map = result_df.loc[10]["MAP"]
        print("Norm: {}, Alpha: {}, Beta: {}, Result: {}".format(norm, alpha, 1-alpha, current_map))

        if not current_map > best_model["MAP"]:
            continue

        best_model["MAP"] = current_map
        best_model["alpha"] = alpha
        best_model["norm"] = norm

        print("*** New best model found! ")
        print("New best model has MAP: {} with alpha: {}, norm: {}".format(best_model["MAP"], best_model["alpha"], best_model["norm"]))

print("----")
print("Best model has MAP: {} with alpha: {}, norm: {}".format(best_model["MAP"], best_model["alpha"], best_model["norm"]))

In [None]:
# Hybrid used to generate the submission.
# NOTE(review): it is built on URM_train although both base models were fitted on
# URM_all; if the base class uses this matrix to filter already-seen items, URM_all
# is probably the intended argument -- confirm.
recommender = DifferentLossScoresHybridRecommender(URM_train, recommender_SLIMElasticNet, recommender_EaseR)

# fit() accepts only `norm` and `alpha`; the second recommender's weight is always
# 1 - alpha, so the extra `beta = 1` keyword (a TypeError at call time) is dropped.
recommender.fit(norm=1, alpha=0)

## Submissions

In [None]:
# CSV shipped inside the cloned repository; the cells below read its `user_id` column.
target_users_path = '/kaggle/working/Recommender-Systems-Challenge-2022/Dataset/data_target_users_test.csv'
test_users = pd.read_csv(target_users_path)

In [None]:
# Ask the hybrid for a 10-item recommendation for every target user, one user per call,
# keeping the results in the same order as the `user_id` column.
user_id = test_users['user_id']
recommendations = [recommender.recommend(user, cutoff=10) for user in user_id]

In [None]:
# Serialise every recommendation list as item ids separated by exactly one space.
# Joining explicitly avoids going through str(np.array(...)), whose output depends on
# numpy's print options (alignment padding, line wrapping) and then needs bracket and
# quote stripping.
test_users['item_list'] = [
    " ".join(str(item) for item in item_list)
    for item_list in recommendations
]

# NOTE(review): the file name mentions rp3Beta / ListMerge while the model above is a
# SLIM ElasticNet + EASE_R score hybrid -- kept unchanged, confirm it is intended.
test_users.to_csv('/kaggle/working/Submission_SLIM_EN_rp3Beta_ListMerge.csv', index=False)