In [1]:
import pandas as pd
import numpy as np
import scipy.sparse as sps
import matplotlib.pyplot as pyplot

from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Utils.DataReader import load_urm, load_icm, load_target

Cloning into 'Recommender-Systems-Challenge-2022'...
remote: Enumerating objects: 547, done.[K
remote: Counting objects: 100% (143/143), done.[K
remote: Compressing objects: 100% (100/100), done.[K
remote: Total 547 (delta 76), reused 92 (delta 42), pack-reused 404[K
Receiving objects: 100% (547/547), 90.69 MiB | 12.05 MiB/s, done.
Resolving deltas: 100% (248/248), done.
Updating files: 100% (228/228), done.


In [2]:
# Share of the interactions kept on the "train" side of each split below.
TRAIN_PERCENTAGE = 0.85
# List length at which both evaluators report their metrics.
EVALUATION_CUTOFF = 10

URM_all = load_urm()

# First split: set the test interactions aside from the full matrix.
URM_train, URM_test = split_train_in_two_percentage_global_sample(
    URM_all, train_percentage=TRAIN_PERCENTAGE
)
# Second split: take the validation interactions out of what is left for training.
URM_train, URM_validation = split_train_in_two_percentage_global_sample(
    URM_train, train_percentage=TRAIN_PERCENTAGE
)

evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[EVALUATION_CUTOFF])
evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[EVALUATION_CUTOFF])

EvaluatorHoldout: Ignoring 1582 ( 3.8%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 903 ( 2.2%) Users that have less than 1 test interactions


In [3]:
from Recommenders.SLIM.SLIMElasticNetRecommender import SLIMElasticNetRecommender

# Hyperparameters currently used for SLIM ElasticNet.
# Old values, kept for reference: topK=405, l1_ratio=0.0010299956370568744, alpha=0.01
slim_elasticnet_hyperparameters = {
    "topK": 306,
    "l1_ratio": 0.0035658486108087256,
    "alpha": 0.004181961739488717,
}

# NOTE(review): the model is fitted on URM_all, which still contains the
# interactions split off into URM_test / URM_validation, so scores measured
# against those splits are not a clean hold-out estimate.
recommender_SLIMElasticNet = SLIMElasticNetRecommender(URM_all)
recommender_SLIMElasticNet.fit(**slim_elasticnet_hyperparameters)

SLIMElasticNetRecommender: Processed 2122 ( 8.7%) in 5.00 min. Items per second: 7.07
SLIMElasticNetRecommender: Processed 4349 (17.7%) in 10.00 min. Items per second: 7.24
SLIMElasticNetRecommender: Processed 6654 (27.2%) in 15.01 min. Items per second: 7.39
SLIMElasticNetRecommender: Processed 8978 (36.6%) in 20.01 min. Items per second: 7.48
SLIMElasticNetRecommender: Processed 11351 (46.3%) in 25.01 min. Items per second: 7.56
SLIMElasticNetRecommender: Processed 13721 (56.0%) in 30.01 min. Items per second: 7.62
SLIMElasticNetRecommender: Processed 16089 (65.7%) in 35.01 min. Items per second: 7.66
SLIMElasticNetRecommender: Processed 18460 (75.3%) in 40.01 min. Items per second: 7.69
SLIMElasticNetRecommender: Processed 23278 (95.0%) in 50.01 min. Items per second: 7.76
SLIMElasticNetRecommender: Processed 24507 (100.0%) in 52.48 min. Items per second: 7.78


In [4]:
from Recommenders.GraphBased.RP3betaRecommender import RP3betaRecommender

# Hyperparameters currently used for RP3beta.
# Alternatives tried before, kept for reference:
#   alpha=0.08934428697754494, beta=0.15025275671838492, topK=132, implicit=True
#   topK=167, alpha=1.0, beta=0.4520495673133021, implicit=True (old)
rp3beta_hyperparameters = {
    "alpha": 0.3196900461986604,
    "beta": 0.3530045735373658,
    "topK": 44,
    "implicit": False,
}

# NOTE(review): fitted on URM_all, which still contains the interactions split
# off into URM_test / URM_validation.
recommender_RP3beta = RP3betaRecommender(URM_all)
recommender_RP3beta.fit(**rp3beta_hyperparameters)

RP3betaRecommender: Similarity column 24507 (100.0%), 1044.98 column/sec. Elapsed time 23.45 sec


In [5]:
from Recommenders.EASE_R.EASE_R_Recommender import EASE_R_Recommender

# Hyperparameters currently used for EASE_R.
ease_r_hyperparameters = {"l2_norm": 249.36129021601147}

# NOTE(review): fitted on URM_all, which still contains the interactions split
# off into URM_test / URM_validation.
recommender_EaseR = EASE_R_Recommender(URM_all)
recommender_EaseR.fit(**ease_r_hyperparameters)

EASE_R_Recommender: Fitting model... 
EASE_R_Recommender: Fitting model... done in 11.07 min


In [6]:
print("MAP of the starting models")

# One (report template, fitted model) pair per base recommender; they are
# evaluated on the test split and reported in exactly this order.
starting_models = (
    ("SLIM ElasticNet - MAP: {}", recommender_SLIMElasticNet),
    ("RP3beta - MAP: {}", recommender_RP3beta),
    ("EASE_R - MAP: {}", recommender_EaseR),
)

for report_template, base_recommender in starting_models:
    result_df, _ = evaluator_test.evaluateRecommender(base_recommender)
    # Row 10 of the result frame is the cutoff the evaluator was built with.
    print(report_template.format(result_df.loc[10]["MAP"]))

MAP of the starting models
EvaluatorHoldout: Processed 40726 (100.0%) in 49.52 sec. Users per second: 822
SLIM ElasticNet - MAP: 0.0
EvaluatorHoldout: Processed 40726 (100.0%) in 34.28 sec. Users per second: 1188
RP3beta - MAP: 0.0
EvaluatorHoldout: Processed 40726 (100.0%) in 1.06 min. Users per second: 638
EASE_R - MAP: 0.0


In [7]:
from numpy import linalg as LA
from Recommenders.BaseRecommender import BaseRecommender

class DifferentLossScoresHybridRecommender(BaseRecommender):
    """ ScoresHybridRecommender
    Hybrid of three predictions scores, each divided by its own norm before mixing
    R = R1/|R1|*alpha + R2/|R2|*beta + R3/|R3|*(1-alpha-beta)

    The norm order is the `norm` value given to fit() and is handed to
    numpy.linalg.norm as `ord`.

    Class from Dacrema exercise modified by Antonio Ercolani
    The original took as input 2 recommender

    """

    RECOMMENDER_NAME = "DifferentLossScoresHybridRecommender"


    def __init__(self, URM_train, recommender_1, recommender_2, recommender_3):
        """Store the three recommenders whose scores will be blended.

        :param URM_train: interaction matrix handed to BaseRecommender and kept as CSR
        :param recommender_1: recommender weighted by alpha
        :param recommender_2: recommender weighted by beta
        :param recommender_3: recommender weighted by 1-alpha-beta

        The wrapped recommenders are used as they are; nothing is fitted here.
        """
        super(DifferentLossScoresHybridRecommender, self).__init__(URM_train)

        self.URM_train = sps.csr_matrix(URM_train)
        self.recommender_1 = recommender_1
        self.recommender_2 = recommender_2
        self.recommender_3 = recommender_3


    def fit(self, norm, alpha = 0.5, beta = 0.5):
        """Set the blending parameters; no training takes place.

        :param norm: norm order passed as `ord` to numpy.linalg.norm
        :param alpha: weight of recommender_1
        :param beta: weight of recommender_2; recommender_3 gets 1-alpha-beta
        """
        self.alpha = alpha
        self.beta = beta
        self.norm = norm


    def _normalized_item_score(self, recommender, position, user_id_array):
        """Return the scores of `recommender` for the batch, divided by their norm.

        :param recommender: one of the three wrapped recommenders
        :param position: 1, 2 or 3, only used in the error message
        :param user_id_array: users to score
        :raises ValueError: if the norm of the scores is zero
        """
        item_weights = recommender._compute_item_score(user_id_array)

        # NOTE(review): one norm is taken over the whole array the recommender
        # returns. If that array is 2-D (users x items -- to be confirmed),
        # numpy.linalg.norm treats `ord` as a matrix norm, so the scale factor
        # depends on which users share the batch rather than on each user alone.
        norm_item_weights = LA.norm(item_weights, self.norm)

        if norm_item_weights == 0:
            raise ValueError("Norm {} of item weights for recommender {} is zero. Avoiding division by zero".format(self.norm, position))

        return item_weights / norm_item_weights


    def _compute_item_score(self, user_id_array, items_to_compute = None):
        """Blend the normalized scores of the three wrapped recommenders.

        :param user_id_array: users to score
        :param items_to_compute: optional item indices to restrict the scores to.
            When given, every other item gets a score of -inf so that it cannot
            be ranked. Defaults to None (all items), matching the one-argument
            way this method is called on the wrapped recommenders.
        :return: array of blended scores, same shape as the wrapped recommenders' output
        :raises ValueError: if any wrapped recommender yields scores with zero norm,
            even when its blending weight is 0
        """
        # The wrapped recommenders are always asked for the full score array so
        # that the norms do not change with `items_to_compute`.
        item_weights = (
            self._normalized_item_score(self.recommender_1, 1, user_id_array) * self.alpha
            + self._normalized_item_score(self.recommender_2, 2, user_id_array) * self.beta
            + self._normalized_item_score(self.recommender_3, 3, user_id_array) * (1-self.alpha-self.beta)
        )

        if items_to_compute is not None:
            # NOTE(review): -inf for excluded items is presumably what the
            # framework's recommend() expects; confirm against BaseRecommender.
            restricted_weights = np.full(item_weights.shape, -np.inf, dtype=item_weights.dtype)
            restricted_weights[..., items_to_compute] = item_weights[..., items_to_compute]
            item_weights = restricted_weights

        return item_weights

In [10]:
# Grid search over the norm order and the blending weights of the hybrid,
# scored by MAP@10 on the validation split.
#
# NOTE(review): the three base recommenders handed in here were fitted on
# URM_all, which still contains the validation interactions, so the MAP values
# below are likely optimistic.
recommender_object = DifferentLossScoresHybridRecommender(URM_train, recommender_SLIMElasticNet, recommender_RP3beta, recommender_EaseR)
best_model = {
    "MAP" : 0,
    "alpha" : 0,
    "beta" : 0,
    "norm" : 0
}

# Weights are enumerated as integer tenths and divided afterwards, so every
# value is an exact one-decimal number and the "alpha + beta <= 1" constraint
# is enforced on integers instead of on accumulated floats.
GRID_STEPS = 10

for norm in [1,2]:
    for alpha_tenths in range(GRID_STEPS + 1):
        # beta only ranges over the values that keep alpha + beta <= 1
        for beta_tenths in range(GRID_STEPS + 1 - alpha_tenths):

            alpha = alpha_tenths / GRID_STEPS
            beta = beta_tenths / GRID_STEPS
            # weight left for the third recommender, used for reporting only
            theta = (GRID_STEPS - alpha_tenths - beta_tenths) / GRID_STEPS

            print("----")
            recommender_object.fit(norm, alpha, beta)
            result_df, _ = evaluator_validation.evaluateRecommender(recommender_object)
            current_map = result_df.loc[10]["MAP"]
            print("Norm: {}, Alpha: {}, Beta: {}, Theta: {}, Result: {}".format(norm, alpha, beta, theta, current_map))

            if current_map > best_model["MAP"]:
                best_model["MAP"] = current_map
                best_model["alpha"] = alpha
                best_model["beta"] = beta
                best_model["norm"] = norm

                print("*** New best model found! ")
                print("New best model has MAP: {} with alpha: {}, beta: {}, theta: {}, norm: {}".format(best_model["MAP"], best_model["alpha"], best_model["beta"],
                                                                                                        theta, best_model["norm"]))

print("----")
print("Best model has MAP: {} with alpha: {}, beta: {}, norm: {}".format(best_model["MAP"], best_model["alpha"], best_model["beta"], best_model["norm"]))

----
EvaluatorHoldout: Processed 40047 (100.0%) in 1.84 min. Users per second: 363
Norm: 1, Alpha: 0.0, Beta: 0.0, Theta: 1.0, Result: 0.056185577184901865
*** New best model found! 
New best model has MAP: 0.056185577184901865 with alpha: 0.0, beta: 0.0, theta: 1.0, norm: 1
----
EvaluatorHoldout: Processed 40047 (100.0%) in 1.83 min. Users per second: 364
Norm: 1, Alpha: 0.0, Beta: 0.1, Theta: 0.9, Result: 0.0538370698159811
----
EvaluatorHoldout: Processed 40047 (100.0%) in 1.84 min. Users per second: 363
Norm: 1, Alpha: 0.0, Beta: 0.2, Theta: 0.8, Result: 0.050707665516825436
----
EvaluatorHoldout: Processed 40047 (100.0%) in 1.82 min. Users per second: 366
Norm: 1, Alpha: 0.0, Beta: 0.3, Theta: 0.7, Result: 0.04793076666662491
----
EvaluatorHoldout: Processed 40047 (100.0%) in 1.78 min. Users per second: 375
Norm: 1, Alpha: 0.0, Beta: 0.4, Theta: 0.6, Result: 0.04530175357446784
----
EvaluatorHoldout: Processed 40047 (100.0%) in 1.77 min. Users per second: 376
Norm: 1, Alpha: 0.0, 

KeyboardInterrupt: 

In [11]:
# Blend used for the submission. With alpha = 0 and beta = 0 the whole weight
# (1 - alpha - beta = 1) goes to the third recommender, recommender_EaseR.
# The other two are still scored on every call and must have a non-zero norm.
final_blend = {"norm": 1, "alpha": 0, "beta": 0}

recommender = DifferentLossScoresHybridRecommender(
    URM_all,
    recommender_SLIMElasticNet,
    recommender_RP3beta,
    recommender_EaseR,
)
recommender.fit(**final_blend)

## Submissions

In [12]:
# CSV with the users to recommend for; a 'user_id' column is read from it later.
target_users_path = '/kaggle/working/Recommender-Systems-Challenge-2022/Dataset/data_target_users_test.csv'
test_users = pd.read_csv(target_users_path)

In [13]:
user_id = test_users['user_id']
# One recommendation list of length 10 per target user, in the order of the CSV.
recommendations = [recommender.recommend(user, cutoff=10) for user in user_id]

In [14]:
# Serialise every recommendation list as its item ids joined by single spaces.
# Building the string through str(np.array(...)) would inherit numpy's print
# layout (padding that aligns elements of different width, line wrapping for
# long arrays), which puts stray whitespace into the submission file.
# A plain list is assigned, so rows are matched by position with test_users.
test_users['item_list'] = [
    " ".join(str(item) for item in items_for_user)
    for items_for_user in recommendations
]
test_users.to_csv('/kaggle/working/Submission_SLIM_EN_rp3Beta_EaseR.csv', index=False)