In [1]:
import pandas as pd
import numpy as np
import scipy.sparse as sps
import matplotlib.pyplot as pyplot

from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Utils.DataReader import load_urm, load_icm, load_target

Cloning into 'Recommender-Systems-Challenge-2022'...
remote: Enumerating objects: 619, done.[K
remote: Counting objects: 100% (215/215), done.[K
remote: Compressing objects: 100% (162/162), done.[K
remote: Total 619 (delta 107), reused 139 (delta 52), pack-reused 404[K
Receiving objects: 100% (619/619), 106.18 MiB | 21.90 MiB/s, done.
Resolving deltas: 100% (279/279), done.
Updating files: 100% (254/254), done.


In [2]:
# --- Item content matrix: one row per item, one column per feature ----------
ICM_type_df = pd.read_csv("Dataset/Edited/data_ICM_type.csv")

items = ICM_type_df["item_id"]
features = ICM_type_df["feature_id"]
data = ICM_type_df["data"]

# No explicit shape is given, so the matrix dimensions come from the indices
# found in the CSV; the values are then stored as 32-bit integers.
ICM_type = sps.csr_matrix((data, (items, features))).astype(np.int32)

# --- User rating matrix: one row per user, one column per item --------------
n_users = 41629
# The item dimension is taken from the ICM so both matrices agree on it.
n_itemsFromICM = ICM_type.shape[0]

URM_all_dataframe = pd.read_csv("Dataset/Edited/URM_Binary_Ratings.csv")
URM_all_dataframe.columns = ["UserID", "ItemID", "Data"]

# Assemble in COO format, then convert to CSR for fast access to rows (users).
URM_all = sps.coo_matrix(
    (
        URM_all_dataframe["Data"].values,
        (URM_all_dataframe["UserID"].values, URM_all_dataframe["ItemID"].values),
    ),
    shape=(n_users, n_itemsFromICM),
).tocsr()

In [3]:
# Append the transposed ICM (features x items) below the URM (users x items),
# so each feature becomes an additional row of the stacked matrix.
# The result is requested in CSR format directly.
UCM_all = sps.vstack([URM_all, ICM_type.T], format="csr")

In [4]:
# Pre-computed URM train / test / validation matrices, loaded from disk.
URM_train = sps.load_npz('Dataset/Split/URM_train.npz')
URM_test = sps.load_npz('Dataset/Split/URM_test.npz')
URM_validation = sps.load_npz('Dataset/Split/URM_validation.npz')

# Seed NumPy's global RNG before the random hold-out splits so that re-running
# the notebook reproduces the same UCM partition (and therefore the same
# validation scores and tuned weights further down).
# NOTE(review): assumes split_train_in_two_percentage_global_sample samples
# through NumPy's global random state -- confirm against its implementation.
SPLIT_SEED = 42
np.random.seed(SPLIT_SEED)

# Two-stage random split of the stacked matrix, train_percentage = 0.85 each
# time: first carve out the test part, then carve the validation part out of
# what remains.
# NOTE(review): this split is drawn independently of the URM_* matrices loaded
# above, so UCM_train presumably contains interactions that are held out in
# URM_validation / URM_test -- verify before scoring UCM-trained models with
# the evaluator_URM_* objects.
UCM_train, UCM_test = split_train_in_two_percentage_global_sample(UCM_all, train_percentage = 0.85)
UCM_train, UCM_validation = split_train_in_two_percentage_global_sample(UCM_train, train_percentage = 0.85)

# One evaluator per hold-out matrix, all with cutoff_list=[10].
evaluator_URM_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])
evaluator_URM_test = EvaluatorHoldout(URM_test, cutoff_list=[10])
evaluator_UCM_validation = EvaluatorHoldout(UCM_validation, cutoff_list=[10])
evaluator_UCM_test = EvaluatorHoldout(UCM_test, cutoff_list=[10])

EvaluatorHoldout: Ignoring 1541 ( 3.7%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 958 ( 2.3%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 1489 ( 3.6%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 929 ( 2.2%) Users that have less than 1 test interactions


In [5]:
from Recommenders.GraphBased.RP3betaRecommender import RP3betaRecommender

# RP3beta model built on the stacked training matrix, fitted with fixed
# hyperparameter values.
recommender_RP3beta = RP3betaRecommender(UCM_train)
recommender_RP3beta.fit(
    topK=59,
    alpha=0.6949339074999242,
    beta=0.2853519331891143,
    implicit=True,
)

RP3betaRecommender: URM Detected 3 ( 0.0%) users with no interactions.
RP3betaRecommender: URM Detected 914 ( 3.3%) items with no interactions.
RP3betaRecommender: Similarity column 27968 (100.0%), 884.57 column/sec. Elapsed time 31.62 sec


In [6]:
# Score RP3beta on the UCM validation hold-out and report the MAP value stored
# under row label 10 of the returned results frame.
result_df, _ = evaluator_UCM_validation.evaluateRecommender(recommender_RP3beta)
print("RP3beta - MAP: {}".format(result_df.loc[10, "MAP"]))

EvaluatorHoldout: Processed 40148 (100.0%) in 32.48 sec. Users per second: 1236
RP3beta - MAP: 0.01871290115461654


In [7]:
from Recommenders.SLIM.SLIMElasticNetRecommender import SLIMElasticNetRecommender

# SLIM ElasticNet model built on the stacked training matrix, fitted with fixed
# hyperparameter values.
recommender_SLIMElasticNet = SLIMElasticNetRecommender(UCM_train)
recommender_SLIMElasticNet.fit(
    alpha=0.0018687691383859428,
    l1_ratio=0.009929794168572358,
    topK=585,
)

SLIMElasticNetRecommender: URM Detected 3 ( 0.0%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 914 ( 3.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 3397 (12.1%) in 5.00 min. Items per second: 11.32
SLIMElasticNetRecommender: Processed 7049 (25.2%) in 10.00 min. Items per second: 11.75
SLIMElasticNetRecommender: Processed 10793 (38.6%) in 15.00 min. Items per second: 11.99
SLIMElasticNetRecommender: Processed 14527 (51.9%) in 20.00 min. Items per second: 12.10
SLIMElasticNetRecommender: Processed 18243 (65.2%) in 25.00 min. Items per second: 12.16
SLIMElasticNetRecommender: Processed 21986 (78.6%) in 30.00 min. Items per second: 12.21
SLIMElasticNetRecommender: Processed 24893 (89.0%) in 35.00 min. Items per second: 11.85
SLIMElasticNetRecommender: Processed 26094 (93.3%) in 40.01 min. Items per second: 10.87
SLIMElasticNetRecommender: Processed 27275 (97.5%) in 45.01 min. Items per second: 10.10
SLIMElasticNetRecommender: Processed 27968 (

In [8]:
# Score SLIM ElasticNet on the UCM validation hold-out and report the MAP value
# stored under row label 10 of the returned results frame.
result_df, _ = evaluator_UCM_validation.evaluateRecommender(recommender_SLIMElasticNet)
print("SLIM ElasticNet - MAP: {}".format(result_df.loc[10, "MAP"]))

EvaluatorHoldout: Processed 40148 (100.0%) in 54.16 sec. Users per second: 741
SLIM ElasticNet - MAP: 0.019686434003709733


In [9]:
from Recommenders.EASE_R.EASE_R_Recommender import EASE_R_Recommender

# EASE_R model built on the stacked training matrix, fitted with a fixed L2
# penalty, no topK limit and no matrix normalization.
recommender_EaseR = EASE_R_Recommender(UCM_train)
recommender_EaseR.fit(
    l2_norm=225.7403645195658,
    topK=None,
    normalize_matrix=False,
)

EASE_R_Recommender: URM Detected 3 ( 0.0%) users with no interactions.
EASE_R_Recommender: URM Detected 914 ( 3.3%) items with no interactions.
EASE_R_Recommender: Fitting model... 
EASE_R_Recommender: Fitting model... done in 15.77 min


In [10]:
# Score EASE_R on the UCM validation hold-out and report the MAP value stored
# under row label 10 of the returned results frame.
result_df, _ = evaluator_UCM_validation.evaluateRecommender(recommender_EaseR)
print("EaseR - MAP: {}".format(result_df.loc[10, "MAP"]))

EvaluatorHoldout: Processed 40148 (100.0%) in 53.99 sec. Users per second: 744
EaseR - MAP: 0.0189714593701713


In [11]:
from numpy import linalg as LA
from Recommenders.BaseRecommender import BaseRecommender

class DifferentLossScoresHybridRecommender(BaseRecommender):
    """Linear blend of three recommenders' norm-scaled item scores.

    Each recommender's score array is divided by its own norm before mixing,
    so that models whose scores live on different scales can be combined:

        R = alpha * R1/||R1|| + beta * R2/||R2|| + (1 - alpha - beta) * R3/||R3||

    Class from Dacrema exercise modified by Antonio Ercolani.
    The original took as input 2 recommenders.
    """

    RECOMMENDER_NAME = "DifferentLossScoresHybridRecommender"

    def __init__(self, URM_train, recommender_1, recommender_2, recommender_3):
        """Store the training matrix and the three recommenders to blend.

        The recommenders are used as given: this class only calls their
        ``_compute_item_score`` and never fits them.
        """
        super(DifferentLossScoresHybridRecommender, self).__init__(URM_train)

        self.URM_train = sps.csr_matrix(URM_train)
        self.recommender_1 = recommender_1
        self.recommender_2 = recommender_2
        self.recommender_3 = recommender_3

    def fit(self, norm, alpha = 0.5, beta = 0.5):
        """Set the blending weights; nothing is trained here.

        :param norm: order of the norm, passed as ``ord`` to ``numpy.linalg.norm``.
        :param alpha: weight of recommender_1.
        :param beta: weight of recommender_2.
            recommender_3 receives the remainder ``1 - alpha - beta``.
        """
        self.alpha = alpha
        self.beta = beta
        self.norm = norm

    def _compute_item_score(self, user_id_array, items_to_compute=None):
        """Return the weighted sum of the three norm-scaled score arrays.

        :param user_id_array: users to score, forwarded unchanged to every
            underlying recommender.
        :param items_to_compute: accepted so this method can be called with the
            same one-argument form it uses on its own sub-recommenders (which
            also lets hybrids be nested). It is not forwarded.
            NOTE(review): scores are therefore always those returned by the
            sub-recommenders' default call -- confirm that callers passing an
            item restriction do not rely on it being applied here.
        :raises ValueError: if the norm of any recommender's scores is zero.

        NOTE(review): the norm is taken over the whole array returned for
        ``user_id_array``. If that array is 2-D (users x items -- TODO confirm),
        ``numpy.linalg.norm`` computes a matrix norm, so the scaling applied to
        one user's scores depends on which other users share the batch.
        """
        recommenders = (self.recommender_1, self.recommender_2, self.recommender_3)
        mixing_weights = (self.alpha, self.beta, 1 - self.alpha - self.beta)

        # Score first, then take all norms, then validate: same order as the
        # three hand-written copies this replaces.
        all_scores = [rec._compute_item_score(user_id_array) for rec in recommenders]
        all_norms = [LA.norm(scores, self.norm) for scores in all_scores]

        for position, score_norm in enumerate(all_norms, start=1):
            if score_norm == 0:
                raise ValueError("Norm {} of item weights for recommender {} is zero. Avoiding division by zero".format(self.norm, position))

        # Accumulate left to right so the floating-point summation order is
        # term_1 + term_2 + term_3.
        item_weights = None
        for scores, score_norm, weight in zip(all_scores, all_norms, mixing_weights):
            term = scores / score_norm * weight
            item_weights = term if item_weights is None else item_weights + term

        return item_weights

In [12]:
# Blend SLIM ElasticNet (weight alpha), EASE_R (weight beta) and RP3beta
# (weight theta = 1 - alpha - beta) and grid-search the weights on the UCM
# validation hold-out.
recommender_object = DifferentLossScoresHybridRecommender(UCM_train, recommender_SLIMElasticNet, recommender_EaseR, recommender_RP3beta)

# Best configuration seen so far; the zeros are placeholders until the first
# evaluation with MAP > 0.
best_model = {
    "MAP" : 0,
    "alpha" : 0,
    "beta" : 0,
    "norm" : 0
}

norm = 1

# Weights move in steps of 1 / GRID_STEPS = 0.1. Iterating over integer step
# counts (instead of a float np.arange) makes alpha, beta and theta exact
# tenths, so no rounding is needed and the logs never show float noise.
GRID_STEPS = 10

for alpha_step in range(GRID_STEPS + 1):
    # Only visit beta values for which alpha + beta <= 1.
    for beta_step in range(GRID_STEPS + 1 - alpha_step):
        alpha = alpha_step / GRID_STEPS
        beta = beta_step / GRID_STEPS
        # Display value only: the recommender derives its third weight from
        # alpha and beta itself.
        theta = (GRID_STEPS - alpha_step - beta_step) / GRID_STEPS

        print("----")
        recommender_object.fit(norm, alpha, beta)
        result_df, _ = evaluator_UCM_validation.evaluateRecommender(recommender_object)
        current_map = result_df.loc[10]["MAP"]
        print("Norm: {}, Alpha: {}, Beta: {}, Theta: {}, Result: {}".format(norm, alpha, beta, theta, current_map))

        # Strict '>' keeps the earliest configuration when two tie.
        if current_map > best_model["MAP"]:
            best_model["MAP"] = current_map
            best_model["alpha"] = alpha
            best_model["beta"] = beta
            best_model["norm"] = norm

            print("*** New best model found! ")
            print("New best model has MAP: {} with alpha: {}, beta: {}, theta: {}, norm: {}".format(best_model["MAP"], best_model["alpha"], best_model["beta"],
                                                                                                    theta, best_model["norm"]))
print("----")
print("Best model has MAP: {} with alpha: {}, beta: {}, norm: {}".format(best_model["MAP"], best_model["alpha"], best_model["beta"], best_model["norm"]))

DifferentLossScoresHybridRecommender: URM Detected 3 ( 0.0%) users with no interactions.
DifferentLossScoresHybridRecommender: URM Detected 914 ( 3.3%) items with no interactions.
----
EvaluatorHoldout: Processed 40148 (100.0%) in 1.48 min. Users per second: 451
Norm: 1, Alpha: 0.0, Beta: 0.0, Theta: 1.0, Result: 0.01871290115461654
*** New best model found! 
New best model has MAP: 0.01871290115461654 with alpha: 0.0, beta: 0.0, theta: 1.0, norm: 1
----
EvaluatorHoldout: Processed 40148 (100.0%) in 1.62 min. Users per second: 414
Norm: 1, Alpha: 0.0, Beta: 0.1, Theta: 0.9, Result: 0.018932654535361762
*** New best model found! 
New best model has MAP: 0.018932654535361762 with alpha: 0.0, beta: 0.1, theta: 0.9, norm: 1
----
EvaluatorHoldout: Processed 40148 (100.0%) in 1.62 min. Users per second: 413
Norm: 1, Alpha: 0.0, Beta: 0.2, Theta: 0.8, Result: 0.019118747736549077
*** New best model found! 
New best model has MAP: 0.019118747736549077 with alpha: 0.0, beta: 0.2, theta: 0.8, no

## Submissions

In [13]:
#test_users = pd.read_csv('/kaggle/working/Recommender-Systems-Challenge-2022/Dataset/data_target_users_test.csv')

In [14]:
#user_id = test_users['user_id']
#recommendations = []
#for user in user_id:
    #recommendations.append(recommender.recommend(user, cutoff=10))

In [15]:
#for index in range(len(recommendations)):
    #recommendations[index]=np.array(recommendations[index])

#test_users['item_list']= recommendations
#test_users['item_list'] = pd.DataFrame([str(line).strip('[').strip(']').replace("'","") for line in test_users['item_list']])
#test_users.to_csv('/kaggle/working/Submission_SLIM_EN_rp3Beta_EaseR_Linear.csv', index=False)