In [1]:
import pandas as pd
import numpy as np
import scipy.sparse as sps
import matplotlib.pyplot as pyplot

from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Utils.DataReader import load_urm, load_icm, load_target

In [2]:
# Seed NumPy before the random holdout so the train/validation/test splits (and the
# metrics computed on them) are reproducible after a kernel restart.
# NOTE(review): assumes the split helper draws from NumPy's global RNG - confirm.
SPLIT_SEED = 42
np.random.seed(SPLIT_SEED)

# Share of interactions kept on the "train" side of each holdout, and the list
# length at which the evaluators compute their metrics.
TRAIN_PERCENTAGE = 0.85
EVALUATION_CUTOFF = 10

URM_all = load_urm()

# Two nested holdouts: URM_all -> (train + validation, test), then (train, validation).
URM_train, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage=TRAIN_PERCENTAGE)
URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_train, train_percentage=TRAIN_PERCENTAGE)

evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[EVALUATION_CUTOFF])
evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[EVALUATION_CUTOFF])

EvaluatorHoldout: Ignoring 1566 ( 3.8%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 949 ( 2.3%) Users that have less than 1 test interactions


In [4]:
from Recommenders.SLIM.SLIMElasticNetRecommender import SLIMElasticNetRecommender

# Hyper-parameters currently used for the SLIM ElasticNet fit.
# Previous ("old") values: topK=405, l1_ratio=0.0010299956370568744, alpha=0.01
slim_elasticnet_fit_params = {
    "topK": 799,
    "l1_ratio": 0.006782112530625445,
    "alpha": 0.0023657508503917664,
}

# The model is built on the complete interaction matrix (URM_all), not on URM_train.
recommender_SLIMElasticNet = SLIMElasticNetRecommender(URM_all)
recommender_SLIMElasticNet.fit(**slim_elasticnet_fit_params)

SLIMElasticNetRecommender: Processed 5574 (22.7%) in 5.00 min. Items per second: 18.57
SLIMElasticNetRecommender: Processed 11368 (46.4%) in 10.00 min. Items per second: 18.94
SLIMElasticNetRecommender: Processed 17061 (69.6%) in 15.00 min. Items per second: 18.95
SLIMElasticNetRecommender: Processed 20717 (84.5%) in 20.00 min. Items per second: 17.26
SLIMElasticNetRecommender: Processed 24507 (100.0%) in 23.26 min. Items per second: 17.56


In [5]:
from Recommenders.GraphBased.RP3betaRecommender import RP3betaRecommender

# Hyper-parameters currently used for the RP3beta fit.
# Earlier configurations that were tried:
#   alpha=0.08934428697754494, beta=0.15025275671838492, topK=132, implicit=True
#   topK=167, alpha=1.0, beta=0.4520495673133021, implicit=True   (old)
rp3beta_fit_params = {
    "alpha": 0.9170274010180015,
    "beta": 0.09897166685470696,
    "topK": 94,
    "implicit": True,
}

# The model is built on the complete interaction matrix (URM_all), not on URM_train.
recommender_RP3beta = RP3betaRecommender(URM_all)
recommender_RP3beta.fit(**rp3beta_fit_params)

RP3betaRecommender: Similarity column 24507 (100.0%), 2046.01 column/sec. Elapsed time 11.98 sec


In [6]:
from Recommenders.EASE_R.EASE_R_Recommender import EASE_R_Recommender

# Hyper-parameters used for the EASE_R fit.
ease_r_fit_params = {
    "topK": None,
    "normalize_matrix": False,
    "l2_norm": 157.86876317814773,
}

# The model is built on the complete interaction matrix (URM_all), not on URM_train.
recommender_EaseR = EASE_R_Recommender(URM_all)
recommender_EaseR.fit(**ease_r_fit_params)

EASE_R_Recommender: Fitting model... 
EASE_R_Recommender: Fitting model... done in 2.76 min


In [7]:
print("MAP of the starting models")

# NOTE(review): the three models were constructed on URM_all, which contains the
# interactions held out in URM_test. The recorded MAP of 0.0 for every model is
# presumably caused by that (test items filtered out as already seen) - confirm,
# and fit on URM_train if a meaningful offline score is wanted.
models_to_report = (
    ("SLIM ElasticNet - MAP: {}", recommender_SLIMElasticNet),
    ("RP3beta - MAP: {}", recommender_RP3beta),
    ("EaseR - MAP: {}", recommender_EaseR),
)

for report_template, fitted_model in models_to_report:
    # Row 10 of the result frame is the cutoff the test evaluator was built with.
    result_df, _ = evaluator_test.evaluateRecommender(fitted_model)
    print(report_template.format(result_df.loc[10]["MAP"]))

MAP of the starting models
EvaluatorHoldout: Processed 40680 (100.0%) in 26.69 sec. Users per second: 1524
SLIM ElasticNet - MAP: 0.0
EvaluatorHoldout: Processed 40680 (100.0%) in 12.08 sec. Users per second: 3368
RP3beta - MAP: 0.0
EvaluatorHoldout: Processed 40680 (100.0%) in 22.61 sec. Users per second: 1799
EaseR - MAP: 0.0


In [8]:
from numpy import linalg as LA
from Recommenders.BaseRecommender import BaseRecommender

class MergeRaccomandationsHybridRecommender(BaseRecommender):
    """Hybrid that interleaves the ranked lists of three already-fitted recommenders.

    For every user the three recommenders are asked for their own top-`cutoff`
    list. The final list is built round-robin: rank 1 of recommender 1, rank 1 of
    recommender 2, rank 1 of recommender 3, then rank 2 of each, and so on. Items
    already present in the merged list are skipped, and the merge stops as soon
    as `cutoff` items have been collected (or all three lists are exhausted).

    The blended score matrix (0.3 * each recommender's scores) is only used as
    the optional `return_scores` output; it does not influence the ranking.
    """

    RECOMMENDER_NAME = "MergeRaccomandationsHybridRecommender"


    def __init__(self, URM_train, recommender_1, recommender_2, recommender_3):
        """Store the interaction matrix and the three recommenders to merge.

        :param URM_train: user-item interaction matrix; stored as CSR.
        :param recommender_1: first recommender; its items come first at every rank.
        :param recommender_2: second recommender in the round-robin order.
        :param recommender_3: third recommender in the round-robin order.
        """
        super(MergeRaccomandationsHybridRecommender, self).__init__(URM_train)

        self.URM_train = sps.csr_matrix(URM_train)
        self.recommender_1 = recommender_1
        self.recommender_2 = recommender_2
        self.recommender_3 = recommender_3

    def _compute_item_score(self, user_id_array, items_to_compute=None):
        """Return the blended item scores (0.3 weight on each recommender).

        :param user_id_array: users to score.
        :param items_to_compute: optional item restriction, forwarded unchanged to
            the three recommenders.
        :return: sum of the three recommenders' score arrays, each multiplied by 0.3.
        """
        item_weights_1 = self.recommender_1._compute_item_score(user_id_array, items_to_compute=items_to_compute)
        item_weights_2 = self.recommender_2._compute_item_score(user_id_array, items_to_compute=items_to_compute)
        item_weights_3 = self.recommender_3._compute_item_score(user_id_array, items_to_compute=items_to_compute)

        item_weights = item_weights_1*0.3 + item_weights_2*0.3 + item_weights_3*0.3

        return item_weights

    @staticmethod
    def _merge_ranked_lists(ranked_lists, cutoff):
        """Interleave several ranked item lists rank by rank, dropping duplicates.

        :param ranked_lists: sequence of ranked item-id lists, in priority order.
        :param cutoff: maximum number of items in the merged list.
        :return: merged list with at most `cutoff` distinct items. It is shorter
            when the input lists together contain fewer distinct items.
        """
        merged = []
        if cutoff <= 0:
            return merged

        already_taken = set()
        deepest_rank = max((len(ranking) for ranking in ranked_lists), default=0)

        for rank in range(deepest_rank):
            for ranking in ranked_lists:
                # A list may be shorter than the others (e.g. items filtered out);
                # skip it at this rank instead of indexing past its end.
                if rank >= len(ranking):
                    continue

                item = ranking[rank]
                if item in already_taken:
                    continue

                already_taken.add(item)
                merged.append(item)

                if len(merged) >= cutoff:
                    return merged

        return merged

    def recommend(self, user_id_array, cutoff = None, remove_seen_flag=True, items_to_compute = None,
                  remove_top_pop_flag = False, remove_custom_items_flag = False, return_scores = False):
        """Recommend items by merging the rankings of the three recommenders.

        :param user_id_array: a single user id (scalar) or an array-like of user ids.
        :param cutoff: list length; defaults to, and is capped at, n_items - 1.
        :param remove_seen_flag: forwarded to the three recommenders and applied to
            the returned scores.
        :param items_to_compute: forwarded to the three recommenders.
        :param remove_top_pop_flag: forwarded to the three recommenders and applied
            to the returned scores.
        :param remove_custom_items_flag: forwarded to the three recommenders and
            applied to the returned scores.
        :param return_scores: when True also return the blended score matrix.
        :return: a flat list of item ids for a scalar `user_id_array`, otherwise one
            list per user; a `(ranking, scores_batch)` tuple when `return_scores`.
        """
        # A scalar is handled as a one-user batch and unwrapped again at the end.
        single_user = np.isscalar(user_id_array)
        user_id_array = np.atleast_1d(user_id_array)

        max_cutoff = self.URM_train.shape[1] - 1
        cutoff = max_cutoff if cutoff is None else min(cutoff, max_cutoff)

        # One list of per-user rankings for each recommender, all in user_id_array order.
        rankings_per_recommender = [
            recommender.recommend(user_id_array,
                                  cutoff = cutoff,
                                  remove_seen_flag = remove_seen_flag,
                                  items_to_compute = items_to_compute,
                                  remove_top_pop_flag = remove_top_pop_flag,
                                  remove_custom_items_flag = remove_custom_items_flag)
            for recommender in (self.recommender_1, self.recommender_2, self.recommender_3)
        ]

        # zip(*) regroups the rankings per user, so every user gets an individual merge.
        ranking_list = [
            self._merge_ranked_lists(user_rankings, cutoff)
            for user_rankings in zip(*rankings_per_recommender)
        ]

        ranking = ranking_list[0] if single_user else ranking_list

        if not return_scores:
            return ranking

        # The blended scores are only needed as an output, so they are computed on demand.
        scores_batch = self._compute_item_score(user_id_array, items_to_compute=items_to_compute)

        if remove_seen_flag:
            for user_index, user_id in enumerate(user_id_array):
                scores_batch[user_index,:] = self._remove_seen_on_scores(user_id, scores_batch[user_index, :])

        if remove_top_pop_flag:
            scores_batch = self._remove_TopPop_on_scores(scores_batch)

        if remove_custom_items_flag:
            scores_batch = self._remove_custom_items_on_scores(scores_batch)

        return ranking, scores_batch

In [9]:
# Top-10 list produced by SLIM ElasticNet alone for user 0 (reference for the merged list).
recommender_SLIMElasticNet.recommend(user_id_array=0, cutoff = 10)

[617, 752, 18484, 391, 20, 56, 2488, 107, 23, 673]

In [10]:
# Top-10 list produced by RP3beta alone for user 0 (reference for the merged list).
recommender_RP3beta.recommend(user_id_array=0, cutoff = 10)

[2494, 391, 23, 107, 750, 828, 1534, 20, 1532, 5]

In [11]:
# Top-10 list produced by EASE_R alone for user 0 (reference for the merged list).
recommender_EaseR.recommend(user_id_array=0, cutoff = 10)

[20, 673, 391, 752, 617, 23, 25, 2488, 519, 29]

In [12]:
# Build the list-merging hybrid; the argument order fixes the round-robin priority
# (SLIM ElasticNet first, then RP3beta, then EASE_R).
recommender = MergeRaccomandationsHybridRecommender(
    URM_train=URM_all,
    recommender_1=recommender_SLIMElasticNet,
    recommender_2=recommender_RP3beta,
    recommender_3=recommender_EaseR,
)

# Sanity check on user 0: compare with the three individual top-10 lists.
recommender.recommend(user_id_array=0, cutoff=10)

[617, 2494, 20, 752, 391, 673, 18484, 23, 107, 750]

## Submissions

In [23]:
# Produce a submission file: one row per target user with its top-10 item ids.
test_users = pd.read_csv('../../Dataset/data_target_users_test.csv')
user_id = test_users['user_id']

# One recommendation list per target user, in the same order as the CSV rows.
recommendations = [recommender.recommend(user, cutoff=10) for user in user_id]

# Serialise every list as item ids separated by exactly one space. Joining the ids
# explicitly avoids the column padding and line wrapping of numpy's array repr.
test_users['item_list'] = [
    ' '.join(str(item) for item in user_items) for user_items in recommendations
]

# Forward slashes and the same '../../' root as the dataset path: the previous
# '..\'-style separator is not a directory separator outside Windows.
test_users.to_csv('../../Submissions/Submission_11_Hybrid.csv', index=False)

FileNotFoundError: [Errno 2] No such file or directory: '../Dataset/data_target_users_test.csv'

In [24]:
# Load the target users for the submission; the 'user_id' column is read from this frame.
# Alternative location of the same file, kept for reference:
#test_users = pd.read_csv('Recommender-Systems-Challenge-2022/Dataset/data_target_users_test.csv')
test_users = pd.read_csv('../../Dataset/data_target_users_test.csv')

In [25]:
# Ask the hybrid for a top-10 list for every target user, keeping the CSV row order.
user_id = test_users['user_id']
recommendations = [recommender.recommend(target_user, cutoff=10) for target_user in user_id]

In [27]:
# Serialise every recommendation list as item ids separated by exactly one space.
# Joining the ids explicitly avoids the column padding (leading / repeated spaces)
# and the print-option dependent wrapping of numpy's array repr.
test_users['item_list'] = [
    ' '.join(str(item) for item in user_items) for user_items in recommendations
]

test_users.to_csv('../../Submissions/Submission_SLIM_EN_rp3Beta_EaseR_ListMerge.csv', index=False)