In [1]:
import pandas as pd
import numpy as np
import scipy.sparse as sps
import matplotlib.pyplot as pyplot

from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Utils.DataReader import load_urm, load_icm, load_target

Cloning into 'Recommender-Systems-Challenge-2022'...
remote: Enumerating objects: 589, done.[K
remote: Counting objects: 100% (185/185), done.[K
remote: Compressing objects: 100% (134/134), done.[K
remote: Total 589 (delta 96), reused 121 (delta 50), pack-reused 404[K
Receiving objects: 100% (589/589), 96.00 MiB | 21.16 MiB/s, done.
Resolving deltas: 100% (268/268), done.
Updating files: 100% (244/244), done.


In [2]:
URM_all = load_urm()

#URM_train, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.85)
#URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_train, train_percentage = 0.85)
URM_train = sps.load_npz('Dataset/Split/URM_train.npz')
URM_test =  sps.load_npz('Dataset/Split/URM_test.npz')
URM_validation = sps.load_npz('Dataset/Split/URM_validation.npz')

evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])
evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

EvaluatorHoldout: Ignoring 1539 ( 3.7%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 902 ( 2.2%) Users that have less than 1 test interactions


In [3]:
from Recommenders.SLIM.SLIMElasticNetRecommender import SLIMElasticNetRecommender

recommender_SLIMElasticNet = SLIMElasticNetRecommender(URM_all)
recommender_SLIMElasticNet.fit(topK=585, l1_ratio=0.009929794168572358, alpha=0.0018687691383859428)
#recommender_SLIMElasticNet.fit(topK=405, l1_ratio=0.0010299956370568744, alpha=0.01) old values
#recommender_SLIMElasticNet.fit(topK=306, l1_ratio=0.0035658486108087256, alpha=0.004181961739488717) MAP=0.023007382689392016
#recommender_SLIMElasticNet.fit(topK=799, l1_ratio=0.006782112530625445, alpha=0.0023657508503917664) 

SLIMElasticNetRecommender: Processed 2435 ( 9.9%) in 5.00 min. Items per second: 8.11
SLIMElasticNetRecommender: Processed 4919 (20.1%) in 10.00 min. Items per second: 8.20
SLIMElasticNetRecommender: Processed 7515 (30.7%) in 15.00 min. Items per second: 8.35
SLIMElasticNetRecommender: Processed 10175 (41.5%) in 20.00 min. Items per second: 8.48
SLIMElasticNetRecommender: Processed 12902 (52.6%) in 25.00 min. Items per second: 8.60
SLIMElasticNetRecommender: Processed 15621 (63.7%) in 30.01 min. Items per second: 8.68
SLIMElasticNetRecommender: Processed 18302 (74.7%) in 35.01 min. Items per second: 8.71
SLIMElasticNetRecommender: Processed 21019 (85.8%) in 40.01 min. Items per second: 8.76
SLIMElasticNetRecommender: Processed 23755 (96.9%) in 45.01 min. Items per second: 8.80
SLIMElasticNetRecommender: Processed 24507 (100.0%) in 46.39 min. Items per second: 8.80


In [4]:
from Recommenders.GraphBased.RP3betaRecommender import RP3betaRecommender

recommender_RP3beta = RP3betaRecommender(URM_all)
recommender_RP3beta.fit(alpha=0.9440960931970539, beta=0.23362882450834893, topK=54, implicit=True)
#recommender_RP3beta.fit(alpha=0.3196900461986604, beta=0.3530045735373658, topK=44, implicit=False) #MAP: 0.021632459998098784
#recommender_RP3beta.fit(alpha=0.08934428697754494, beta=0.15025275671838492, topK=132, implicit=True)
#recommender_RP3beta.fit(topK=167, alpha=1.0, beta=0.4520495673133021, implicit=True) old

RP3betaRecommender: Similarity column 24507 (100.0%), 1017.89 column/sec. Elapsed time 24.08 sec


In [5]:
print("MAP of the starting models")

result_df, _ = evaluator_test.evaluateRecommender(recommender_SLIMElasticNet)
print("SLIM ElasticNet - MAP: {}".format(result_df.loc[10]["MAP"]))

result_df, _ = evaluator_test.evaluateRecommender(recommender_RP3beta)
print("RP3beta - MAP: {}".format(result_df.loc[10]["MAP"]))

MAP of the starting models
EvaluatorHoldout: Processed 40727 (100.0%) in 54.02 sec. Users per second: 754
SLIM ElasticNet - MAP: 0.0
EvaluatorHoldout: Processed 40727 (100.0%) in 31.11 sec. Users per second: 1309
RP3beta - MAP: 0.0


In [6]:
from numpy import linalg as LA
from Recommenders.BaseRecommender import BaseRecommender

class MergeRaccomandationsHybridRecommender(BaseRecommender):
    """ ScoresHybridRecommender
    Hybrid of two prediction scores R = R1/norm*alpha + R2/norm*(1-alpha) where R1 and R2 come from
    algorithms trained on different loss functions.

    """

    RECOMMENDER_NAME = "MergeRaccomandationsHybridRecommender"


    def __init__(self, URM_train, recommender_1, recommender_2):
        super(MergeRaccomandationsHybridRecommender, self).__init__(URM_train)

        self.URM_train = sps.csr_matrix(URM_train)
        self.recommender_1 = recommender_1
        self.recommender_2 = recommender_2
    
    def _compute_item_score(self, user_id_array, items_to_compute):
        
        # In a simple extension this could be a loop over a list of pretrained recommender objects
        item_weights_1 = self.recommender_1._compute_item_score(user_id_array)
        item_weights_2 = self.recommender_2._compute_item_score(user_id_array)

        item_weights = item_weights_1*0.5 + item_weights_2*0.5

        return item_weights
        
    def recommend(self, user_id_array, cutoff = None, remove_seen_flag=True, items_to_compute = None,
                  remove_top_pop_flag = False, remove_custom_items_flag = False, return_scores = False):

        # If is a scalar transform it in a 1-cell array
        if np.isscalar(user_id_array):
            user_id_array = np.atleast_1d(user_id_array)
            single_user = True
        else:
            single_user = False

        if cutoff is None:
            cutoff = self.URM_train.shape[1] - 1

        cutoff = min(cutoff, self.URM_train.shape[1] - 1)

        # Compute the scores using the model-specific function
        # Vectorize over all users in user_id_array
        scores_batch = self._compute_item_score(user_id_array, items_to_compute=items_to_compute)


        for user_index in range(len(user_id_array)):

            user_id = user_id_array[user_index]

            if remove_seen_flag:
                scores_batch[user_index,:] = self._remove_seen_on_scores(user_id, scores_batch[user_index, :])

            # Sorting is done in three steps. Faster then plain np.argsort for higher number of items
            # - Partition the data to extract the set of relevant items
            # - Sort only the relevant items
            # - Get the original item index
            # relevant_items_partition = (-scores_user).argpartition(cutoff)[0:cutoff]
            # relevant_items_partition_sorting = np.argsort(-scores_user[relevant_items_partition])
            # ranking = relevant_items_partition[relevant_items_partition_sorting]
            #
            # ranking_list.append(ranking)


        if remove_top_pop_flag:
            scores_batch = self._remove_TopPop_on_scores(scores_batch)

        if remove_custom_items_flag:
            scores_batch = self._remove_custom_items_on_scores(scores_batch)

        # relevant_items_partition is block_size x cutoff
        relevant_items_partition = (-scores_batch).argpartition(cutoff, axis=1)[:,0:cutoff]

        # Get original value and sort it
        # [:, None] adds 1 dimension to the array, from (block_size,) to (block_size,1)
        # This is done to correctly get scores_batch value as [row, relevant_items_partition[row,:]]
        relevant_items_partition_original_value = scores_batch[np.arange(scores_batch.shape[0])[:, None], relevant_items_partition]
        relevant_items_partition_sorting = np.argsort(-relevant_items_partition_original_value, axis=1)
        ranking = relevant_items_partition[np.arange(relevant_items_partition.shape[0])[:, None], relevant_items_partition_sorting]
        
        ranking_list = [None] * ranking.shape[0]

        # Remove from the recommendation list any item that has a -inf score
        # Since -inf is a flag to indicate an item to remove
        for user_index in range(len(user_id_array)):
            user_recommendation_list = ranking[user_index]
            user_item_scores = scores_batch[user_index, user_recommendation_list]

            not_inf_scores_mask = np.logical_not(np.isinf(user_item_scores))

            user_recommendation_list = user_recommendation_list[not_inf_scores_mask]
            ranking_list[user_index] = user_recommendation_list.tolist()
            
            
        # MODIFIED PART TO MERGE THE LISTS
        list1 = self.recommender_1.recommend(user_id_array, cutoff = cutoff)
        list2 = self.recommender_2.recommend(user_id_array, cutoff = cutoff)
        
        list1 = list1[0]
        list2 = list2[0]
        
        result = []
        i = 0
            
        while len(result) < cutoff:
            if list1[i] not in result:
                result.append(list1[i])
            if (list2[i] != list1[i]):
                if list2[i] not in result:
                    if len(result) < cutoff:
                        result.append(list2[i])
            i = i + 1
        
        if return_scores:
            return result, scores_batch
        else:
            return result

In [7]:
recommender_SLIMElasticNet.recommend(user_id_array=0, cutoff = 10)

[617, 752, 18484, 391, 2488, 56, 20, 107, 2792, 23]

In [8]:
recommender_RP3beta.recommend(user_id_array=0, cutoff = 10)

[391, 20, 2494, 23, 29, 617, 516, 828, 752, 18484]

In [9]:
recommender = MergeRaccomandationsHybridRecommender(URM_all, recommender_SLIMElasticNet, recommender_RP3beta)
recommender.recommend(user_id_array=0, cutoff = 10)

[617, 391, 752, 20, 18484, 2494, 23, 2488, 29, 56]

## Submissions

In [12]:
test_users = pd.read_csv('Dataset/data_target_users_test.csv')

In [13]:
user_id = test_users['user_id']
recommendations = []
for user in user_id:
    recommendations.append(recommender.recommend(user, cutoff=10))

In [14]:
for index in range(len(recommendations)):
    recommendations[index]=np.array(recommendations[index])

test_users['item_list']= recommendations
test_users['item_list'] = pd.DataFrame([str(line).strip('[').strip(']').replace("'","") for line in test_users['item_list']])
test_users.to_csv('Submissions/Submission_SLIM_EN_rp3Beta_ListMerge.csv', index=False)