In [2]:
import pandas as pd
import numpy as np
import scipy.sparse as sps
import matplotlib.pyplot as pyplot

from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Utils.DataReader import load_urm, load_icm, load_target
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

In [3]:
# Complete user-item interaction matrix.
URM_all = load_urm()

# The train/test/validation matrices are read from the persisted split on disk.
# Kept for reference, a fresh random split could be drawn with:
#URM_train, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.85)
#URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_train, train_percentage = 0.85)
URM_train, URM_test, URM_validation = (
    sps.load_npz(split_path)
    for split_path in (
        'Dataset/Split/URM_train.npz',
        'Dataset/Split/URM_test.npz',
        'Dataset/Split/URM_validation.npz',
    )
)

# Hold-out evaluators reporting metrics at cutoff 10 (validation first, then test).
evaluator_validation, evaluator_test = (
    EvaluatorHoldout(URM_holdout, cutoff_list=[10])
    for URM_holdout in (URM_validation, URM_test)
)

EvaluatorHoldout: Ignoring 1581 ( 3.8%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 936 ( 2.2%) Users that have less than 1 test interactions


In [4]:
from Recommenders.SLIM.SLIMElasticNetRecommender import SLIMElasticNetRecommender

# Build the SLIM ElasticNet model on the complete interaction matrix and
# fit it with a fixed set of hyperparameters.
slim_en_hyperparameters = {
    'topK': 799,
    'l1_ratio': 0.006782112530625445,
    'alpha': 0.0023657508503917664,
}
slim_en = SLIMElasticNetRecommender(URM_all)
slim_en.fit(**slim_en_hyperparameters)

SLIMElasticNetRecommender: Processed 2791 (11.4%) in 5.00 min. Items per second: 9.30
SLIMElasticNetRecommender: Processed 5783 (23.6%) in 10.00 min. Items per second: 9.64
SLIMElasticNetRecommender: Processed 8817 (36.0%) in 15.00 min. Items per second: 9.79
SLIMElasticNetRecommender: Processed 11853 (48.4%) in 20.00 min. Items per second: 9.87
SLIMElasticNetRecommender: Processed 14927 (60.9%) in 25.01 min. Items per second: 9.95
SLIMElasticNetRecommender: Processed 17799 (72.6%) in 30.01 min. Items per second: 9.89
SLIMElasticNetRecommender: Processed 20633 (84.2%) in 35.01 min. Items per second: 9.82
SLIMElasticNetRecommender: Processed 23504 (95.9%) in 40.01 min. Items per second: 9.79
SLIMElasticNetRecommender: Processed 24507 (100.0%) in 41.65 min. Items per second: 9.81


In [5]:
from Recommenders.MatrixFactorization.IALSRecommender import IALSRecommender

# Fit the IALS model with the hyperparameters tuned in previous experiments.
mf_ials_hyperparameters = {
    'num_factors': 53,
    'confidence_scaling': 'linear',
    'alpha': 1.0,
    'epsilon': 8.071768337775882,
    'reg': 1e-05,
    'epochs': 45,
}
mf_ials = IALSRecommender(URM_all)
mf_ials.fit(**mf_ials_hyperparameters)

IALSRecommender: Epoch 1 of 45. Elapsed time 19.34 sec
IALSRecommender: Epoch 2 of 45. Elapsed time 39.96 sec
IALSRecommender: Epoch 3 of 45. Elapsed time 1.01 min
IALSRecommender: Epoch 4 of 45. Elapsed time 1.34 min
IALSRecommender: Epoch 5 of 45. Elapsed time 1.70 min
IALSRecommender: Epoch 6 of 45. Elapsed time 2.05 min
IALSRecommender: Epoch 7 of 45. Elapsed time 2.37 min
IALSRecommender: Epoch 8 of 45. Elapsed time 2.72 min
IALSRecommender: Epoch 9 of 45. Elapsed time 3.08 min
IALSRecommender: Epoch 10 of 45. Elapsed time 3.41 min
IALSRecommender: Epoch 11 of 45. Elapsed time 3.77 min
IALSRecommender: Epoch 12 of 45. Elapsed time 4.13 min
IALSRecommender: Epoch 13 of 45. Elapsed time 4.45 min
IALSRecommender: Epoch 14 of 45. Elapsed time 4.80 min
IALSRecommender: Epoch 15 of 45. Elapsed time 5.16 min
IALSRecommender: Epoch 16 of 45. Elapsed time 5.48 min
IALSRecommender: Epoch 17 of 45. Elapsed time 5.84 min
IALSRecommender: Epoch 18 of 45. Elapsed time 6.16 min
IALSRecommender: 

In [6]:
from numpy import linalg as LA
from Recommenders.BaseRecommender import BaseRecommender

class MergeRaccomandationsHybridRecommender(BaseRecommender):
    """Hybrid that interleaves the ranked lists of two already-fitted recommenders.

    For every user the final recommendation list is built round-robin:
    rank 1 of ``recommender_1``, rank 1 of ``recommender_2``, rank 2 of
    ``recommender_1`` and so on, skipping items that are already in the list,
    until ``cutoff`` items have been collected or both lists are exhausted.

    The item scores exposed by ``_compute_item_score`` (and returned by
    ``recommend`` when ``return_scores=True``) are the plain average of the two
    models' scores. They are NOT used to build the merged ranking.
    """

    RECOMMENDER_NAME = "MergeRaccomandationsHybridRecommender"


    def __init__(self, URM_train, recommender_1, recommender_2):
        """
        :param URM_train: user-item interaction matrix, stored internally as CSR
        :param recommender_1: fitted recommender whose items take the odd positions of the merge
        :param recommender_2: fitted recommender whose items take the even positions of the merge
        """
        super(MergeRaccomandationsHybridRecommender, self).__init__(URM_train)

        self.URM_train = sps.csr_matrix(URM_train)
        self.recommender_1 = recommender_1
        self.recommender_2 = recommender_2

    def _compute_item_score(self, user_id_array, items_to_compute = None):
        """Return the equally weighted average of the two recommenders' item scores.

        ``items_to_compute`` is forwarded to both recommenders so that the
        caller's restriction is honoured instead of being silently dropped.
        """
        item_weights_1 = self.recommender_1._compute_item_score(user_id_array, items_to_compute=items_to_compute)
        item_weights_2 = self.recommender_2._compute_item_score(user_id_array, items_to_compute=items_to_compute)

        return item_weights_1*0.5 + item_weights_2*0.5

    @staticmethod
    def _merge_ranked_lists(ranking_1, ranking_2, cutoff):
        """Interleave two ranked item lists, dropping duplicates, up to ``cutoff`` items.

        At each rank the item of ``ranking_1`` is considered before the item of
        ``ranking_2``. The lists may have different lengths and may be shorter
        than ``cutoff``; in that case the result is simply shorter.
        """
        merged = []
        already_added = set()   # O(1) duplicate check instead of scanning ``merged``

        for rank in range(max(len(ranking_1), len(ranking_2))):
            for ranking in (ranking_1, ranking_2):

                if len(merged) >= cutoff:
                    return merged

                if rank < len(ranking):
                    item_id = ranking[rank]

                    if item_id not in already_added:
                        already_added.add(item_id)
                        merged.append(item_id)

        return merged

    def recommend(self, user_id_array, cutoff = None, remove_seen_flag=True, items_to_compute = None,
                  remove_top_pop_flag = False, remove_custom_items_flag = False, return_scores = False):
        """Recommend items by merging the ranked lists of the two wrapped recommenders.

        :param user_id_array: a single user id (scalar) or an array-like of user ids
        :param cutoff: maximum length of each recommendation list; capped at n_items - 1
        :param remove_seen_flag: forwarded to both recommenders and applied to the returned scores
        :param items_to_compute: forwarded to both recommenders and to the score computation
        :param remove_top_pop_flag: forwarded to both recommenders and applied to the returned scores
        :param remove_custom_items_flag: forwarded to both recommenders and applied to the returned scores
        :param return_scores: if True also return the averaged score matrix (one row per user)
        :return: a flat list of item ids when ``user_id_array`` is a scalar, otherwise
                 one list per user; followed by the score matrix when ``return_scores`` is True
        """

        # If is a scalar transform it in a 1-cell array
        if np.isscalar(user_id_array):
            user_id_array = np.atleast_1d(user_id_array)
            single_user = True
        else:
            single_user = False

        n_items = self.URM_train.shape[1]

        if cutoff is None:
            cutoff = n_items - 1

        cutoff = min(cutoff, n_items - 1)

        # Each wrapped recommender ranks with the same options requested by the caller,
        # so the merged list respects them as well.
        ranking_options = dict(cutoff = cutoff,
                               remove_seen_flag = remove_seen_flag,
                               items_to_compute = items_to_compute,
                               remove_top_pop_flag = remove_top_pop_flag,
                               remove_custom_items_flag = remove_custom_items_flag)

        # user_id_array is array-like here, hence both calls yield one ranked list per user
        rankings_1 = self.recommender_1.recommend(user_id_array, **ranking_options)
        rankings_2 = self.recommender_2.recommend(user_id_array, **ranking_options)

        # Merge user by user (previously only the first user of the batch was merged)
        ranking_list = [self._merge_ranked_lists(user_ranking_1, user_ranking_2, cutoff)
                        for user_ranking_1, user_ranking_2 in zip(rankings_1, rankings_2)]

        # Return single list for one user, instead of list of lists
        if single_user:
            ranking_list = ranking_list[0]

        if not return_scores:
            return ranking_list

        # The scores are only needed when explicitly requested
        scores_batch = self._compute_item_score(user_id_array, items_to_compute=items_to_compute)

        if remove_seen_flag:
            for user_index in range(len(user_id_array)):
                user_id = user_id_array[user_index]
                scores_batch[user_index,:] = self._remove_seen_on_scores(user_id, scores_batch[user_index, :])

        if remove_top_pop_flag:
            scores_batch = self._remove_TopPop_on_scores(scores_batch)

        if remove_custom_items_flag:
            scores_batch = self._remove_custom_items_on_scores(scores_batch)

        return ranking_list, scores_batch

In [7]:
# Sanity check: top-10 recommendations of the SLIM ElasticNet model for user 0.
slim_en.recommend(user_id_array=0, cutoff = 10)

[617, 752, 18484, 391, 20, 56, 2488, 107, 23, 673]

In [8]:
# Sanity check: top-10 recommendations of the IALS model for user 0.
mf_ials.recommend(user_id_array=0, cutoff = 10)

[519, 53, 1023, 560, 353, 432, 750, 102, 1064, 1024]

In [9]:
# Wrap the two fitted models in the list-merging hybrid and inspect its
# top-10 for user 0 (to be compared with the two lists displayed above).
recommender = MergeRaccomandationsHybridRecommender(URM_all, slim_en, mf_ials)
recommender.recommend(user_id_array=0, cutoff = 10)

[617, 519, 752, 53, 18484, 1023, 391, 560, 20, 353]

## Submissions

In [10]:
# Load the target users of the submission; the 'user_id' column is read below.
test_users = pd.read_csv('Dataset/data_target_users_test.csv')

In [11]:
# One top-10 list per target user, in the same order as the rows of test_users.
user_id = test_users['user_id']
recommendations = [recommender.recommend(target_user, cutoff=10) for target_user in user_id]

In [12]:
# Serialise each user's recommendations as one space-separated string.
# The string is built with str.join rather than from the printed form of a
# numpy array: that form pads every element to a common width (producing runs
# of spaces) and can wrap or abbreviate long arrays, which would corrupt the file.
test_users['item_list'] = [
    ' '.join(str(item_id) for item_id in user_items)
    for user_items in recommendations
]

# Write the submission file: one row per target user, no index column.
test_users.to_csv('Submissions/Submission_SLIM_EN_IALS_ListMerge.csv', index=False)