In [1]:
import pandas as pd
import numpy as np
import scipy.sparse as sps
import matplotlib.pyplot as pyplot

from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Utils.DataReader import load_urm, load_icm, load_target

Cloning into 'Recommender-Systems-Challenge-2022'...
remote: Enumerating objects: 571, done.
remote: Counting objects: 100% (167/167), done.
remote: Compressing objects: 100% (119/119), done.
remote: Total 571 (delta 87), reused 109 (delta 47), pack-reused 404
Receiving objects: 100% (571/571), 91.68 MiB | 23.81 MiB/s, done.
Resolving deltas: 100% (259/259), done.
Updating files: 100% (236/236), done.


In [2]:
# Full user-item interaction matrix.
URM_all = load_urm()

# Pre-computed train / validation / test split loaded from disk
# (presumably produced with split_train_in_two_percentage_global_sample
# using train_percentage = 0.85 -- TODO confirm).
URM_train, URM_test, URM_validation = (
    sps.load_npz(split_path)
    for split_path in (
        'Dataset/Split/URM_train.npz',
        'Dataset/Split/URM_test.npz',
        'Dataset/Split/URM_validation.npz',
    )
)

# One hold-out evaluator per held-out split, both evaluated at cutoff 10.
evaluator_validation, evaluator_test = (
    EvaluatorHoldout(URM_heldout, cutoff_list=[10])
    for URM_heldout in (URM_validation, URM_test)
)

EvaluatorHoldout: Ignoring 1578 ( 3.8%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 949 ( 2.3%) Users that have less than 1 test interactions


In [3]:
from Recommenders.SLIM.SLIMElasticNetRecommender import SLIMElasticNetRecommender

# Hyperparameters for the SLIM ElasticNet model, kept together so they are
# easy to spot and change.
slim_elasticnet_hyperparameters = {
    "topK": 799,
    "l1_ratio": 0.006782112530625445,
    "alpha": 0.0023657508503917664,
}

# The model is fitted on the full interaction matrix (URM_all).
recommender_SLIMElasticNet = SLIMElasticNetRecommender(URM_all)
recommender_SLIMElasticNet.fit(**slim_elasticnet_hyperparameters)

SLIMElasticNetRecommender: Processed 4038 (16.5%) in 5.00 min. Items per second: 13.45
SLIMElasticNetRecommender: Processed 8490 (34.6%) in 10.00 min. Items per second: 14.15
SLIMElasticNetRecommender: Processed 13001 (53.1%) in 15.00 min. Items per second: 14.44
SLIMElasticNetRecommender: Processed 17492 (71.4%) in 20.00 min. Items per second: 14.57
SLIMElasticNetRecommender: Processed 22053 (90.0%) in 25.00 min. Items per second: 14.70
SLIMElasticNetRecommender: Processed 24507 (100.0%) in 27.68 min. Items per second: 14.75


In [4]:
from Recommenders.EASE_R.EASE_R_Recommender import EASE_R_Recommender

# Hyperparameters for the EASE_R model, kept together so they are easy to
# spot and change.
ease_r_hyperparameters = {
    "topK": None,
    "normalize_matrix": False,
    "l2_norm": 157.86876317814773,
}

# The model is fitted on the full interaction matrix (URM_all).
recommender_EaseR = EASE_R_Recommender(URM_all)
recommender_EaseR.fit(**ease_r_hyperparameters)

EASE_R_Recommender: Fitting model... 
EASE_R_Recommender: Fitting model... done in 7.69 min


In [5]:
# Create a recommender object that performs the hybridization of the two models
from Recommenders.BaseRecommender import BaseRecommender

class ScoresHybridRecommender(BaseRecommender):
    """Linear hybrid of the item scores of two pre-fitted recommenders.

    The blended score is ``R = R1 * alpha + R2 * (1 - alpha)``, where ``R1``
    and ``R2`` are the scores returned by ``recommender_1`` and
    ``recommender_2`` respectively.
    """

    RECOMMENDER_NAME = "ScoresHybridRecommender"

    def __init__(self, URM_train, recommender_1, recommender_2):
        """Store the two recommenders whose scores will be blended.

        :param URM_train: interaction matrix handed to ``BaseRecommender``;
            it is also stored on the instance in CSR format.
        :param recommender_1: recommender whose scores are weighted by ``alpha``.
        :param recommender_2: recommender whose scores are weighted by ``1 - alpha``.
        """
        super(ScoresHybridRecommender, self).__init__(URM_train)

        self.URM_train = sps.csr_matrix(URM_train)
        self.recommender_1 = recommender_1
        self.recommender_2 = recommender_2

    def fit(self, alpha=0.5):
        """Set the blending weight; no training is performed here.

        :param alpha: weight of ``recommender_1``; ``recommender_2`` receives
            ``1 - alpha``.
        """
        self.alpha = alpha

    def _compute_item_score(self, user_id_array, items_to_compute=None):
        """Return the weighted sum of the two recommenders' item scores.

        ``items_to_compute`` now defaults to ``None`` so that this class can be
        called the same way it calls its own sub-recommenders (with only
        ``user_id_array``), which makes it possible to nest one hybrid inside
        another.

        :param user_id_array: users for which the scores are requested.
        :param items_to_compute: optional collection of item indices; when
            given, every other item gets a score of ``-inf``.
        :return: the blended item scores.
        """
        # In a simple extension this could be a loop over a list of pretrained recommender objects
        item_weights_1 = self.recommender_1._compute_item_score(user_id_array)
        item_weights_2 = self.recommender_2._compute_item_score(user_id_array)

        item_weights = item_weights_1 * self.alpha + item_weights_2 * (1 - self.alpha)

        if items_to_compute is not None:
            # Apply the restriction on the blended scores rather than forwarding
            # it to the sub-recommenders: multiplying a -inf score by a zero
            # weight would yield NaN.
            # Assumes the score array is 2D (users x items) -- TODO confirm.
            restricted_weights = np.full(item_weights.shape, -np.inf)
            restricted_weights[:, items_to_compute] = item_weights[:, items_to_compute]
            item_weights = restricted_weights

        return item_weights

In [6]:
# Both models were fitted on URM_all, so the hybrid is built on URM_all as well.
# The framework's seen-item filtering presumably relies on the URM given here
# (confirm against BaseRecommender.recommend); with URM_train, interactions that
# only appear in the validation/test splits could be recommended again.
recommender = ScoresHybridRecommender(URM_all, recommender_SLIMElasticNet, recommender_EaseR)

# NOTE(review): with alpha = 0 the blended score is exactly recommender_EaseR's
# score (SLIM ElasticNet gets weight 0). Confirm this is intended, given that
# the submission file is named after both models.
recommender.fit(alpha = 0)

## Submissions

In [7]:
# Users for which the submission must contain a recommendation list.
test_users = pd.read_csv(filepath_or_buffer='Dataset/data_target_users_test.csv')

In [8]:
# Ask the hybrid for the top-10 items of every target user, one user at a time.
user_id = test_users['user_id']
recommendations = [
    recommender.recommend(target_user, cutoff=10)
    for target_user in user_id
]

In [9]:
# Serialize each recommendation list as item ids separated by a single space.
# Joining the ids explicitly avoids relying on numpy's array repr, which pads
# integers to a common width and therefore leaves irregular whitespace once
# the surrounding brackets are stripped.
test_users['item_list'] = [
    ' '.join(str(item_id) for item_id in user_recommendations)
    for user_recommendations in recommendations
]

# Write the submission file without the DataFrame index column.
test_users.to_csv('Submissions/Submission_SLIM_EN_EaseR.csv', index=False)