In [1]:
!cp -r ../input/d/romanofrancesco/recsys-repo/RecSys_Course_AT_PoliMi-master/* ./

In [2]:
%config Completer.use_jedi = False
import pandas as pd
import numpy as np
import scipy.sparse as sps
import matplotlib.pyplot as pyplot
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

In [3]:
#load data_train, matrix of users interaction
URM_path = "../input/recommender-system-2021-challenge-polimi/data_train.csv"
URM_all_dataframe = pd.read_csv(filepath_or_buffer=URM_path, 
                                sep=",",
                                dtype={0:int, 1:int, 2:float},
                                header=0)
URM_all_dataframe.columns = ["UserID", "ItemID", "Interaction"]

In [4]:
URM_all_dataframe.head()

Unnamed: 0,UserID,ItemID,Interaction
0,0,53,1.0
1,0,209,1.0
2,0,223,1.0
3,0,249,1.0
4,0,435,1.0


In [5]:
userID_unique = URM_all_dataframe["UserID"].unique()
itemID_unique = URM_all_dataframe["ItemID"].unique()

n_users = len(userID_unique)
n_items = len(itemID_unique)
n_interactions = len(URM_all_dataframe)

print ("Number of items\t {}, Number of users\t {}".format(n_items, n_users))
print ("Max ID items\t {}, Max Id users\t {}\n".format(max(itemID_unique), max(userID_unique)))
print ("Average interactions per user {:.2f}".format(n_interactions/n_users))
print ("Average interactions per item {:.2f}\n".format(n_interactions/n_items))

print ("Sparsity {:.2f} %".format((1-float(n_interactions)/(n_items*n_users))*100))

Number of items	 18059, Number of users	 13650
Max ID items	 18058, Max Id users	 13649

Average interactions per user 387.23
Average interactions per item 292.69

Sparsity 97.86 %


In [6]:
URM_all = sps.coo_matrix((URM_all_dataframe["Interaction"].values, 
                          (URM_all_dataframe["UserID"].values, URM_all_dataframe["ItemID"].values)))
URM_all = URM_all.tocsr() # to obtain fast access to rows (users)
URM_all

<13650x18059 sparse matrix of type '<class 'numpy.float64'>'
	with 5285664 stored elements in Compressed Sparse Row format>

In [7]:
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

# split data into train and validation data 80/20
URM_train, URM_valid = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.80)



In [8]:
from Evaluation.Evaluator import EvaluatorHoldout

#create an evaluator object to evaluate validation set
#we will use it for hyperparameter tuning
evaluator_valid = EvaluatorHoldout(URM_valid, cutoff_list=[10])

EvaluatorHoldout: Ignoring 13645 ( 0.0%) Users that have less than 1 test interactions


In [9]:
import numpy as np
import scipy.sparse as sps
from Recommenders.Recommender_utils import check_matrix
from sklearn.linear_model import ElasticNet
from Recommenders.BaseSimilarityMatrixRecommender import BaseItemSimilarityMatrixRecommender
from Utils.seconds_to_biggest_unit import seconds_to_biggest_unit
import time, sys
from tqdm import tqdm
from sklearn.utils._testing import ignore_warnings
from sklearn.exceptions import ConvergenceWarning

# os.environ["PYTHONWARNINGS"] = ('ignore::exceptions.ConvergenceWarning:sklearn.linear_model')
# os.environ["PYTHONWARNINGS"] = ('ignore:Objective did not converge:ConvergenceWarning:')

class SLIMElasticNetRecommender(BaseItemSimilarityMatrixRecommender):
    """
    Train a Sparse Linear Methods (SLIM) item similarity model.
    NOTE: ElasticNet solver is parallel, a single intance of SLIM_ElasticNet will
          make use of half the cores available
    See:
        Efficient Top-N Recommendation by Linear Regression,
        M. Levy and K. Jack, LSRS workshop at RecSys 2013.
        SLIM: Sparse linear methods for top-n recommender systems,
        X. Ning and G. Karypis, ICDM 2011.
        http://glaros.dtc.umn.edu/gkhome/fetch/papers/SLIM2011icdm.pdf
    """

    RECOMMENDER_NAME = "SLIMElasticNetRecommender"

    def __init__(self, URM_train, verbose = True):
        super(SLIMElasticNetRecommender, self).__init__(URM_train, verbose = verbose)

    @ignore_warnings(category=ConvergenceWarning)
    def fit(self, l1_ratio=0.1, alpha = 1.0, positive_only=True, topK = 100,**earlystopping_kwargs):

        assert l1_ratio>= 0 and l1_ratio<=1, "{}: l1_ratio must be between 0 and 1, provided value was {}".format(self.RECOMMENDER_NAME, l1_ratio)

        self.l1_ratio = l1_ratio
        self.positive_only = positive_only
        self.topK = topK


        # initialize the ElasticNet model
        self.model = ElasticNet(alpha=alpha,
                                l1_ratio=self.l1_ratio,
                                positive=self.positive_only,
                                fit_intercept=False,
                                copy_X=False,
                                precompute=True,
                                selection='random',
                                max_iter=100,
                                tol=1e-4)

        URM_train = check_matrix(self.URM_train, 'csc', dtype=np.float32)

        n_items = URM_train.shape[1]

        # Use array as it reduces memory requirements compared to lists
        dataBlock = 10000000

        rows = np.zeros(dataBlock, dtype=np.int32)
        cols = np.zeros(dataBlock, dtype=np.int32)
        values = np.zeros(dataBlock, dtype=np.float32)

        numCells = 0

        start_time = time.time()
        start_time_printBatch = start_time

        # fit each item's factors sequentially (not in parallel)
        for currentItem in range(n_items):

            # get the target column
            y = URM_train[:, currentItem].toarray()

            # set the j-th column of X to zero
            start_pos = URM_train.indptr[currentItem]
            end_pos = URM_train.indptr[currentItem + 1]

            current_item_data_backup = URM_train.data[start_pos: end_pos].copy()
            URM_train.data[start_pos: end_pos] = 0.0

            # fit one ElasticNet model per column
            self.model.fit(URM_train, y)

            # self.model.coef_ contains the coefficient of the ElasticNet model
            # let's keep only the non-zero values

            # Select topK values
            # Sorting is done in three steps. Faster then plain np.argsort for higher number of items
            # - Partition the data to extract the set of relevant items
            # - Sort only the relevant items
            # - Get the original item index

            nonzero_model_coef_index = self.model.sparse_coef_.indices
            nonzero_model_coef_value = self.model.sparse_coef_.data

            local_topK = min(len(nonzero_model_coef_value)-1, self.topK)

            relevant_items_partition = (-nonzero_model_coef_value).argpartition(local_topK)[0:local_topK]
            relevant_items_partition_sorting = np.argsort(-nonzero_model_coef_value[relevant_items_partition])
            ranking = relevant_items_partition[relevant_items_partition_sorting]

            for index in range(len(ranking)):

                if numCells == len(rows):
                    rows = np.concatenate((rows, np.zeros(dataBlock, dtype=np.int32)))
                    cols = np.concatenate((cols, np.zeros(dataBlock, dtype=np.int32)))
                    values = np.concatenate((values, np.zeros(dataBlock, dtype=np.float32)))


                rows[numCells] = nonzero_model_coef_index[ranking[index]]
                cols[numCells] = currentItem
                values[numCells] = nonzero_model_coef_value[ranking[index]]

                numCells += 1

            # finally, replace the original values of the j-th column
            URM_train.data[start_pos:end_pos] = current_item_data_backup

            elapsed_time = time.time() - start_time
            new_time_value, new_time_unit = seconds_to_biggest_unit(elapsed_time)


            if time.time() - start_time_printBatch > 300 or currentItem == n_items-1:
                self._print("Processed {} ({:4.1f}%) in {:.2f} {}. Items per second: {:.2f}".format(
                    currentItem+1,
                    100.0* float(currentItem+1)/n_items,
                    new_time_value,
                    new_time_unit,
                    float(currentItem)/elapsed_time))

                sys.stdout.flush()
                sys.stderr.flush()

                start_time_printBatch = time.time()

        # generate the sparse weight matrix
        self.W_sparse = sps.csr_matrix((values[:numCells], (rows[:numCells], cols[:numCells])),
                                       shape=(n_items, n_items), dtype=np.float32)




In [10]:
# PREVIOUS BEST MODEL AS BASELINE
baseline = SLIMElasticNetRecommender(URM_train)
baseline.fit(l1_ratio = 0.001404017101088145, alpha = 0.06305313484275951, positive_only = True, topK = 2594)
print("BASELINE MODEL:")
print(evaluator_valid.evaluateRecommender(baseline))

SLIMElasticNetRecommender: Processed 1410 ( 7.8%) in 5.00 min. Items per second: 4.69


SLIMElasticNetRecommender: Processed 2820 (15.6%) in 10.01 min. Items per second: 4.70


SLIMElasticNetRecommender: Processed 4223 (23.4%) in 15.01 min. Items per second: 4.69


SLIMElasticNetRecommender: Processed 5634 (31.2%) in 20.01 min. Items per second: 4.69


SLIMElasticNetRecommender: Processed 7057 (39.1%) in 25.01 min. Items per second: 4.70


SLIMElasticNetRecommender: Processed 8506 (47.1%) in 30.01 min. Items per second: 4.72


SLIMElasticNetRecommender: Processed 9879 (54.7%) in 35.01 min. Items per second: 4.70


SLIMElasticNetRecommender: Processed 11249 (62.3%) in 40.02 min. Items per second: 4.68


SLIMElasticNetRecommender: Processed 12646 (70.0%) in 45.02 min. Items per second: 4.68


SLIMElasticNetRecommender: Processed 14022 (77.6%) in 50.02 min. Items per second: 4.67


SLIMElasticNetRecommender: Processed 15406 (85.3%) in 55.02 min. Items per second: 4.67


SLIMElasticNetRecommender: Processed 16784 (92.9%) in 1.00 hour. Items per second: 4.66


SLIMElasticNetRecommender: Processed 18059 (100.0%) in 1.08 hour. Items per second: 4.65


BASELINE MODEL:


EvaluatorHoldout: Processed 13645 (100.0%) in 31.37 sec. Users per second: 435


(       PRECISION PRECISION_RECALL_MIN_DEN    RECALL      MAP MAP_MIN_DEN  \
cutoff                                                                     
10      0.401766                 0.403598  0.072153  0.24849     0.24939   

             MRR      NDCG        F1 HIT_RATE ARHR_ALL_HITS  ...  \
cutoff                                                       ...   
10      0.646931  0.415135  0.122335  0.97325      1.245545  ...   

       COVERAGE_ITEM_CORRECT COVERAGE_USER COVERAGE_USER_CORRECT  \
cutoff                                                             
10                  0.052882      0.999634              0.972894   

       DIVERSITY_GINI SHANNON_ENTROPY RATIO_DIVERSITY_HERFINDAHL  \
cutoff                                                             
10           0.011148        7.918072                   0.992185   

       RATIO_DIVERSITY_GINI RATIO_SHANNON_ENTROPY RATIO_AVERAGE_POPULARITY  \
cutoff                                                                       

In [11]:
recommender_class = SLIMElasticNetRecommender

In [12]:
import os

output_folder_path = "result_experiments/"

# If directory does not exist, create
if not os.path.exists(output_folder_path):
    os.makedirs(output_folder_path)
    
n_cases = 50
n_random_starts = int(n_cases*0.3)
metric_to_optimize = "MAP"   
cutoff_to_optimize = 10

In [13]:
from skopt.space import Real, Integer, Categorical

#param = l1_ratio=0.1, alpha = 1.0, positive_only=True, topK = 100

hyperparameters_range_dictionary = {
    "l1_ratio": Real(low = 0.0005, high = 0.05, prior = 'log-uniform'),
    "alpha": Real(low = 0.01, high = 0.1, prior = 'log-uniform'),
    "topK": Integer(300,4000)
}

In [14]:
#We also setup the early stopping 
earlystopping_keywargs = {"validation_every_n": 5,
                          "stop_on_validation": True,
                          "evaluator_object": evaluator_valid,
                          "lower_validations_allowed": 5,
                          "validation_metric": metric_to_optimize,
                          }

In [15]:
from HyperparameterTuning.SearchBayesianSkopt import SearchBayesianSkopt

#create a bayesian optimizer object, we pass the recommender and the evaluator
hyperparameterSearch = SearchBayesianSkopt(recommender_class,
                                         evaluator_validation=evaluator_valid)

In [16]:
from HyperparameterTuning.SearchAbstractClass import SearchInputRecommenderArgs
  
#provide data needed to create instance of model (one on URM_train, the other on URM_all)
recommender_input_args = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS = [URM_train],     # For a CBF model simply put [URM_train, ICM_train]
    CONSTRUCTOR_KEYWORD_ARGS = {},
    FIT_POSITIONAL_ARGS = [],
    FIT_KEYWORD_ARGS = earlystopping_keywargs
)

In [17]:
recommender_input_args_last_test = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS = [URM_all],
    CONSTRUCTOR_KEYWORD_ARGS = {},
    FIT_POSITIONAL_ARGS = [],
    FIT_KEYWORD_ARGS = earlystopping_keywargs
)

In [18]:
#let's run the bayesian search
hyperparameterSearch.search(recommender_input_args = recommender_input_args,
                       recommender_input_args_last_test = recommender_input_args_last_test,
                       hyperparameter_search_space = hyperparameters_range_dictionary,
                       n_cases = n_cases,
                       n_random_starts = n_random_starts,
                       save_model = "last",
                       output_folder_path = output_folder_path, # Where to save the results
                       output_file_name_root = recommender_class.RECOMMENDER_NAME, # How to call the files
                       metric_to_optimize = metric_to_optimize,
                       cutoff_to_optimize = cutoff_to_optimize,
                      )

Iteration No: 1 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'l1_ratio': 0.0018286262247658193, 'alpha': 0.07688997944095932, 'topK': 1035}


SLIMElasticNetRecommender: Processed 1327 ( 7.3%) in 5.00 min. Items per second: 4.42


SLIMElasticNetRecommender: Processed 2773 (15.4%) in 10.00 min. Items per second: 4.62


SLIMElasticNetRecommender: Processed 4258 (23.6%) in 15.00 min. Items per second: 4.73


SLIMElasticNetRecommender: Processed 5727 (31.7%) in 20.01 min. Items per second: 4.77


SLIMElasticNetRecommender: Processed 7217 (40.0%) in 25.01 min. Items per second: 4.81


SLIMElasticNetRecommender: Processed 8681 (48.1%) in 30.01 min. Items per second: 4.82


SLIMElasticNetRecommender: Processed 10165 (56.3%) in 35.01 min. Items per second: 4.84


SLIMElasticNetRecommender: Processed 11663 (64.6%) in 40.01 min. Items per second: 4.86


SLIMElasticNetRecommender: Processed 13208 (73.1%) in 45.01 min. Items per second: 4.89


SLIMElasticNetRecommender: Processed 14733 (81.6%) in 50.02 min. Items per second: 4.91


SLIMElasticNetRecommender: Processed 16261 (90.0%) in 55.02 min. Items per second: 4.93


SLIMElasticNetRecommender: Processed 17758 (98.3%) in 1.00 hour. Items per second: 4.93


SLIMElasticNetRecommender: Processed 18059 (100.0%) in 1.02 hour. Items per second: 4.93


EvaluatorHoldout: Processed 13645 (100.0%) in 29.56 sec. Users per second: 462


SearchBayesianSkopt: New best config found. Config 0: {'l1_ratio': 0.0018286262247658193, 'alpha': 0.07688997944095932, 'topK': 1035} - results: PRECISION: 0.4018395, PRECISION_RECALL_MIN_DEN: 0.4035943, RECALL: 0.0719630, MAP: 0.2487156, MAP_MIN_DEN: 0.2496051, MRR: 0.6473326, NDCG: 0.4152518, F1: 0.1220659, HIT_RATE: 0.9731770, ARHR_ALL_HITS: 1.2462145, NOVELTY: 0.0055228, AVERAGE_POPULARITY: 0.5376364, DIVERSITY_MEAN_INTER_LIST: 0.9161156, DIVERSITY_HERFINDAHL: 0.9916048, COVERAGE_ITEM: 0.0730384, COVERAGE_ITEM_CORRECT: 0.0515532, COVERAGE_USER: 0.9996337, COVERAGE_USER_CORRECT: 0.9728205, DIVERSITY_GINI: 0.0108059, SHANNON_ENTROPY: 7.8768686, RATIO_DIVERSITY_HERFINDAHL: 0.9919875, RATIO_DIVERSITY_GINI: 0.0435916, RATIO_SHANNON_ENTROPY: 0.6357347, RATIO_AVERAGE_POPULARITY: 2.6718696, RATIO_NOVELTY: 0.0259847, 



Iteration No: 1 ended. Evaluation done at random point.
Time taken: 3692.6566
Function value obtained: -0.2487
Current minimum: -0.2487
Iteration No: 2 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'l1_ratio': 0.005055410319074547, 'alpha': 0.06345806360287587, 'topK': 529}


SLIMElasticNetRecommender: Processed 1483 ( 8.2%) in 5.00 min. Items per second: 4.94


SLIMElasticNetRecommender: Processed 2945 (16.3%) in 10.00 min. Items per second: 4.91


SLIMElasticNetRecommender: Processed 4469 (24.7%) in 15.01 min. Items per second: 4.96


SLIMElasticNetRecommender: Processed 5943 (32.9%) in 20.01 min. Items per second: 4.95


SLIMElasticNetRecommender: Processed 7362 (40.8%) in 25.01 min. Items per second: 4.91


SLIMElasticNetRecommender: Processed 8844 (49.0%) in 30.01 min. Items per second: 4.91


SLIMElasticNetRecommender: Processed 10331 (57.2%) in 35.02 min. Items per second: 4.92


SLIMElasticNetRecommender: Processed 11813 (65.4%) in 40.02 min. Items per second: 4.92


SLIMElasticNetRecommender: Processed 13295 (73.6%) in 45.02 min. Items per second: 4.92


SLIMElasticNetRecommender: Processed 14762 (81.7%) in 50.02 min. Items per second: 4.92


SLIMElasticNetRecommender: Processed 16252 (90.0%) in 55.02 min. Items per second: 4.92


SLIMElasticNetRecommender: Processed 17726 (98.2%) in 1.00 hour. Items per second: 4.92


SLIMElasticNetRecommender: Processed 18059 (100.0%) in 1.02 hour. Items per second: 4.92


EvaluatorHoldout: Processed 13645 (100.0%) in 27.15 sec. Users per second: 503


SearchBayesianSkopt: Config 1 is suboptimal. Config: {'l1_ratio': 0.005055410319074547, 'alpha': 0.06345806360287587, 'topK': 529} - results: PRECISION: 0.4017442, PRECISION_RECALL_MIN_DEN: 0.4035235, RECALL: 0.0720203, MAP: 0.2484900, MAP_MIN_DEN: 0.2493837, MRR: 0.6466629, NDCG: 0.4150367, F1: 0.1221439, HIT_RATE: 0.9732503, ARHR_ALL_HITS: 1.2451266, NOVELTY: 0.0055355, AVERAGE_POPULARITY: 0.5317481, DIVERSITY_MEAN_INTER_LIST: 0.9189548, DIVERSITY_HERFINDAHL: 0.9918887, COVERAGE_ITEM: 0.0751980, COVERAGE_ITEM_CORRECT: 0.0526053, COVERAGE_USER: 0.9996337, COVERAGE_USER_CORRECT: 0.9728938, DIVERSITY_GINI: 0.0112719, SHANNON_ENTROPY: 7.9365093, RATIO_DIVERSITY_HERFINDAHL: 0.9922715, RATIO_DIVERSITY_GINI: 0.0454712, RATIO_SHANNON_ENTROPY: 0.6405483, RATIO_AVERAGE_POPULARITY: 2.6426063, RATIO_NOVELTY: 0.0260446, 

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 3694.5560
Function value obtained: -0.2485
Current minimum: -0.2487
Iteration No: 3 started. Evaluating funct

SLIMElasticNetRecommender: Processed 1471 ( 8.1%) in 5.00 min. Items per second: 4.90


SLIMElasticNetRecommender: Processed 2906 (16.1%) in 10.00 min. Items per second: 4.84


SLIMElasticNetRecommender: Processed 4389 (24.3%) in 15.00 min. Items per second: 4.88


SLIMElasticNetRecommender: Processed 5866 (32.5%) in 20.00 min. Items per second: 4.89


SLIMElasticNetRecommender: Processed 7342 (40.7%) in 25.01 min. Items per second: 4.89


SLIMElasticNetRecommender: Processed 8814 (48.8%) in 30.01 min. Items per second: 4.89


SLIMElasticNetRecommender: Processed 10282 (56.9%) in 35.01 min. Items per second: 4.89


SLIMElasticNetRecommender: Processed 11755 (65.1%) in 40.01 min. Items per second: 4.90


SLIMElasticNetRecommender: Processed 13244 (73.3%) in 45.02 min. Items per second: 4.90


SLIMElasticNetRecommender: Processed 14716 (81.5%) in 50.02 min. Items per second: 4.90


SLIMElasticNetRecommender: Processed 16208 (89.8%) in 55.02 min. Items per second: 4.91


SLIMElasticNetRecommender: Processed 17680 (97.9%) in 1.00 hour. Items per second: 4.91


SLIMElasticNetRecommender: Processed 18059 (100.0%) in 1.02 hour. Items per second: 4.91


EvaluatorHoldout: Processed 13645 (100.0%) in 27.27 sec. Users per second: 500


SearchBayesianSkopt: New best config found. Config 2: {'l1_ratio': 0.0049866640673402515, 'alpha': 0.04475399260515166, 'topK': 3684} - results: PRECISION: 0.4024844, PRECISION_RECALL_MIN_DEN: 0.4042664, RECALL: 0.0722745, MAP: 0.2488642, MAP_MIN_DEN: 0.2497442, MRR: 0.6476402, NDCG: 0.4156007, F1: 0.1225437, HIT_RATE: 0.9735434, ARHR_ALL_HITS: 1.2465086, NOVELTY: 0.0055529, AVERAGE_POPULARITY: 0.5246456, DIVERSITY_MEAN_INTER_LIST: 0.9229147, DIVERSITY_HERFINDAHL: 0.9922847, COVERAGE_ITEM: 0.0805692, COVERAGE_ITEM_CORRECT: 0.0567030, COVERAGE_USER: 0.9996337, COVERAGE_USER_CORRECT: 0.9731868, DIVERSITY_GINI: 0.0121007, SHANNON_ENTROPY: 8.0292240, RATIO_DIVERSITY_HERFINDAHL: 0.9926676, RATIO_DIVERSITY_GINI: 0.0488147, RATIO_SHANNON_ENTROPY: 0.6480312, RATIO_AVERAGE_POPULARITY: 2.6073097, RATIO_NOVELTY: 0.0261262, 

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 3704.9731
Function value obtained: -0.2489
Current minimum: -0.2489
Iteration No: 4 started. Evaluating fu

SLIMElasticNetRecommender: Processed 2090 (11.6%) in 5.00 min. Items per second: 6.96


SLIMElasticNetRecommender: Processed 4269 (23.6%) in 10.01 min. Items per second: 7.11


SLIMElasticNetRecommender: Processed 6425 (35.6%) in 15.01 min. Items per second: 7.13


SLIMElasticNetRecommender: Processed 8580 (47.5%) in 20.01 min. Items per second: 7.15


SLIMElasticNetRecommender: Processed 10723 (59.4%) in 25.01 min. Items per second: 7.15


SLIMElasticNetRecommender: Processed 12891 (71.4%) in 30.01 min. Items per second: 7.16


SLIMElasticNetRecommender: Processed 15015 (83.1%) in 35.01 min. Items per second: 7.15


SLIMElasticNetRecommender: Processed 17139 (94.9%) in 40.01 min. Items per second: 7.14


SLIMElasticNetRecommender: Processed 18059 (100.0%) in 42.28 min. Items per second: 7.12


EvaluatorHoldout: Processed 13645 (100.0%) in 21.92 sec. Users per second: 623


SearchBayesianSkopt: Config 3 is suboptimal. Config: {'l1_ratio': 0.024363665684312186, 'alpha': 0.06671142435628305, 'topK': 3273} - results: PRECISION: 0.3978674, PRECISION_RECALL_MIN_DEN: 0.3996578, RECALL: 0.0713069, MAP: 0.2456713, MAP_MIN_DEN: 0.2465617, MRR: 0.6424317, NDCG: 0.4114396, F1: 0.1209388, HIT_RATE: 0.9725907, ARHR_ALL_HITS: 1.2349444, NOVELTY: 0.0054908, AVERAGE_POPULARITY: 0.5490802, DIVERSITY_MEAN_INTER_LIST: 0.9119100, DIVERSITY_HERFINDAHL: 0.9911843, COVERAGE_ITEM: 0.0651753, COVERAGE_ITEM_CORRECT: 0.0454067, COVERAGE_USER: 0.9996337, COVERAGE_USER_CORRECT: 0.9722344, DIVERSITY_GINI: 0.0096946, SHANNON_ENTROPY: 7.7475198, RATIO_DIVERSITY_HERFINDAHL: 0.9915668, RATIO_DIVERSITY_GINI: 0.0391084, RATIO_SHANNON_ENTROPY: 0.6252951, RATIO_AVERAGE_POPULARITY: 2.7287414, RATIO_NOVELTY: 0.0258343, 

Iteration No: 4 ended. Evaluation done at random point.
Time taken: 2558.9192
Function value obtained: -0.2457
Current minimum: -0.2489
Iteration No: 5 started. Evaluating func

SLIMElasticNetRecommender: Processed 1428 ( 7.9%) in 5.00 min. Items per second: 4.75


SLIMElasticNetRecommender: Processed 2801 (15.5%) in 10.01 min. Items per second: 4.66


SLIMElasticNetRecommender: Processed 4216 (23.3%) in 15.01 min. Items per second: 4.68


SLIMElasticNetRecommender: Processed 5657 (31.3%) in 20.01 min. Items per second: 4.71


SLIMElasticNetRecommender: Processed 7087 (39.2%) in 25.01 min. Items per second: 4.72


SLIMElasticNetRecommender: Processed 8542 (47.3%) in 30.01 min. Items per second: 4.74


SLIMElasticNetRecommender: Processed 9972 (55.2%) in 35.01 min. Items per second: 4.75


SLIMElasticNetRecommender: Processed 11417 (63.2%) in 40.02 min. Items per second: 4.75


SLIMElasticNetRecommender: Processed 12839 (71.1%) in 45.02 min. Items per second: 4.75


SLIMElasticNetRecommender: Processed 14259 (79.0%) in 50.02 min. Items per second: 4.75


SLIMElasticNetRecommender: Processed 15685 (86.9%) in 55.02 min. Items per second: 4.75


SLIMElasticNetRecommender: Processed 17117 (94.8%) in 1.00 hour. Items per second: 4.75


SLIMElasticNetRecommender: Processed 18059 (100.0%) in 1.06 hour. Items per second: 4.75


EvaluatorHoldout: Processed 13645 (100.0%) in 24.46 sec. Users per second: 558


SearchBayesianSkopt: Config 4 is suboptimal. Config: {'l1_ratio': 0.025623604824795594, 'alpha': 0.010661136969244919, 'topK': 1159} - results: PRECISION: 0.4006156, PRECISION_RECALL_MIN_DEN: 0.4024376, RECALL: 0.0722095, MAP: 0.2469192, MAP_MIN_DEN: 0.2477533, MRR: 0.6465616, NDCG: 0.4136849, F1: 0.1223634, HIT_RATE: 0.9737633, ARHR_ALL_HITS: 1.2408459, NOVELTY: 0.0056349, AVERAGE_POPULARITY: 0.4928863, DIVERSITY_MEAN_INTER_LIST: 0.9363709, DIVERSITY_HERFINDAHL: 0.9936302, COVERAGE_ITEM: 0.1079240, COVERAGE_ITEM_CORRECT: 0.0727615, COVERAGE_USER: 0.9996337, COVERAGE_USER_CORRECT: 0.9734066, DIVERSITY_GINI: 0.0160323, SHANNON_ENTROPY: 8.3942267, RATIO_DIVERSITY_HERFINDAHL: 0.9940136, RATIO_DIVERSITY_GINI: 0.0646749, RATIO_SHANNON_ENTROPY: 0.6774902, RATIO_AVERAGE_POPULARITY: 2.4494767, RATIO_NOVELTY: 0.0265121, 

Iteration No: 5 ended. Evaluation done at random point.
Time taken: 3823.0205
Function value obtained: -0.2469
Current minimum: -0.2489
Iteration No: 6 started. Evaluating fun

SLIMElasticNetRecommender: Processed 1393 ( 7.7%) in 5.00 min. Items per second: 4.64


SLIMElasticNetRecommender: Processed 2815 (15.6%) in 10.00 min. Items per second: 4.69


SLIMElasticNetRecommender: Processed 4265 (23.6%) in 15.00 min. Items per second: 4.74


SLIMElasticNetRecommender: Processed 5723 (31.7%) in 20.00 min. Items per second: 4.77


SLIMElasticNetRecommender: Processed 7165 (39.7%) in 25.01 min. Items per second: 4.77


SLIMElasticNetRecommender: Processed 8647 (47.9%) in 30.01 min. Items per second: 4.80


SLIMElasticNetRecommender: Processed 10097 (55.9%) in 35.01 min. Items per second: 4.81


SLIMElasticNetRecommender: Processed 11555 (64.0%) in 40.01 min. Items per second: 4.81


SLIMElasticNetRecommender: Processed 12989 (71.9%) in 45.02 min. Items per second: 4.81


SLIMElasticNetRecommender: Processed 14438 (79.9%) in 50.02 min. Items per second: 4.81


SLIMElasticNetRecommender: Processed 15890 (88.0%) in 55.02 min. Items per second: 4.81


SLIMElasticNetRecommender: Processed 17345 (96.0%) in 1.00 hour. Items per second: 4.82


SLIMElasticNetRecommender: Processed 18059 (100.0%) in 1.04 hour. Items per second: 4.82


EvaluatorHoldout: Processed 13645 (100.0%) in 25.13 sec. Users per second: 543


SearchBayesianSkopt: Config 5 is suboptimal. Config: {'l1_ratio': 0.01876171376936078, 'alpha': 0.013115994949370395, 'topK': 622} - results: PRECISION: 0.4009747, PRECISION_RECALL_MIN_DEN: 0.4027994, RECALL: 0.0722770, MAP: 0.2471848, MAP_MIN_DEN: 0.2480198, MRR: 0.6466655, NDCG: 0.4140114, F1: 0.1224771, HIT_RATE: 0.9735434, ARHR_ALL_HITS: 1.2417749, NOVELTY: 0.0056331, AVERAGE_POPULARITY: 0.4925622, DIVERSITY_MEAN_INTER_LIST: 0.9365235, DIVERSITY_HERFINDAHL: 0.9936455, COVERAGE_ITEM: 0.1052107, COVERAGE_ITEM_CORRECT: 0.0717094, COVERAGE_USER: 0.9996337, COVERAGE_USER_CORRECT: 0.9731868, DIVERSITY_GINI: 0.0158062, SHANNON_ENTROPY: 8.3843464, RATIO_DIVERSITY_HERFINDAHL: 0.9940289, RATIO_DIVERSITY_GINI: 0.0637632, RATIO_SHANNON_ENTROPY: 0.6766928, RATIO_AVERAGE_POPULARITY: 2.4478660, RATIO_NOVELTY: 0.0265035, 

Iteration No: 6 ended. Evaluation done at random point.
Time taken: 3774.1141
Function value obtained: -0.2472
Current minimum: -0.2489
Iteration No: 7 started. Evaluating funct

SLIMElasticNetRecommender: Processed 1351 ( 7.5%) in 5.00 min. Items per second: 4.50


SLIMElasticNetRecommender: Processed 2720 (15.1%) in 10.01 min. Items per second: 4.53


SLIMElasticNetRecommender: Processed 4066 (22.5%) in 15.01 min. Items per second: 4.51


SLIMElasticNetRecommender: Processed 5453 (30.2%) in 20.01 min. Items per second: 4.54


SLIMElasticNetRecommender: Processed 6823 (37.8%) in 25.01 min. Items per second: 4.55


SLIMElasticNetRecommender: Processed 8233 (45.6%) in 30.02 min. Items per second: 4.57


SLIMElasticNetRecommender: Processed 9591 (53.1%) in 35.02 min. Items per second: 4.56


SLIMElasticNetRecommender: Processed 10968 (60.7%) in 40.02 min. Items per second: 4.57


SLIMElasticNetRecommender: Processed 12341 (68.3%) in 45.02 min. Items per second: 4.57


SLIMElasticNetRecommender: Processed 13706 (75.9%) in 50.02 min. Items per second: 4.57


SLIMElasticNetRecommender: Processed 15077 (83.5%) in 55.03 min. Items per second: 4.57


SLIMElasticNetRecommender: Processed 16452 (91.1%) in 1.00 hour. Items per second: 4.57


SLIMElasticNetRecommender: Processed 17826 (98.7%) in 1.08 hour. Items per second: 4.57


SLIMElasticNetRecommender: Processed 18059 (100.0%) in 1.10 hour. Items per second: 4.57


EvaluatorHoldout: Processed 13645 (100.0%) in 23.92 sec. Users per second: 570


SearchBayesianSkopt: Config 6 is suboptimal. Config: {'l1_ratio': 0.0005522742712251818, 'alpha': 0.019953201428999933, 'topK': 730} - results: PRECISION: 0.3977721, PRECISION_RECALL_MIN_DEN: 0.3995698, RECALL: 0.0716991, MAP: 0.2439424, MAP_MIN_DEN: 0.2447927, MRR: 0.6416515, NDCG: 0.4103461, F1: 0.1214980, HIT_RATE: 0.9733968, ARHR_ALL_HITS: 1.2295269, NOVELTY: 0.0056993, AVERAGE_POPULARITY: 0.4629314, DIVERSITY_MEAN_INTER_LIST: 0.9456203, DIVERSITY_HERFINDAHL: 0.9945551, COVERAGE_ITEM: 0.1065950, COVERAGE_ITEM_CORRECT: 0.0772468, COVERAGE_USER: 0.9996337, COVERAGE_USER_CORRECT: 0.9730403, DIVERSITY_GINI: 0.0178927, SHANNON_ENTROPY: 8.5940220, RATIO_DIVERSITY_HERFINDAHL: 0.9949389, RATIO_DIVERSITY_GINI: 0.0721801, RATIO_SHANNON_ENTROPY: 0.6936155, RATIO_AVERAGE_POPULARITY: 2.3006108, RATIO_NOVELTY: 0.0268153, 

Iteration No: 7 ended. Evaluation done at random point.
Time taken: 3976.0521
Function value obtained: -0.2439
Current minimum: -0.2489
Iteration No: 8 started. Evaluating fun

SLIMElasticNetRecommender: Processed 1399 ( 7.7%) in 5.00 min. Items per second: 4.66


SLIMElasticNetRecommender: Processed 2803 (15.5%) in 10.00 min. Items per second: 4.67


SLIMElasticNetRecommender: Processed 4205 (23.3%) in 15.01 min. Items per second: 4.67


SLIMElasticNetRecommender: Processed 5623 (31.1%) in 20.01 min. Items per second: 4.68


SLIMElasticNetRecommender: Processed 7045 (39.0%) in 25.01 min. Items per second: 4.69


SLIMElasticNetRecommender: Processed 8474 (46.9%) in 30.01 min. Items per second: 4.71


SLIMElasticNetRecommender: Processed 9865 (54.6%) in 35.01 min. Items per second: 4.70


SLIMElasticNetRecommender: Processed 11268 (62.4%) in 40.01 min. Items per second: 4.69


SLIMElasticNetRecommender: Processed 12652 (70.1%) in 45.02 min. Items per second: 4.68


SLIMElasticNetRecommender: Processed 14022 (77.6%) in 50.02 min. Items per second: 4.67


In [None]:
from Recommenders.DataIO import DataIO

#explore the results of the search
data_loader = DataIO(folder_path = output_folder_path)
search_metadata = data_loader.load_data(recommender_class.RECOMMENDER_NAME + "_metadata.zip")

search_metadata.keys()

In [None]:
hyperparameters_df = search_metadata["hyperparameters_df"]
hyperparameters_df

In [None]:
result_on_validation_df = search_metadata["result_on_validation_df"]
result_on_validation_df

In [None]:
best_hyperparameters = search_metadata["hyperparameters_best"]
best_hyperparameters

# Create final recommendations

In [None]:
#let's fit the model with the hyperparamethers obtained from the previous search and evaluate them on validation set

recommender = SLIMElasticNetRecommender(URM_all)
recommender.fit()
#evaluator_valid.evaluateRecommender(recommender)

In [None]:
recommender.save_model(output_folder_path, file_name = recommender.RECOMMENDER_NAME + "_my_own_save.zip" )

In [None]:
test_users = pd.read_csv('../input/recommender-system-2021-challenge-polimi/data_target_users_test.csv')
test_users

In [None]:
user_id = test_users['user_id']
recommendations = []
for user in user_id:
    recommendations.append(recommender.recommend(user,cutoff = 10))

In [None]:
for index in range(len(recommendations)):
    recommendations[index]=np.array(recommendations[index])
    
test_users['item_list']= recommendations
test_users['item_list'] = pd.DataFrame([str(line).strip('[').strip(']').replace("'","") for line in test_users['item_list']])
test_users.to_csv('submission.csv', index=False)