In [1]:
import pandas as pd
import numpy as np
import scipy.sparse as sps
import matplotlib.pyplot as pyplot

from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Utils.DataReader import load_urm, load_icm, load_target
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

Cloning into 'Recommender-Systems-Challenge-2022'...
remote: Enumerating objects: 571, done.[K
remote: Counting objects: 100% (167/167), done.[K
remote: Compressing objects: 100% (119/119), done.[K
remote: Total 571 (delta 87), reused 109 (delta 47), pack-reused 404[K
Receiving objects: 100% (571/571), 91.68 MiB | 21.31 MiB/s, done.
Resolving deltas: 100% (259/259), done.
Updating files: 100% (236/236), done.


In [2]:
# Complete interaction matrix; it is used further down for the final fit on all data.
URM_all = load_urm()

# Pre-computed train / test / validation splits, read back from disk in that order.
URM_train, URM_test, URM_validation = map(
    sps.load_npz,
    (
        'Dataset/Split/URM_train.npz',
        'Dataset/Split/URM_test.npz',
        'Dataset/Split/URM_validation.npz',
    ),
)

# One hold-out evaluator per split, both computing metrics at cutoff 10.
evaluator_validation, evaluator_test = (
    EvaluatorHoldout(urm_split, cutoff_list=[10])
    for urm_split in (URM_validation, URM_test)
)

EvaluatorHoldout: Ignoring 1479 ( 3.6%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 892 ( 2.1%) Users that have less than 1 test interactions


In [3]:
# Algorithm under tuning. The class is kept in a variable because the cells below
# pass it to the search object and read its RECOMMENDER_NAME to name the output files.
from Recommenders.MatrixFactorization.IALSRecommender import IALSRecommender
recommender_class = IALSRecommender

In [4]:
import os

# Folder handed to the search as `output_folder_path`; the search metadata is
# later read back from this same folder.
output_folder_path = "../Experiments/"

# Create the folder (and any missing parents) if it is not there yet.
# exist_ok=True replaces the former exists()-then-makedirs() pair, which could
# raise FileExistsError if the folder appeared between the check and the call.
os.makedirs(output_folder_path, exist_ok=True)

# Search budget: total configurations to evaluate, of which 30% (truncated to
# an int, i.e. 3 out of 10) are passed as `n_random_starts`.
n_cases = 10
n_random_starts = int(n_cases*0.3)

# Metric and cutoff handed to the search; the same metric is reused as the
# early-stopping validation metric.
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

In [5]:
from skopt.space import Categorical, Integer, Real

# Search space for IALS (a learning-based matrix factorization model): one
# skopt dimension per hyperparameter the optimizer is allowed to vary.
# NOTE(review): the best configuration reported at the end of this notebook
# sits on range bounds (alpha = 1.0, reg = 1e-05) - consider widening those
# two ranges before the next search.
hyperparameters_range_dictionary = dict(
    num_factors=Integer(low=1, high=100),
    confidence_scaling=Categorical(["linear", "log"]),
    alpha=Real(low=1e-2, high=1.0, prior="log-uniform"),
    epsilon=Real(low=1e-1, high=10.0, prior="log-uniform"),
    reg=Real(low=1e-5, high=1e-3, prior="log-uniform"),
)

In [6]:
# Keyword arguments forwarded to the recommender's fit() through FIT_KEYWORD_ARGS.
# Presumably: validate every 15 epochs on `evaluator_validation` and stop after
# 5 validations without improvement of the chosen metric - confirm against the
# recommender's fit signature.
earlystopping_keywargs = dict(
    validation_every_n=15,
    stop_on_validation=True,
    evaluator_object=evaluator_validation,
    lower_validations_allowed=5,
    validation_metric=metric_to_optimize,
)

In [7]:
from HyperparameterTuning.SearchBayesianSkopt import SearchBayesianSkopt

# Bayesian optimizer bound to the recommender class and the validation evaluator.
# NOTE(review): `evaluator_test` is built in an earlier cell but is not passed
# here (nor anywhere else in the visible cells) - confirm this is intentional.
hyperparameterSearch = SearchBayesianSkopt(
    recommender_class,
    evaluator_validation=evaluator_validation,
)

In [8]:
from HyperparameterTuning.SearchAbstractClass import SearchInputRecommenderArgs

# Arguments used to build and fit each candidate model during the search:
# the model is constructed on URM_train and fitted with the early-stopping kwargs.
recommender_input_args = SearchInputRecommenderArgs(
    # For a CBF model simply put [URM_train, ICM_train]
    CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
    CONSTRUCTOR_KEYWORD_ARGS=dict(),
    FIT_POSITIONAL_ARGS=list(),
    FIT_KEYWORD_ARGS=earlystopping_keywargs,
)

In [9]:
# Same arguments as the search-time ones, except the model is constructed on
# URM_all; passed to the search as `recommender_input_args_last_test`.
# NOTE(review): the very same `earlystopping_keywargs` dict object is shared
# with `recommender_input_args` - verify the search does not mutate it in place.
recommender_input_args_last_test = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS=[URM_all],
    CONSTRUCTOR_KEYWORD_ARGS=dict(),
    FIT_POSITIONAL_ARGS=list(),
    FIT_KEYWORD_ARGS=earlystopping_keywargs,
)

In [10]:
# Launch the search: n_cases configurations in total, the first n_random_starts
# of them passed as random starts, optimizing metric_to_optimize at cutoff_to_optimize.
# This cell is slow: the captured log shows roughly 28 s per IALS epoch.
hyperparameterSearch.search(
    recommender_input_args,
    recommender_input_args_last_test=recommender_input_args_last_test,
    hyperparameter_search_space=hyperparameters_range_dictionary,
    n_cases=n_cases,
    n_random_starts=n_random_starts,
    metric_to_optimize=metric_to_optimize,
    cutoff_to_optimize=cutoff_to_optimize,
    save_model="last",
    output_folder_path=output_folder_path,  # Where to save the results
    output_file_name_root=recommender_class.RECOMMENDER_NAME,  # How to call the files
)

Iteration No: 1 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'num_factors': 71, 'confidence_scaling': 'linear', 'alpha': 0.11012384068429501, 'epsilon': 2.861725436784963, 'reg': 2.697622556664433e-05}
IALSRecommender: Epoch 1 of 100. Elapsed time 27.92 sec
IALSRecommender: Epoch 2 of 100. Elapsed time 56.00 sec
IALSRecommender: Epoch 3 of 100. Elapsed time 1.39 min
IALSRecommender: Epoch 4 of 100. Elapsed time 1.84 min
IALSRecommender: Epoch 5 of 100. Elapsed time 2.30 min
IALSRecommender: Epoch 6 of 100. Elapsed time 2.75 min
IALSRecommender: Epoch 7 of 100. Elapsed time 3.21 min
IALSRecommender: Epoch 8 of 100. Elapsed time 3.64 min
IALSRecommender: Epoch 9 of 100. Elapsed time 4.09 min
IALSRecommender: Epoch 10 of 100. Elapsed time 4.55 min
IALSRecommender: Epoch 11 of 100. Elapsed time 5.00 min
IALSRecommender: Epoch 12 of 100. Elapsed time 5.46 min
IALSRecommender: Epoch 13 of 100. Elapsed time 5.91 min
IALSRecommender: Epoch 14 of 100. Elap

In [11]:
from Recommenders.DataIO import DataIO

# Read back the metadata archive written by the search into the output folder;
# its file name is the recommender name followed by "_metadata.zip".
data_loader = DataIO(folder_path=output_folder_path)
search_metadata = data_loader.load_data(
    f"{recommender_class.RECOMMENDER_NAME}_metadata.zip"
)

# Display which pieces of information the archive contains.
search_metadata.keys()

dict_keys(['exception_list', 'time_on_last_df', 'time_on_test_total', 'result_on_test_df', 'metric_to_optimize', 'result_on_last', 'algorithm_name_recommender', 'result_on_test_best', 'time_on_validation_total', 'result_on_validation_best', 'time_on_train_total', 'hyperparameters_best', 'time_df', 'result_on_earlystopping_df', 'algorithm_name_search', 'time_on_train_avg', 'time_on_validation_avg', 'hyperparameters_df', 'cutoff_to_optimize', 'time_on_test_avg', 'result_on_validation_df', 'hyperparameters_best_index'])

In [12]:
# One row per evaluated configuration: the sampled hyperparameters plus the
# `epochs` value recorded for that run.
hyperparameters_df = search_metadata["hyperparameters_df"]
hyperparameters_df

Unnamed: 0,num_factors,confidence_scaling,alpha,epsilon,reg,epochs
0,71,linear,0.110124,2.861725,2.7e-05,30.0
1,33,linear,0.032657,2.609958,1.2e-05,45.0
2,20,linear,0.748869,1.634582,2e-05,90.0
3,13,linear,0.061005,0.969859,0.000247,75.0
4,62,log,0.010007,0.178806,0.00068,90.0
5,94,log,1.0,0.531647,0.001,90.0
6,12,log,0.31582,1.189158,0.000982,60.0
7,10,log,0.071363,0.794818,3.3e-05,15.0
8,53,linear,1.0,8.071768,1e-05,45.0
9,2,linear,0.156987,3.774204,0.000189,30.0


In [13]:
# Validation metrics for every evaluated configuration, indexed by
# (configuration number, cutoff); MAP at cutoff 10 is the optimized column.
result_on_validation_df = search_metadata["result_on_validation_df"]
result_on_validation_df

Unnamed: 0_level_0,Unnamed: 1_level_0,PRECISION,PRECISION_RECALL_MIN_DEN,RECALL,MAP,MAP_MIN_DEN,MRR,NDCG,F1,HIT_RATE,ARHR_ALL_HITS,...,COVERAGE_USER,COVERAGE_USER_HIT,USERS_IN_GT,DIVERSITY_GINI,SHANNON_ENTROPY,RATIO_DIVERSITY_HERFINDAHL,RATIO_DIVERSITY_GINI,RATIO_SHANNON_ENTROPY,RATIO_AVERAGE_POPULARITY,RATIO_NOVELTY
Unnamed: 0_level_1,cutoff,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,10,0.025238,0.04682,0.044647,0.011177,0.02061,0.088815,0.042625,0.032247,0.198705,0.099298,...,0.964472,0.191645,0.964472,0.00807,8.044558,0.995508,0.015952,0.596282,2.341783,0.290224
1,10,0.025776,0.047666,0.045419,0.011068,0.020164,0.087082,0.042446,0.032888,0.201793,0.097803,...,0.964472,0.194624,0.964472,0.005158,7.394451,0.992738,0.010196,0.548095,3.202064,0.278371
2,10,0.029218,0.055167,0.052771,0.012535,0.02327,0.097284,0.048482,0.037611,0.225604,0.110033,...,0.964472,0.217589,0.964472,0.004411,7.164903,0.991406,0.008718,0.53108,4.229541,0.269688
3,10,0.025357,0.047125,0.044914,0.010755,0.019537,0.084359,0.041423,0.032414,0.19863,0.094871,...,0.964472,0.191573,0.964472,0.002814,6.505477,0.985988,0.005562,0.482202,4.81189,0.263367
4,10,0.024907,0.046182,0.044013,0.010953,0.020204,0.087351,0.041917,0.031811,0.196787,0.097457,...,0.964472,0.189796,0.964472,0.00736,7.909618,0.995036,0.014548,0.58628,2.439014,0.288407
5,10,0.0288,0.053947,0.051605,0.012907,0.02412,0.101073,0.049195,0.036968,0.223138,0.113814,...,0.964472,0.215211,0.964472,0.011949,8.606734,0.997155,0.023619,0.637952,2.258357,0.293855
6,10,0.026687,0.050101,0.047822,0.011363,0.020943,0.088458,0.043964,0.034257,0.207273,0.099874,...,0.964472,0.199909,0.964472,0.002849,6.524901,0.986175,0.005631,0.483641,5.038822,0.262386
7,10,0.025798,0.048522,0.046318,0.0108,0.019753,0.084602,0.042055,0.033139,0.201395,0.095243,...,0.964472,0.19424,0.964472,0.002402,6.281117,0.983526,0.004747,0.465572,5.25107,0.260824
8,10,0.029313,0.054618,0.052196,0.012987,0.024103,0.101392,0.049498,0.037542,0.226401,0.114347,...,0.964472,0.218357,0.964472,0.008225,8.073442,0.995628,0.016257,0.598423,2.873786,0.284225
9,10,0.020986,0.040706,0.039003,0.009135,0.017774,0.074342,0.036381,0.027289,0.169265,0.082251,...,0.964472,0.163252,0.964472,0.000887,4.645315,0.956583,0.001753,0.344322,7.912921,0.242524


In [14]:
# Best configuration found by the search.
# NOTE(review): `epochs` comes back as a float (45.0) and may need an int cast
# before being reused in fit(); alpha and reg sit on the bounds of their search
# ranges, so a wider search space may find a better optimum.
best_hyperparameters = search_metadata["hyperparameters_best"]
best_hyperparameters

{'num_factors': 53,
 'confidence_scaling': 'linear',
 'alpha': 1.0,
 'epsilon': 8.071768337775882,
 'reg': 1e-05,
 'epochs': 45.0}