In [2]:
import pandas as pd
import numpy as np
import scipy.sparse as sps
import matplotlib.pyplot as pyplot

from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Utils.DataReader import load_urm, load_icm, load_target

In [3]:
# Full interaction matrix, as returned by the project's data reader
URM_all = load_urm()

# Pre-computed split matrices stored on disk as scipy sparse .npz files
URM_train = sps.load_npz("Dataset/Split/URM_train.npz")
URM_test = sps.load_npz("Dataset/Split/URM_test.npz")
URM_validation = sps.load_npz("Dataset/Split/URM_validation.npz")

# One evaluator per held-out matrix, both configured with a single cutoff of 10
evaluator_validation = EvaluatorHoldout(
    URM_validation,
    cutoff_list=[10],
)
evaluator_test = EvaluatorHoldout(
    URM_test,
    cutoff_list=[10],
)

EvaluatorHoldout: Ignoring 1590 ( 3.8%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 959 ( 2.3%) Users that have less than 1 test interactions


## RP3 Beta Recommender

In [4]:
from Recommenders.GraphBased.RP3betaRecommender import RP3betaRecommender

# Class (not an instance) of the model being tuned. Later cells pass it to the
# search object and read its RECOMMENDER_NAME to name the output files.
recommender_class = RP3betaRecommender

In [5]:
import os

# Folder that receives the files written by the hyperparameter search
output_folder_path = "Experiments/"

# Create the folder if it is missing. exist_ok=True makes this idempotent and
# avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs(output_folder_path, exist_ok=True)

# Search budget: total number of configurations to evaluate, of which the
# first 30% are passed to the search as n_random_starts.
n_cases = 500
n_random_starts = int(n_cases*0.3)

# Objective of the search: the metric and the recommendation-list cutoff
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

### Hyperparameter Tuning

In [6]:
from skopt.space import Real, Integer, Categorical

# Search space explored by the optimizer: one skopt dimension per
# hyperparameter name that is tuned.
hyperparameters_range_dictionary = {
    "alpha": Real(low=0, high=1, prior="uniform"),
    "beta": Real(low=0, high=1, prior="uniform"),
    "topK": Integer(low=1, high=800),
    "implicit": Categorical(categories=[True, False]),
}

We create a Bayesian optimizer object, passing it the recommender class and the validation evaluator.

In [7]:
from HyperparameterTuning.SearchBayesianSkopt import SearchBayesianSkopt

# Search object bound to the model class and to the validation evaluator
hyperparameterSearch = SearchBayesianSkopt(
    recommender_class,
    evaluator_validation=evaluator_validation,
)

We provide the arguments needed to instantiate the model in two variants: one built on `URM_train` (used during the search) and one built on `URM_all` (used for the final model).

In [8]:
from HyperparameterTuning.SearchAbstractClass import SearchInputRecommenderArgs

# Constructor/fit arguments for the model built on the training matrix.
# Only the constructor receives a positional argument; everything else is empty.
recommender_input_args = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
    CONSTRUCTOR_KEYWORD_ARGS={},
    FIT_POSITIONAL_ARGS=[],
    FIT_KEYWORD_ARGS={},
)

In [9]:
# Same argument layout as the training variant, but the constructor receives
# the full interaction matrix instead of the training split.
recommender_input_args_last_test = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS=[URM_all],
    CONSTRUCTOR_KEYWORD_ARGS={},
    FIT_POSITIONAL_ARGS=[],
    FIT_KEYWORD_ARGS={},
)

### Bayesian Search

In [10]:
# Launch the search with the budget, objective and output location configured
# in the earlier cells.
hyperparameterSearch.search(
    recommender_input_args,
    recommender_input_args_last_test=recommender_input_args_last_test,
    hyperparameter_search_space=hyperparameters_range_dictionary,
    n_cases=n_cases,
    n_random_starts=n_random_starts,
    save_model="last",
    output_folder_path=output_folder_path,  # Folder receiving the result files
    output_file_name_root=recommender_class.RECOMMENDER_NAME,  # Prefix of the result file names
    metric_to_optimize=metric_to_optimize,
    cutoff_to_optimize=cutoff_to_optimize,
)

Iteration No: 1 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'alpha': 0.20292943880839825, 'beta': 0.2212987251629815, 'topK': 572, 'implicit': True}
RP3betaRecommender: Similarity column 24507 (100.0%), 673.02 column/sec. Elapsed time 36.41 sec
EvaluatorHoldout: Processed 40039 (100.0%) in 35.08 sec. Users per second: 1141
SearchBayesianSkopt: New best config found. Config 0: {'alpha': 0.20292943880839825, 'beta': 0.2212987251629815, 'topK': 572, 'implicit': True} - results: PRECISION: 0.0370264, PRECISION_RECALL_MIN_DEN: 0.0717681, RECALL: 0.0690555, MAP: 0.0163139, MAP_MIN_DEN: 0.0316289, MRR: 0.1235570, NDCG: 0.0635077, F1: 0.0482057, HIT_RATE: 0.2783536, ARHR_ALL_HITS: 0.1415759, NOVELTY: 0.0040260, AVERAGE_POPULARITY: 0.2947674, DIVERSITY_MEAN_INTER_LIST: 0.8346703, DIVERSITY_HERFINDAHL: 0.9834649, COVERAGE_ITEM: 0.3632840, COVERAGE_ITEM_HIT: 0.0342351, ITEMS_IN_GT: 0.9847391, COVERAGE_USER: 0.9618055, COVERAGE_USER_HIT: 0.2677220, USERS_IN_

### Search Results

In [11]:
from Recommenders.DataIO import DataIO

# Reload the metadata archive from the output folder to inspect the search results.
# The file name is the recommender name followed by "_metadata.zip".
metadata_file_name = recommender_class.RECOMMENDER_NAME + "_metadata.zip"
data_loader = DataIO(folder_path=output_folder_path)
search_metadata = data_loader.load_data(metadata_file_name)

# Show which entries the metadata contains
search_metadata.keys()

dict_keys(['time_on_test_avg', 'time_on_validation_avg', 'hyperparameters_best', 'result_on_last', 'time_on_train_avg', 'time_on_test_total', 'hyperparameters_df', 'result_on_validation_best', 'result_on_validation_df', 'cutoff_to_optimize', 'time_df', 'hyperparameters_best_index', 'result_on_test_df', 'time_on_validation_total', 'algorithm_name_search', 'time_on_train_total', 'exception_list', 'time_on_last_df', 'result_on_earlystopping_df', 'algorithm_name_recommender', 'metric_to_optimize', 'result_on_test_best'])

In [12]:
# Table of the configurations tried by the search, stored in the metadata under
# "hyperparameters_df"; the rendered output has one column per tuned
# hyperparameter (alpha, beta, topK, implicit).
hyperparameters_df = search_metadata["hyperparameters_df"]
hyperparameters_df

Unnamed: 0,alpha,beta,topK,implicit
0,0.202929,0.221299,572,True
1,0.668144,0.042523,15,False
2,0.757863,0.579954,240,True
3,0.759213,0.880315,511,False
4,0.901966,0.290338,762,False
...,...,...,...,...
495,0.743823,0.325863,59,True
496,0.996132,0.82426,629,True
497,0.988118,0.11047,398,False
498,0.736456,0.325353,59,True


In [13]:
# Validation metrics stored in the metadata under "result_on_validation_df";
# the rendered output is indexed by configuration number and cutoff.
result_on_validation_df = search_metadata["result_on_validation_df"]
result_on_validation_df

Unnamed: 0_level_0,Unnamed: 1_level_0,PRECISION,PRECISION_RECALL_MIN_DEN,RECALL,MAP,MAP_MIN_DEN,MRR,NDCG,F1,HIT_RATE,ARHR_ALL_HITS,...,COVERAGE_USER,COVERAGE_USER_HIT,USERS_IN_GT,DIVERSITY_GINI,SHANNON_ENTROPY,RATIO_DIVERSITY_HERFINDAHL,RATIO_DIVERSITY_GINI,RATIO_SHANNON_ENTROPY,RATIO_AVERAGE_POPULARITY,RATIO_NOVELTY
Unnamed: 0_level_1,cutoff,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,10,0.037026,0.071768,0.069056,0.016314,0.031629,0.123557,0.063508,0.048206,0.278354,0.141576,...,0.961805,0.267722,0.961805,0.01708,7.64087,0.98379,0.033738,0.566361,5.776645,0.271049
1,10,0.024469,0.044539,0.042389,0.011926,0.022464,0.095189,0.043985,0.031027,0.191338,0.106181,...,0.961805,0.18403,0.961805,0.136775,11.662922,0.999448,0.270174,0.864487,1.111383,0.348407
2,10,0.039187,0.07471,0.071813,0.017854,0.033952,0.132953,0.067481,0.050705,0.288069,0.153592,...,0.961805,0.277066,0.961805,0.107489,10.020323,0.994837,0.212325,0.742733,3.547596,0.311944
3,10,0.018185,0.030412,0.028513,0.007406,0.012315,0.05534,0.027039,0.022207,0.136717,0.063781,...,0.961805,0.131495,0.961805,0.271403,12.890115,0.999933,0.536105,0.955449,0.352524,0.409082
4,10,0.036592,0.071154,0.068537,0.016316,0.031724,0.123702,0.063351,0.047711,0.274657,0.14171,...,0.961805,0.264167,0.961805,0.064785,8.253053,0.983478,0.127971,0.611738,5.730123,0.281061
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,10,0.041045,0.077676,0.074548,0.01878,0.035885,0.139294,0.070783,0.052941,0.298983,0.161174,...,0.961805,0.287564,0.961805,0.088264,10.106692,0.996515,0.174349,0.749135,3.17683,0.310291
496,10,0.018537,0.029865,0.027767,0.007653,0.012225,0.055042,0.026877,0.022232,0.134369,0.064646,...,0.961805,0.129237,0.961805,0.28943,13.025703,1.000009,0.571715,0.9655,0.385076,0.411292
497,10,0.037111,0.072305,0.069649,0.016192,0.031526,0.123201,0.063558,0.048422,0.279852,0.140879,...,0.961805,0.269163,0.961805,0.020368,7.552725,0.983079,0.040234,0.559828,5.92057,0.269449
498,10,0.041035,0.077695,0.07457,0.01878,0.035888,0.139242,0.070779,0.052939,0.298959,0.161144,...,0.961805,0.28754,0.961805,0.088324,10.109806,0.996526,0.174467,0.749366,3.171111,0.310353


These are the best hyperparameters found by the Bayesian search; we will train our model using them.

In [14]:
# Configuration stored in the metadata under "hyperparameters_best".
# NOTE(review): presumably the one with the highest metric_to_optimize on the
# validation data — confirm against the search implementation.
best_hyperparameters = search_metadata["hyperparameters_best"]
best_hyperparameters

{'alpha': 0.7513264232341178,
 'beta': 0.3247853600339675,
 'topK': 58,
 'implicit': True}