In [2]:
!pip install scikit-learn==0.24.2 &> /dev/null
!pip install scikit-optimize==0.9.0 &> /dev/null

In [1]:
# Utils.not_random (imported below) is imported only so the seed stays the same
import os
from HyperparameterTuning.SearchAbstractClass import SearchInputRecommenderArgs
from skopt.space import Real, Integer, Categorical
from HyperparameterTuning.SearchBayesianSkopt import SearchBayesianSkopt
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample, split_train_in_two_percentage_user_wise
import Utils.not_random
from Data.RecSys2022 import RecSys2022, RecSys2022URMType
from Data_manager.split_functions.split_train_validation_random_holdout import (
    split_train_in_two_percentage_global_sample,
    split_train_in_two_percentage_user_wise)
from Evaluation.Evaluator import EvaluatorHoldout
from Recommenders.GraphBased.RP3betaRecommender import RP3betaRecommender

# ---------------------------------------------------------------------------
# Search configuration
# ---------------------------------------------------------------------------
# Defined first so the evaluators below are built with the same cutoff that
# the search is asked to optimise.
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

n_cases = 50
n_random_starts = int(n_cases * 0.5)  # half of the cases are passed as random starts

output_folder_path = "result_experiments_3/"

# ---------------------------------------------------------------------------
# Dataset
# ---------------------------------------------------------------------------
# build() can be called again later with another RecSys2022URMType to get a
# different URM.
dataset = RecSys2022()
dataset.build(type=RecSys2022URMType.ONE_INTERACTED)

urm = dataset.get_urm()
icm = dataset.get_icm()  # not used in this cell; a CBF model would take it as a constructor argument

# Two nested 80/20 global-sample splits:
#   urm                  -> urm_train_validation + urm_test
#   urm_train_validation -> urm_train            + urm_validation
urm_train_validation, urm_test = split_train_in_two_percentage_global_sample(urm, train_percentage=0.8)
urm_train, urm_validation = split_train_in_two_percentage_global_sample(urm_train_validation, train_percentage=0.8)

evaluator_validation = EvaluatorHoldout(urm_validation, cutoff_list=[cutoff_to_optimize])
evaluator_test = EvaluatorHoldout(urm_test, cutoff_list=[cutoff_to_optimize])

# ---------------------------------------------------------------------------
# Search set-up
# ---------------------------------------------------------------------------
recommender_class = RP3betaRecommender

hyperparameterSearch = SearchBayesianSkopt(recommender_class,
                                           evaluator_validation=evaluator_validation,
                                           evaluator_test=evaluator_test)

hyperparameters_range_dictionary = {
    "topK": Integer(30, 80),
    "normalize_similarity": Categorical([True, False]),
    "alpha": Real(0.4, 0.9),
    "beta": Real(0.05, 0.4),
}

# fit() arguments that are held fixed for every case. Each
# SearchInputRecommenderArgs receives its own copy so the two objects never
# share a mutable dict.
fit_keyword_args = {
    "min_rating": 0,
    "implicit": True,
}

# Arguments for the models evaluated on the validation split.
recommender_input_args = SearchInputRecommenderArgs(
    # For a CBF model simply put [URM_train, ICM_train]
    CONSTRUCTOR_POSITIONAL_ARGS=[urm_train],
    CONSTRUCTOR_KEYWORD_ARGS={},
    FIT_POSITIONAL_ARGS=[],
    FIT_KEYWORD_ARGS=dict(fit_keyword_args),
    EARLYSTOPPING_KEYWORD_ARGS={},
)

# Same arguments, but built on train + validation data
# (presumably used by the search for the evaluation on the test split - TODO confirm).
recommender_input_args_last_test = SearchInputRecommenderArgs(
    # For a CBF model simply put [URM_train_validation, ICM_train]
    CONSTRUCTOR_POSITIONAL_ARGS=[urm_train_validation],
    CONSTRUCTOR_KEYWORD_ARGS={},
    FIT_POSITIONAL_ARGS=[],
    FIT_KEYWORD_ARGS=dict(fit_keyword_args),
    EARLYSTOPPING_KEYWORD_ARGS={},
)

# Create the output directory if it does not exist yet. exist_ok avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(output_folder_path, exist_ok=True)

# ---------------------------------------------------------------------------
# Run the search
# ---------------------------------------------------------------------------
hyperparameterSearch.search(recommender_input_args,
                            recommender_input_args_last_test=recommender_input_args_last_test,
                            hyperparameter_search_space=hyperparameters_range_dictionary,
                            n_cases=n_cases,
                            n_random_starts=n_random_starts,
                            save_model="last",
                            output_folder_path=output_folder_path,  # Where to save the results
                            output_file_name_root=recommender_class.RECOMMENDER_NAME,  # How to call the files
                            metric_to_optimize=metric_to_optimize,
                            cutoff_to_optimize=cutoff_to_optimize,
                            )


Tensorflow is not available
Setting seed random library, os and numpy seed to 18
Unzipping dataset...
Loading interactions...
Loading features...
Loading target ids...
Cleaning up...
Building URM and ICM with criteria ONE_INTERACTED...
EvaluatorHoldout: Ignoring 1409 ( 3.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 773 ( 1.9%) Users that have less than 1 test interactions
Iteration No: 1 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'topK': 67, 'normalize_similarity': False, 'alpha': 0.5357182927704416, 'beta': 0.3980762571933971}
RP3betaRecommender: URM Detected 1 ( 0.0%) users with no interactions.
RP3betaRecommender: Similarity column 19630 (100.0%), 5083.66 column/sec. Elapsed time 3.86 sec
EvaluatorHoldout: Processed 40220 (100.0%) in 14.33 sec. Users per second: 2806
SearchBayesianSkopt: New best config found. Config 0: {'topK': 67, 'normalize_similarity': False, 'alpha': 0.5357182927704416, 'beta': 0.398076257

In [3]:
from Recommenders.DataIO import DataIO

# The metadata archive is named after the recommender and is read from the
# folder that was passed to the search as output_folder_path.
metadata_file_name = f"{recommender_class.RECOMMENDER_NAME}_metadata.zip"

data_loader = DataIO(folder_path=output_folder_path)
search_metadata = data_loader.load_data(metadata_file_name)

# Table of the hyperparameter values, one row per case.
hyperparameters_df = search_metadata["hyperparameters_df"]
hyperparameters_df

Unnamed: 0,topK,normalize_similarity,alpha,beta
0,67,False,0.535718,0.398076
1,56,True,0.887582,0.145495
2,67,True,0.833967,0.139946
3,60,False,0.783489,0.054664
4,46,True,0.62492,0.055874
5,80,True,0.754509,0.054998
6,69,False,0.737508,0.211203
7,42,True,0.560169,0.099176
8,64,True,0.629908,0.344103
9,76,True,0.871603,0.194774


In [11]:
# Validation results stored in the loaded search metadata. The table is
# displayed with a two-level row index (case number, cutoff) and one column
# per evaluation metric.
result_on_validation_df = search_metadata["result_on_validation_df"]
result_on_validation_df

Unnamed: 0_level_0,Unnamed: 1_level_0,PRECISION,PRECISION_RECALL_MIN_DEN,RECALL,MAP,MAP_MIN_DEN,MRR,NDCG,F1,HIT_RATE,ARHR_ALL_HITS,...,COVERAGE_USER,COVERAGE_USER_HIT,USERS_IN_GT,DIVERSITY_GINI,SHANNON_ENTROPY,RATIO_DIVERSITY_HERFINDAHL,RATIO_DIVERSITY_GINI,RATIO_SHANNON_ENTROPY,RATIO_AVERAGE_POPULARITY,RATIO_NOVELTY
Unnamed: 0_level_1,cutoff,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,10,0.000134,0.000351,0.000348,4.5e-05,0.000115,0.000446,0.000251,0.000194,0.001343,0.000446,...,0.965865,0.001297,0.965865,0.270133,12.77075,1.000227,0.544292,0.972296,0.049352,0.613139
1,10,0.000164,0.000376,0.000356,5.2e-05,0.000123,0.0005,0.000276,0.000225,0.001592,0.000506,...,0.965865,0.001537,0.965865,0.307271,12.961572,1.000256,0.619122,0.986824,0.051011,0.611285
2,10,0.000662,0.001277,0.001249,0.000189,0.000367,0.0018,0.000952,0.000865,0.006367,0.001841,...,0.965865,0.00615,0.965865,0.180113,12.099375,0.999952,0.36291,0.921181,0.051261,0.612822
3,10,0.000142,0.00037,0.000366,3.8e-05,0.000101,0.000381,0.00024,0.000204,0.001418,0.000381,...,0.965865,0.001369,0.965865,0.051124,10.368741,0.999225,0.10301,0.78942,0.03418,0.632885
4,10,0.000127,0.000295,0.000286,3.3e-05,6.8e-05,0.000334,0.00019,0.000176,0.001268,0.000334,...,0.965865,0.001225,0.965865,0.003078,6.145884,0.975251,0.006201,0.467915,0.022904,0.655034
5,10,0.000122,0.0003,0.000297,4.5e-05,0.000114,0.000446,0.000236,0.000173,0.001219,0.000446,...,0.965865,0.001177,0.965865,0.30733,12.958382,1.000261,0.619242,0.986581,0.052026,0.610223
6,10,0.000127,0.000347,0.000343,3.7e-05,8.9e-05,0.000369,0.00022,0.000185,0.001268,0.000369,...,0.965865,0.001225,0.965865,0.132813,11.635447,0.999725,0.267606,0.88586,0.04514,0.618558
7,10,0.000157,0.00046,0.000452,4e-05,0.000111,0.000397,0.000272,0.000233,0.001567,0.000397,...,0.965865,0.001513,0.965865,0.1472,11.797714,0.99984,0.296593,0.898214,0.041712,0.622929
8,10,0.000127,0.000354,0.000351,3.2e-05,7.9e-05,0.000317,0.000209,0.000186,0.001268,0.000317,...,0.965865,0.001225,0.965865,0.096477,11.121602,0.999302,0.194392,0.846739,0.040897,0.624113
9,10,0.000154,0.000362,0.000359,4e-05,9.1e-05,0.000405,0.00024,0.000216,0.001542,0.000405,...,0.965865,0.001489,0.965865,0.156852,11.811578,0.999712,0.316043,0.89927,0.05849,0.605882


In [4]:
# Hyperparameter dict stored in the metadata under "hyperparameters_best"
# (presumably the configuration with the best validation score - verify
# against SearchBayesianSkopt).
best_hyperparameters = search_metadata["hyperparameters_best"]
best_hyperparameters

{'topK': 80,
 'normalize_similarity': True,
 'alpha': 0.7195144323319888,
 'beta': 0.2298978830101901}

In [5]:
import pandas as pd

In [6]:
# Rank the explored configurations by their validation score.
#
# result_on_validation_df is displayed with a two-level row index
# (case number, cutoff). Selecting the optimised cutoff on the second level
# leaves a Series indexed by case number, which is then joined to the
# hyperparameters on that index. Unlike flattening the index with
# reset_index(), this keeps each score paired with its own case even if more
# than one cutoff is evaluated.
validation_scores = search_metadata["result_on_validation_df"][metric_to_optimize].xs(
    cutoff_to_optimize, level=1
)

(
    search_metadata["hyperparameters_df"]
    .join(validation_scores, how="inner")
    .sort_values(by=metric_to_optimize, ascending=False)
    .head(30)
)

Unnamed: 0,topK,normalize_similarity,alpha,beta,MAP
27,80,True,0.719514,0.229898,0.020148
38,71,True,0.467879,0.221567,0.020097
32,79,True,0.552981,0.212527,0.020084
33,55,True,0.695302,0.221572,0.02007
39,63,True,0.819987,0.210755,0.020043
47,80,True,0.4,0.203453,0.02002
9,76,True,0.871603,0.194774,0.020002
18,70,True,0.685955,0.370811,0.019999
8,64,True,0.629908,0.344103,0.019993
23,60,True,0.853864,0.169263,0.019955
