In [1]:
import pandas as pd
import numpy as np
import scipy.sparse as sps
import matplotlib.pyplot as pyplot

from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Utils.DataReader import load_urm, load_icm, load_target

In [2]:
URM_all = load_urm()

URM_train, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.85)
URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_train, train_percentage = 0.85)

evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])
evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

EvaluatorHoldout: Ignoring 1466 ( 3.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 922 ( 2.2%) Users that have less than 1 test interactions


## RP3 Beta Recommender

In [3]:
from Recommenders.GraphBased.RP3betaRecommender import RP3betaRecommender

recommender_class = RP3betaRecommender

In [4]:
import os

output_folder_path = "Experiments/"

# If directory does not exist, create
if not os.path.exists(output_folder_path):
    os.makedirs(output_folder_path)

n_cases = 50
n_random_starts = int(n_cases*0.3)
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

### Hyperparameter Tuning

In [5]:
from skopt.space import Real, Integer, Categorical

hyperparameters_range_dictionary = {
    "alpha": Real(low=0, high=1, prior='uniform'),
    "beta": Real(low=0, high=1, prior='uniform'),
    "topK": Integer(1, 800),
    "implicit": Categorical([True, False])
}

We create a bayesian optimizer object, we pass the recommender and the evaluator

In [6]:
from HyperparameterTuning.SearchBayesianSkopt import SearchBayesianSkopt

hyperparameterSearch = SearchBayesianSkopt(recommender_class,
                                           evaluator_validation=evaluator_validation)

We provide data needed to create the instance of the model, one on the URM_train, the other on URM_all

In [7]:
from HyperparameterTuning.SearchAbstractClass import SearchInputRecommenderArgs

recommender_input_args = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS = [URM_train],
    CONSTRUCTOR_KEYWORD_ARGS = {},
    FIT_POSITIONAL_ARGS = [],
    FIT_KEYWORD_ARGS = {}
)

In [8]:
recommender_input_args_last_test = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS = [URM_all],
    CONSTRUCTOR_KEYWORD_ARGS = {},
    FIT_POSITIONAL_ARGS = [],
    FIT_KEYWORD_ARGS = {}
)

### Bayesian Search

In [9]:
hyperparameterSearch.search(recommender_input_args,
                            recommender_input_args_last_test = recommender_input_args_last_test,
                            hyperparameter_search_space = hyperparameters_range_dictionary,
                            n_cases = n_cases,
                            n_random_starts = n_random_starts,
                            save_model = "last",
                            output_folder_path = output_folder_path, # Where to save the results
                            output_file_name_root = recommender_class.RECOMMENDER_NAME, # How to call the files
                            metric_to_optimize = metric_to_optimize,
                            cutoff_to_optimize = cutoff_to_optimize,
                            )

Iteration No: 1 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'alpha': 0.46307093805412436, 'beta': 0.9489735235554655, 'topK': 627, 'implicit': True}
RP3betaRecommender: Similarity column 24507 (100.0%), 1347.42 column/sec. Elapsed time 18.19 sec
EvaluatorHoldout: Processed 40163 (100.0%) in 20.00 sec. Users per second: 2008
SearchBayesianSkopt: New best config found. Config 0: {'alpha': 0.46307093805412436, 'beta': 0.9489735235554655, 'topK': 627, 'implicit': True} - results: PRECISION: 0.0139855, PRECISION_RECALL_MIN_DEN: 0.0233559, RECALL: 0.0217678, MAP: 0.0057015, MAP_MIN_DEN: 0.0095424, MRR: 0.0435306, NDCG: 0.0209532, F1: 0.0170297, HIT_RATE: 0.1080099, ARHR_ALL_HITS: 0.0496792, NOVELTY: 0.0061941, AVERAGE_POPULARITY: 0.0106293, DIVERSITY_MEAN_INTER_LIST: 0.9961978, DIVERSITY_HERFINDAHL: 0.9996173, COVERAGE_ITEM: 0.7933244, COVERAGE_ITEM_HIT: 0.0594524, ITEMS_IN_GT: 0.9855551, COVERAGE_USER: 0.9647842, COVERAGE_USER_HIT: 0.1042062, USERS_IN

RP3betaRecommender: Similarity column 24507 (100.0%), 1440.47 column/sec. Elapsed time 17.01 sec
EvaluatorHoldout: Processed 40163 (100.0%) in 18.06 sec. Users per second: 2224
SearchBayesianSkopt: New best config found. Config 6: {'alpha': 0.9250016285303471, 'beta': 0.4655271047485302, 'topK': 519, 'implicit': False} - results: PRECISION: 0.0380375, PRECISION_RECALL_MIN_DEN: 0.0719544, RECALL: 0.0690276, MAP: 0.0175498, MAP_MIN_DEN: 0.0330196, MRR: 0.1296561, NDCG: 0.0654703, F1: 0.0490475, HIT_RATE: 0.2779673, ARHR_ALL_HITS: 0.1502560, NOVELTY: 0.0044909, AVERAGE_POPULARITY: 0.2277486, DIVERSITY_MEAN_INTER_LIST: 0.9029699, DIVERSITY_HERFINDAHL: 0.9902947, COVERAGE_ITEM: 0.7899784, COVERAGE_ITEM_HIT: 0.0403966, ITEMS_IN_GT: 0.9855551, COVERAGE_USER: 0.9647842, COVERAGE_USER_HIT: 0.2681784, USERS_IN_GT: 0.9647842, DIVERSITY_GINI: 0.1063162, SHANNON_ENTROPY: 9.4558428, RATIO_DIVERSITY_HERFINDAHL: 0.9906228, RATIO_DIVERSITY_GINI: 0.2102880, RATIO_SHANNON_ENTROPY: 0.7009593, RATIO_AVERAG

RP3betaRecommender: Similarity column 24507 (100.0%), 2044.39 column/sec. Elapsed time 11.99 sec
EvaluatorHoldout: Processed 40163 (100.0%) in 11.57 sec. Users per second: 3472
SearchBayesianSkopt: New best config found. Config 12: {'alpha': 0.1669908672667844, 'beta': 0.33206759040092587, 'topK': 139, 'implicit': True} - results: PRECISION: 0.0401140, PRECISION_RECALL_MIN_DEN: 0.0762549, RECALL: 0.0731068, MAP: 0.0181591, MAP_MIN_DEN: 0.0342909, MRR: 0.1323625, NDCG: 0.0681946, F1: 0.0518033, HIT_RATE: 0.2909892, ARHR_ALL_HITS: 0.1543819, NOVELTY: 0.0044335, AVERAGE_POPULARITY: 0.1896293, DIVERSITY_MEAN_INTER_LIST: 0.9426553, DIVERSITY_HERFINDAHL: 0.9942632, COVERAGE_ITEM: 0.6109275, COVERAGE_ITEM_HIT: 0.0544742, ITEMS_IN_GT: 0.9855551, COVERAGE_USER: 0.9647842, COVERAGE_USER_HIT: 0.2807418, USERS_IN_GT: 0.9647842, DIVERSITY_GINI: 0.0519666, SHANNON_ENTROPY: 9.4078255, RATIO_DIVERSITY_HERFINDAHL: 0.9945926, RATIO_DIVERSITY_GINI: 0.1027872, RATIO_SHANNON_ENTROPY: 0.6973998, RATIO_AVERA

RP3betaRecommender: Similarity column 24507 (100.0%), 2379.18 column/sec. Elapsed time 10.30 sec
EvaluatorHoldout: Processed 40163 (100.0%) in 8.33 sec. Users per second: 4821
SearchBayesianSkopt: Config 18 is suboptimal. Config: {'alpha': 0.946826163687336, 'beta': 0.0020302740555184777, 'topK': 3, 'implicit': False} - results: PRECISION: 0.0120708, PRECISION_RECALL_MIN_DEN: 0.0209557, RECALL: 0.0196503, MAP: 0.0067510, MAP_MIN_DEN: 0.0123022, MRR: 0.0579353, NDCG: 0.0235641, F1: 0.0149550, HIT_RATE: 0.1006150, ARHR_ALL_HITS: 0.0623696, NOVELTY: 0.0059058, AVERAGE_POPULARITY: 0.0189438, DIVERSITY_MEAN_INTER_LIST: 0.8804359, DIVERSITY_HERFINDAHL: 0.9880414, COVERAGE_ITEM: 0.3063206, COVERAGE_ITEM_HIT: 0.0354593, ITEMS_IN_GT: 0.9855551, COVERAGE_USER: 0.9647842, COVERAGE_USER_HIT: 0.0970718, USERS_IN_GT: 0.9647842, DIVERSITY_GINI: 0.1024967, SHANNON_ENTROPY: 10.0078872, RATIO_DIVERSITY_HERFINDAHL: 0.9883687, RATIO_DIVERSITY_GINI: 0.2027333, RATIO_SHANNON_ENTROPY: 0.7418822, RATIO_AVERAG

RP3betaRecommender: Similarity column 24507 (100.0%), 1815.80 column/sec. Elapsed time 13.50 sec
EvaluatorHoldout: Processed 40163 (100.0%) in 12.67 sec. Users per second: 3169
SearchBayesianSkopt: Config 24 is suboptimal. Config: {'alpha': 0.0, 'beta': 0.279349881264649, 'topK': 247, 'implicit': False} - results: PRECISION: 0.0387023, PRECISION_RECALL_MIN_DEN: 0.0738602, RECALL: 0.0708537, MAP: 0.0173949, MAP_MIN_DEN: 0.0329827, MRR: 0.1284664, NDCG: 0.0659287, F1: 0.0500603, HIT_RATE: 0.2841172, ARHR_ALL_HITS: 0.1489054, NOVELTY: 0.0042632, AVERAGE_POPULARITY: 0.2279528, DIVERSITY_MEAN_INTER_LIST: 0.9096449, DIVERSITY_HERFINDAHL: 0.9909622, COVERAGE_ITEM: 0.4702330, COVERAGE_ITEM_HIT: 0.0465581, ITEMS_IN_GT: 0.9855551, COVERAGE_USER: 0.9647842, COVERAGE_USER_HIT: 0.2741118, USERS_IN_GT: 0.9647842, DIVERSITY_GINI: 0.0319362, SHANNON_ENTROPY: 8.6934343, RATIO_DIVERSITY_HERFINDAHL: 0.9912905, RATIO_DIVERSITY_GINI: 0.0631681, RATIO_SHANNON_ENTROPY: 0.6444422, RATIO_AVERAGE_POPULARITY: 4.

RP3betaRecommender: Similarity column 24507 (100.0%), 1966.29 column/sec. Elapsed time 12.46 sec
EvaluatorHoldout: Processed 40163 (100.0%) in 12.70 sec. Users per second: 3163
SearchBayesianSkopt: New best config found. Config 30: {'alpha': 1.0, 'beta': 0.4520495673133021, 'topK': 167, 'implicit': True} - results: PRECISION: 0.0393721, PRECISION_RECALL_MIN_DEN: 0.0748036, RECALL: 0.0717910, MAP: 0.0182179, MAP_MIN_DEN: 0.0342490, MRR: 0.1335701, NDCG: 0.0677998, F1: 0.0508543, HIT_RATE: 0.2864826, ARHR_ALL_HITS: 0.1553910, NOVELTY: 0.0044748, AVERAGE_POPULARITY: 0.2037715, DIVERSITY_MEAN_INTER_LIST: 0.9307964, DIVERSITY_HERFINDAHL: 0.9930773, COVERAGE_ITEM: 0.7108989, COVERAGE_ITEM_HIT: 0.0429265, ITEMS_IN_GT: 0.9855551, COVERAGE_USER: 0.9647842, COVERAGE_USER_HIT: 0.2763939, USERS_IN_GT: 0.9647842, DIVERSITY_GINI: 0.0785295, SHANNON_ENTROPY: 9.4784983, RATIO_DIVERSITY_HERFINDAHL: 0.9934063, RATIO_DIVERSITY_GINI: 0.1553274, RATIO_SHANNON_ENTROPY: 0.7026388, RATIO_AVERAGE_POPULARITY: 4

RP3betaRecommender: Similarity column 24507 (100.0%), 1943.27 column/sec. Elapsed time 12.61 sec
EvaluatorHoldout: Processed 40163 (100.0%) in 12.70 sec. Users per second: 3161
SearchBayesianSkopt: Config 36 is suboptimal. Config: {'alpha': 1.0, 'beta': 0.34833876824838783, 'topK': 179, 'implicit': True} - results: PRECISION: 0.0395439, PRECISION_RECALL_MIN_DEN: 0.0755935, RECALL: 0.0725733, MAP: 0.0179838, MAP_MIN_DEN: 0.0340157, MRR: 0.1325159, NDCG: 0.0677609, F1: 0.0511934, HIT_RATE: 0.2889724, ARHR_ALL_HITS: 0.1538371, NOVELTY: 0.0043470, AVERAGE_POPULARITY: 0.2253811, DIVERSITY_MEAN_INTER_LIST: 0.9124907, DIVERSITY_HERFINDAHL: 0.9912468, COVERAGE_ITEM: 0.6811523, COVERAGE_ITEM_HIT: 0.0403966, ITEMS_IN_GT: 0.9855551, COVERAGE_USER: 0.9647842, COVERAGE_USER_HIT: 0.2787960, USERS_IN_GT: 0.9647842, DIVERSITY_GINI: 0.0619495, SHANNON_ENTROPY: 9.0225179, RATIO_DIVERSITY_HERFINDAHL: 0.9915752, RATIO_DIVERSITY_GINI: 0.1225330, RATIO_SHANNON_ENTROPY: 0.6688370, RATIO_AVERAGE_POPULARITY: 4

RP3betaRecommender: Similarity column 24507 (100.0%), 1248.85 column/sec. Elapsed time 19.62 sec
EvaluatorHoldout: Processed 40163 (100.0%) in 20.89 sec. Users per second: 1923
SearchBayesianSkopt: Config 42 is suboptimal. Config: {'alpha': 0.04670192546648145, 'beta': 0.9703996486044255, 'topK': 797, 'implicit': True} - results: PRECISION: 0.0114384, PRECISION_RECALL_MIN_DEN: 0.0196540, RECALL: 0.0184208, MAP: 0.0046281, MAP_MIN_DEN: 0.0080304, MRR: 0.0368166, NDCG: 0.0175809, F1: 0.0141132, HIT_RATE: 0.0916017, ARHR_ALL_HITS: 0.0411890, NOVELTY: 0.0062015, AVERAGE_POPULARITY: 0.0085342, DIVERSITY_MEAN_INTER_LIST: 0.9957626, DIVERSITY_HERFINDAHL: 0.9995738, COVERAGE_ITEM: 0.7901008, COVERAGE_ITEM_HIT: 0.0585547, ITEMS_IN_GT: 0.9855551, COVERAGE_USER: 0.9647842, COVERAGE_USER_HIT: 0.0883759, USERS_IN_GT: 0.9647842, DIVERSITY_GINI: 0.2361671, SHANNON_ENTROPY: 12.6734370, RATIO_DIVERSITY_HERFINDAHL: 0.9999049, RATIO_DIVERSITY_GINI: 0.4671264, RATIO_SHANNON_ENTROPY: 0.9394788, RATIO_AVERA

RP3betaRecommender: Similarity column 24507 (100.0%), 1848.98 column/sec. Elapsed time 13.25 sec
EvaluatorHoldout: Processed 40163 (100.0%) in 12.32 sec. Users per second: 3261
SearchBayesianSkopt: Config 48 is suboptimal. Config: {'alpha': 0.0, 'beta': 0.23251696046827247, 'topK': 233, 'implicit': True} - results: PRECISION: 0.0388392, PRECISION_RECALL_MIN_DEN: 0.0742669, RECALL: 0.0712586, MAP: 0.0173792, MAP_MIN_DEN: 0.0329804, MRR: 0.1282572, NDCG: 0.0660284, F1: 0.0502758, HIT_RATE: 0.2847646, ARHR_ALL_HITS: 0.1487360, NOVELTY: 0.0042341, AVERAGE_POPULARITY: 0.2320477, DIVERSITY_MEAN_INTER_LIST: 0.9064131, DIVERSITY_HERFINDAHL: 0.9906391, COVERAGE_ITEM: 0.4166565, COVERAGE_ITEM_HIT: 0.0454156, ITEMS_IN_GT: 0.9855551, COVERAGE_USER: 0.9647842, COVERAGE_USER_HIT: 0.2747364, USERS_IN_GT: 0.9647842, DIVERSITY_GINI: 0.0271436, SHANNON_ENTROPY: 8.5674187, RATIO_DIVERSITY_HERFINDAHL: 0.9909673, RATIO_DIVERSITY_GINI: 0.0536886, RATIO_SHANNON_ENTROPY: 0.6351007, RATIO_AVERAGE_POPULARITY: 4

### Search Results

In [10]:
from Recommenders.DataIO import DataIO

#explore the results of the search
data_loader = DataIO(folder_path = output_folder_path)
search_metadata = data_loader.load_data(recommender_class.RECOMMENDER_NAME + "_metadata.zip")

search_metadata.keys()

dict_keys(['time_on_validation_total', 'time_on_test_total', 'time_on_test_avg', 'metric_to_optimize', 'result_on_last', 'result_on_test_df', 'algorithm_name_search', 'result_on_test_best', 'cutoff_to_optimize', 'hyperparameters_best', 'result_on_earlystopping_df', 'algorithm_name_recommender', 'time_df', 'time_on_train_total', 'exception_list', 'hyperparameters_best_index', 'hyperparameters_df', 'result_on_validation_df', 'time_on_validation_avg', 'time_on_train_avg', 'time_on_last_df', 'result_on_validation_best'])

In [11]:
hyperparameters_df = search_metadata["hyperparameters_df"]
hyperparameters_df

Unnamed: 0,alpha,beta,topK,implicit
0,0.463071,0.948974,627,True
1,0.702488,0.134066,384,True
2,0.895373,0.753523,520,False
3,0.582913,0.799601,631,False
4,0.305083,0.765008,406,False
5,0.380521,0.730719,480,False
6,0.925002,0.465527,519,False
7,0.412153,0.215935,486,True
8,0.521423,0.858561,579,False
9,0.707231,0.979147,585,True


In [12]:
result_on_validation_df = search_metadata["result_on_validation_df"]
result_on_validation_df.sort_values(by="MAP")
result_on_validation_df

Unnamed: 0_level_0,Unnamed: 1_level_0,PRECISION,PRECISION_RECALL_MIN_DEN,RECALL,MAP,MAP_MIN_DEN,MRR,NDCG,F1,HIT_RATE,ARHR_ALL_HITS,...,COVERAGE_USER,COVERAGE_USER_HIT,USERS_IN_GT,DIVERSITY_GINI,SHANNON_ENTROPY,RATIO_DIVERSITY_HERFINDAHL,RATIO_DIVERSITY_GINI,RATIO_SHANNON_ENTROPY,RATIO_AVERAGE_POPULARITY,RATIO_NOVELTY
Unnamed: 0_level_1,cutoff,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,10,0.013986,0.023356,0.021768,0.005701,0.009542,0.043531,0.020953,0.01703,0.10801,0.049679,...,0.964784,0.104206,0.964784,0.256602,12.831969,0.999948,0.507545,0.951231,0.21023,0.417048
1,10,0.037134,0.071601,0.068796,0.016315,0.031167,0.122004,0.062974,0.048233,0.276722,0.14059,...,0.964784,0.266977,0.964784,0.016435,7.571867,0.984062,0.032507,0.561301,5.778751,0.269782
2,10,0.028469,0.048892,0.046184,0.012404,0.021056,0.08873,0.044193,0.035225,0.203819,0.104492,...,0.964784,0.196642,0.964784,0.253367,12.494027,0.999553,0.501147,0.926179,1.079112,0.381839
3,10,0.025446,0.04398,0.041639,0.010805,0.018635,0.079826,0.039535,0.031588,0.188706,0.092554,...,0.964784,0.182061,0.964784,0.264361,12.607922,0.999652,0.522893,0.934622,0.753979,0.386037
4,10,0.028984,0.053293,0.050953,0.012467,0.02285,0.093986,0.047346,0.03695,0.218086,0.107897,...,0.964784,0.210406,0.964784,0.183621,11.747908,0.998987,0.363193,0.87087,1.241478,0.357031
5,10,0.031875,0.058489,0.055911,0.01387,0.025314,0.102943,0.052133,0.040603,0.235366,0.119097,...,0.964784,0.227077,0.964784,0.185965,11.619696,0.99874,0.367829,0.861365,1.579204,0.350901
6,10,0.038037,0.071954,0.069028,0.01755,0.03302,0.129656,0.06547,0.049048,0.277967,0.150256,...,0.964784,0.268178,0.964784,0.106316,9.455843,0.990623,0.210288,0.700959,4.50451,0.302374
7,10,0.037049,0.071333,0.068498,0.016418,0.031394,0.122968,0.063147,0.048088,0.275453,0.141584,...,0.964784,0.265752,0.964784,0.019632,7.693481,0.984182,0.038831,0.570316,5.724166,0.27171
8,10,0.020581,0.035351,0.033335,0.008544,0.014773,0.064019,0.031641,0.02545,0.155591,0.073814,...,0.964784,0.150112,0.964784,0.26519,12.740968,0.999788,0.524533,0.944485,0.458404,0.398087
9,10,0.010751,0.017333,0.015977,0.004435,0.007141,0.03322,0.015775,0.012853,0.081493,0.038286,...,0.964784,0.078623,0.964784,0.248337,12.860976,1.000044,0.491198,0.953381,0.139678,0.427681


This are the best hyperparameters found by the bayesian search -> We will train our model using these

In [13]:
best_hyperparameters = search_metadata["hyperparameters_best"]
best_hyperparameters

{'alpha': 1.0, 'beta': 0.4520495673133021, 'topK': 167, 'implicit': True}

### Recommender Testing

In [14]:
recommender = RP3betaRecommender(URM_all)
recommender.fit(alpha=1.0, beta= 0.4520495673133021, topK=167, implicit=True)

RP3betaRecommender: Similarity column 24507 (100.0%), 1691.93 column/sec. Elapsed time 14.48 sec


### Submissions

In [15]:
test_users = pd.read_csv('Dataset/data_target_users_test.csv')

In [16]:
user_id = test_users['user_id']
recommendations = []
for user in user_id:
    recommendations.append(recommender.recommend(user, cutoff=10))

In [17]:
for index in range(len(recommendations)):
    recommendations[index]=np.array(recommendations[index])

test_users['item_list']= recommendations
test_users['item_list'] = pd.DataFrame([str(line).strip('[').strip(']').replace("'","") for line in test_users['item_list']])
test_users.to_csv('Submissions\Submission_04_RP3Beta_MixedRatings.csv', index=False)