In [1]:
import pandas as pd
import numpy as np
import scipy.sparse as sps
import matplotlib.pyplot as pyplot
import time

from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Utils.DataReader import load_urm, load_icm, load_target
from Recommenders.SLIM.SLIMElasticNetRecommender import SLIMElasticNetRecommender

In [2]:
# Load the complete user-rating matrix (URM) through the project data reader.
URM_all = load_urm()

# First holdout: train_percentage=0.85 of URM_all goes to training, the rest becomes the test split.
URM_train, URM_test = split_train_in_two_percentage_global_sample(
    URM_all,
    train_percentage=0.85,
)

# Second holdout: the training part is split again with the same percentage,
# this time to carve out the validation split.
URM_train, URM_validation = split_train_in_two_percentage_global_sample(
    URM_train,
    train_percentage=0.85,
)
# NOTE(review): no seed is passed to the split function, so the splits may differ
# between runs -- confirm whether it is deterministic.

# One holdout evaluator per held-out matrix, both reporting at cutoff 10 only.
evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])
evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

EvaluatorHoldout: Ignoring 1521 ( 3.7%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 909 ( 2.2%) Users that have less than 1 test interactions


In [3]:
# Recommender class to tune; the search, the output file names and the metadata
# lookup in the following cells all refer to it through this alias.
recommender_class = SLIMElasticNetRecommender

In [4]:
import os

# Folder that receives the files written by the hyperparameter search.
output_folder_path = "Experiments/"

# Create the folder when it is missing. exist_ok=True turns the call into a no-op
# when the folder is already there, so no separate (and racy) exists() check is needed.
os.makedirs(output_folder_path, exist_ok=True)

# Search budget: total number of configurations, and how many of them are passed
# to the search as n_random_starts (30% of the budget, truncated to an integer).
n_cases = 50
n_random_starts = int(n_cases*0.3)

# Metric and cutoff passed to the search as the optimisation target.
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

In [5]:
from skopt.space import Real, Integer, Categorical

# Search space handed to the optimizer: one skopt dimension per hyperparameter name.
# l1_ratio and alpha are sampled with a log-uniform prior, topK is an integer range.
hyperparameters_range_dictionary = dict(
    topK=Integer(200, 450),
    l1_ratio=Real(low=0.001, high=0.01, prior='log-uniform'),
    alpha=Real(low=0.01, high=0.1, prior='log-uniform'),
    # "positive_only": Categorical([True]),
)

In [6]:
# Early-stopping options collected as keyword arguments; validation uses the
# validation evaluator and the same metric that the search optimises.
# NOTE(review): no other cell visible in this notebook references this dictionary.
earlystopping_keywargs = dict(
    validation_every_n=15,
    stop_on_validation=True,
    evaluator_object=evaluator_validation,
    lower_validations_allowed=5,
    validation_metric=metric_to_optimize,
)

In [7]:
from HyperparameterTuning.SearchBayesianSkopt import SearchBayesianSkopt

# Create the Bayesian optimizer object, passing the recommender class to tune and
# the evaluator built on the validation split.
# NOTE(review): evaluator_test is created in an earlier cell but is not passed here.
hyperparameterSearch = SearchBayesianSkopt(recommender_class,
                                           evaluator_validation=evaluator_validation)

In [8]:
from HyperparameterTuning.SearchAbstractClass import SearchInputRecommenderArgs


def _make_search_input_args(URM):
    """Bundle a single URM as the only constructor argument of the recommender.

    No constructor keyword arguments and no fit arguments are supplied.
    For a CBF model the positional list would be [URM_train, ICM_train] instead.
    """
    return SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[URM],
        CONSTRUCTOR_KEYWORD_ARGS={},
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS={},
    )


# Data needed to create the model instances: one built on URM_train,
# the other ("last test") built on URM_all.
recommender_input_args = _make_search_input_args(URM_train)
recommender_input_args_last_test = _make_search_input_args(URM_all)

In [9]:
# Gather every search setting first so the call itself stays short.
search_settings = dict(
    recommender_input_args=recommender_input_args,
    recommender_input_args_last_test=recommender_input_args_last_test,
    hyperparameter_search_space=hyperparameters_range_dictionary,
    n_cases=n_cases,
    n_random_starts=n_random_starts,
    save_model="last",
    output_folder_path=output_folder_path,  # Where to save the results
    output_file_name_root=recommender_class.RECOMMENDER_NAME,  # How to call the files
    metric_to_optimize=metric_to_optimize,
    cutoff_to_optimize=cutoff_to_optimize,
)

# Run the search; each tested configuration is logged in the cell output.
hyperparameterSearch.search(**search_settings)

Iteration No: 1 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'topK': 338, 'l1_ratio': 0.0021732039616272924, 'alpha': 0.03125877774043834}
SLIMElasticNetRecommender: Processed 16289 (66.5%) in 5.00 min. Items per second: 54.29
SLIMElasticNetRecommender: Processed 24507 (100.0%) in 7.24 min. Items per second: 56.39
EvaluatorHoldout: Processed 40108 (100.0%) in 16.61 sec. Users per second: 2415
SearchBayesianSkopt: New best config found. Config 0: {'topK': 338, 'l1_ratio': 0.0021732039616272924, 'alpha': 0.03125877774043834} - results: PRECISION: 0.0390097, PRECISION_RECALL_MIN_DEN: 0.0747636, RECALL: 0.0717969, MAP: 0.0176974, MAP_MIN_DEN: 0.0334731, MRR: 0.1319938, NDCG: 0.0670184, F1: 0.0505525, HIT_RATE: 0.2880473, ARHR_ALL_HITS: 0.1523321, NOVELTY: 0.0040258, AVERAGE_POPULARITY: 0.2665903, DIVERSITY_MEAN_INTER_LIST: 0.8843539, DIVERSITY_HERFINDAHL: 0.9884332, COVERAGE_ITEM: 0.1360428, COVERAGE_ITEM_HIT: 0.0276656, ITEMS_IN_GT: 0.9868609, COVERA

SLIMElasticNetRecommender: Processed 24507 (100.0%) in 3.20 min. Items per second: 127.71
EvaluatorHoldout: Processed 40108 (100.0%) in 10.78 sec. Users per second: 3720
SearchBayesianSkopt: Config 6 is suboptimal. Config: {'topK': 276, 'l1_ratio': 0.0022626504790929773, 'alpha': 0.094461459132593} - results: PRECISION: 0.0350429, PRECISION_RECALL_MIN_DEN: 0.0676487, RECALL: 0.0650413, MAP: 0.0156494, MAP_MIN_DEN: 0.0298456, MRR: 0.1195748, NDCG: 0.0602948, F1: 0.0455463, HIT_RATE: 0.2656328, ARHR_ALL_HITS: 0.1363542, NOVELTY: 0.0039186, AVERAGE_POPULARITY: 0.3094188, DIVERSITY_MEAN_INTER_LIST: 0.8267677, DIVERSITY_HERFINDAHL: 0.9826747, COVERAGE_ITEM: 0.0766312, COVERAGE_ITEM_HIT: 0.0194638, ITEMS_IN_GT: 0.9868609, COVERAGE_USER: 0.9634630, COVERAGE_USER_HIT: 0.2559274, USERS_IN_GT: 0.9634630, DIVERSITY_GINI: 0.0057313, SHANNON_ENTROPY: 7.1074347, RATIO_DIVERSITY_HERFINDAHL: 0.9829994, RATIO_DIVERSITY_GINI: 0.0113320, RATIO_SHANNON_ENTROPY: 0.5268483, RATIO_AVERAGE_POPULARITY: 6.07515

In [10]:
from Recommenders.DataIO import DataIO

# Explore the results of the search: the metadata file sits in the search output
# folder and is named after the recommender, followed by "_metadata.zip".
data_loader = DataIO(folder_path=output_folder_path)
metadata_file_name = recommender_class.RECOMMENDER_NAME + "_metadata.zip"
search_metadata = data_loader.load_data(metadata_file_name)

# Show which entries the metadata contains.
search_metadata.keys()

dict_keys(['time_on_validation_total', 'time_on_test_total', 'time_on_test_avg', 'metric_to_optimize', 'result_on_last', 'result_on_test_df', 'algorithm_name_search', 'result_on_test_best', 'cutoff_to_optimize', 'hyperparameters_best', 'result_on_earlystopping_df', 'algorithm_name_recommender', 'time_df', 'time_on_train_total', 'exception_list', 'hyperparameters_best_index', 'hyperparameters_df', 'result_on_validation_df', 'time_on_validation_avg', 'time_on_train_avg', 'time_on_last_df', 'result_on_validation_best'])

In [11]:
# Hyperparameter configurations stored in the search metadata, one row per
# configuration (columns in the output below: topK, l1_ratio, alpha).
hyperparameters_df = search_metadata["hyperparameters_df"]
hyperparameters_df

Unnamed: 0,topK,l1_ratio,alpha
0,338,0.002173,0.031259
1,246,0.001573,0.078007
2,258,0.004157,0.021196
3,405,0.00103,0.01
4,331,0.004334,0.011878
5,200,0.001,0.01
6,276,0.002263,0.094461
7,433,0.006013,0.039674
8,204,0.009505,0.028124
9,223,0.005445,0.012488


In [12]:
# Validation results stored in the search metadata; in the output below each row is
# one configuration at cutoff 10 and each column is one evaluation metric.
result_on_validation_df = search_metadata["result_on_validation_df"]
result_on_validation_df

Unnamed: 0_level_0,Unnamed: 1_level_0,PRECISION,PRECISION_RECALL_MIN_DEN,RECALL,MAP,MAP_MIN_DEN,MRR,NDCG,F1,HIT_RATE,ARHR_ALL_HITS,...,COVERAGE_USER,COVERAGE_USER_HIT,USERS_IN_GT,DIVERSITY_GINI,SHANNON_ENTROPY,RATIO_DIVERSITY_HERFINDAHL,RATIO_DIVERSITY_GINI,RATIO_SHANNON_ENTROPY,RATIO_AVERAGE_POPULARITY,RATIO_NOVELTY
Unnamed: 0_level_1,cutoff,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,10,0.03901,0.074764,0.071797,0.017697,0.033473,0.131994,0.067018,0.050552,0.288047,0.152332,...,0.963463,0.277523,0.963463,0.009625,7.801778,0.98876,0.019031,0.578317,5.234259,0.271047
1,10,0.036257,0.069788,0.067048,0.01622,0.030795,0.123057,0.062182,0.047064,0.272539,0.140791,...,0.963463,0.262581,0.963463,0.006962,7.319857,0.984792,0.013764,0.542594,5.83575,0.265361
2,10,0.039698,0.075908,0.072902,0.018131,0.034224,0.134739,0.068325,0.051404,0.292386,0.155775,...,0.963463,0.281703,0.963463,0.010625,7.991238,0.990146,0.021007,0.592361,4.980657,0.273951
3,10,0.041852,0.07966,0.076448,0.019162,0.035946,0.140159,0.071611,0.054091,0.302932,0.163254,...,0.963463,0.291864,0.963463,0.014186,8.475044,0.992982,0.028049,0.628224,4.363615,0.281264
4,10,0.041176,0.078787,0.075681,0.018828,0.035387,0.138516,0.070647,0.053334,0.300239,0.160888,...,0.963463,0.289269,0.963463,0.012943,8.303895,0.991972,0.025591,0.615537,4.604165,0.278422
5,10,0.041805,0.079496,0.07627,0.019141,0.035896,0.139945,0.07149,0.054007,0.302533,0.163013,...,0.963463,0.291479,0.963463,0.014937,8.542073,0.993328,0.029533,0.633193,4.270202,0.282406
6,10,0.035043,0.067649,0.065041,0.015649,0.029846,0.119575,0.060295,0.045546,0.265633,0.136354,...,0.963463,0.255927,0.963463,0.005731,7.107435,0.982999,0.011332,0.526848,6.075159,0.263828
7,10,0.037175,0.071227,0.068454,0.016775,0.031841,0.126405,0.063908,0.048183,0.277177,0.145125,...,0.963463,0.267049,0.963463,0.007127,7.497854,0.98683,0.014091,0.555789,5.544097,0.269298
8,10,0.037933,0.072658,0.069821,0.017233,0.032589,0.129029,0.065275,0.049158,0.280792,0.148616,...,0.963463,0.270533,0.963463,0.007635,7.647445,0.988393,0.015097,0.566877,5.280683,0.272056
9,10,0.040932,0.078327,0.075235,0.018712,0.035203,0.137927,0.070271,0.053019,0.298893,0.160051,...,0.963463,0.287972,0.963463,0.01289,8.292601,0.99195,0.025486,0.6147,4.607974,0.278303


In [13]:
# Best hyperparameter configuration recorded in the search metadata (a dict keyed by
# hyperparameter name, as shown in the output below).
best_hyperparameters = search_metadata["hyperparameters_best"]
best_hyperparameters

{'topK': 405, 'l1_ratio': 0.0010299956370568744, 'alpha': 0.01}

In [16]:
# Retrain on the full URM with the best configuration found by the search.
# The values come straight from best_hyperparameters instead of being copied by hand,
# so this cell always matches the search result. For the recorded run that is
# topK=405, l1_ratio=0.0010299956370568744, alpha=0.01.
# NOTE(review): the search was run with save_model="last" on URM_all, so an equivalent
# model may already be saved in the output folder -- confirm before paying for a refit.
recommender = SLIMElasticNetRecommender(URM_all)
recommender.fit(**best_hyperparameters)

SLIMElasticNetRecommender: Processed 6988 (28.5%) in 5.00 min. Items per second: 23.29
SLIMElasticNetRecommender: Processed 14205 (58.0%) in 10.00 min. Items per second: 23.67
SLIMElasticNetRecommender: Processed 21865 (89.2%) in 15.00 min. Items per second: 24.29
SLIMElasticNetRecommender: Processed 24507 (100.0%) in 16.72 min. Items per second: 24.43


In [17]:
# Persist the retrained model in the experiments folder under a custom file name
# (recommender name followed by "_my_own_save.zip").
recommender.save_model(output_folder_path, file_name = recommender.RECOMMENDER_NAME + "_my_own_save.zip" )

SLIMElasticNetRecommender: Saving model in file 'Experiments/SLIMElasticNetRecommender_my_own_save.zip'
SLIMElasticNetRecommender: Saving complete


In [36]:
# Load the CSV listing the users to recommend for; its 'user_id' column drives the
# recommendation loop in the next cell.
test_users = pd.read_csv('Dataset/data_target_users_test.csv')

In [37]:
# Ask the trained recommender for the top-10 items of every target user,
# keeping the results in the same order as the 'user_id' column.
user_id = test_users['user_id']
recommendations = [recommender.recommend(user, cutoff=10) for user in user_id]

In [38]:
# Keep every recommendation list as a NumPy array.
recommendations = [np.array(items) for items in recommendations]

# Build one string of item ids per user, separated by single spaces. Joining the
# elements explicitly avoids going through NumPy's printed array form, which pads
# numbers for column alignment and wraps long rows onto several lines.
test_users['item_list'] = [
    " ".join(str(item) for item in items.ravel())
    for items in recommendations
]

# Write the submission. A forward slash is used as separator: the previous
# 'Submissions\S...' literal contained an invalid escape sequence and only worked as
# a directory separator on Windows. The folder is created first so that to_csv does
# not fail when it is missing.
# NOTE(review): the file name mentions RP3Beta although the model trained above is
# SLIMElasticNetRecommender -- confirm the intended submission name.
submission_folder = 'Submissions'
os.makedirs(submission_folder, exist_ok=True)
test_users.to_csv(submission_folder + '/Submission_04_RP3Beta_MixedRatings.csv', index=False)