In [2]:
import pandas as pd
import numpy as np
import scipy.sparse as sps
import matplotlib.pyplot as pyplot

from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Utils.DataReader import load_urm, load_icm, load_target

In [3]:
URM_all = load_urm()

URM_train, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.85)
URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_train, train_percentage = 0.85)

evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])
evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

EvaluatorHoldout: Ignoring 1524 ( 3.7%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 956 ( 2.3%) Users that have less than 1 test interactions


## RP3 Beta Recommender

In [4]:
from Recommenders.GraphBased.RP3betaRecommender import RP3betaRecommender

recommender_class = RP3betaRecommender

In [5]:
import os

output_folder_path = "Experiments/"

# If directory does not exist, create
if not os.path.exists(output_folder_path):
    os.makedirs(output_folder_path)

n_cases = 50
n_random_starts = int(n_cases*0.3)
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

### Hyperparameter Tuning

In [6]:
from skopt.space import Real, Integer, Categorical

hyperparameters_range_dictionary = {
    "alpha": Real(low=0, high=1, prior='uniform'),
    "beta": Real(low=0, high=1, prior='uniform'),
    "topK": Integer(1, 800),
    "implicit": Categorical([True, False])
}

We create a bayesian optimizer object, we pass the recommender and the evaluator

In [7]:
from HyperparameterTuning.SearchBayesianSkopt import SearchBayesianSkopt

hyperparameterSearch = SearchBayesianSkopt(recommender_class,
                                           evaluator_validation=evaluator_validation)

We provide data needed to create the instance of the model, one on the URM_train, the other on URM_all

In [8]:
from HyperparameterTuning.SearchAbstractClass import SearchInputRecommenderArgs

recommender_input_args = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS = [URM_train],
    CONSTRUCTOR_KEYWORD_ARGS = {},
    FIT_POSITIONAL_ARGS = [],
    FIT_KEYWORD_ARGS = {}
)

In [9]:
recommender_input_args_last_test = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS = [URM_all],
    CONSTRUCTOR_KEYWORD_ARGS = {},
    FIT_POSITIONAL_ARGS = [],
    FIT_KEYWORD_ARGS = {}
)

### Bayesian Search

In [10]:
hyperparameterSearch.search(recommender_input_args,
                            recommender_input_args_last_test = recommender_input_args_last_test,
                            hyperparameter_search_space = hyperparameters_range_dictionary,
                            n_cases = n_cases,
                            n_random_starts = n_random_starts,
                            save_model = "last",
                            output_folder_path = output_folder_path, # Where to save the results
                            output_file_name_root = recommender_class.RECOMMENDER_NAME, # How to call the files
                            metric_to_optimize = metric_to_optimize,
                            cutoff_to_optimize = cutoff_to_optimize,
                            )

Iteration No: 1 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'alpha': 0.32608564773252674, 'beta': 0.27458231803951466, 'topK': 432, 'implicit': True}
RP3betaRecommender: Similarity column 24507 (100.0%), 525.80 column/sec. Elapsed time 46.61 sec
EvaluatorHoldout: Processed 40105 (100.0%) in 54.79 sec. Users per second: 732
SearchBayesianSkopt: New best config found. Config 0: {'alpha': 0.32608564773252674, 'beta': 0.27458231803951466, 'topK': 432, 'implicit': True} - results: PRECISION: 0.0369754, PRECISION_RECALL_MIN_DEN: 0.0716020, RECALL: 0.0687944, MAP: 0.0164596, MAP_MIN_DEN: 0.0315123, MRR: 0.1243544, NDCG: 0.0710434, F1: 0.0480988, HIT_RATE: 0.2765241, ARHR_ALL_HITS: 0.1424863, NOVELTY: 0.0040926, AVERAGE_POPULARITY: 0.2782012, DIVERSITY_MEAN_INTER_LIST: 0.8494193, DIVERSITY_HERFINDAHL: 0.9849398, COVERAGE_ITEM: 0.4219203, COVERAGE_ITEM_HIT: 0.0416616, ITEMS_IN_GT: 0.9870241, COVERAGE_USER: 0.9633909, COVERAGE_USER_HIT: 0.2664008, USERS_IN

### Search Results

In [11]:
from Recommenders.DataIO import DataIO

#explore the results of the search
data_loader = DataIO(folder_path = output_folder_path)
search_metadata = data_loader.load_data(recommender_class.RECOMMENDER_NAME + "_metadata.zip")

search_metadata.keys()

dict_keys(['algorithm_name_recommender', 'algorithm_name_search', 'cutoff_to_optimize', 'exception_list', 'hyperparameters_best', 'hyperparameters_best_index', 'hyperparameters_df', 'metric_to_optimize', 'result_on_earlystopping_df', 'result_on_last', 'result_on_test_best', 'result_on_test_df', 'result_on_validation_best', 'result_on_validation_df', 'time_df', 'time_on_last_df', 'time_on_test_avg', 'time_on_test_total', 'time_on_train_avg', 'time_on_train_total', 'time_on_validation_avg', 'time_on_validation_total'])

In [12]:
hyperparameters_df = search_metadata["hyperparameters_df"]
hyperparameters_df

Unnamed: 0,alpha,beta,topK,implicit
0,0.326086,0.274582,432,True
1,0.468785,0.933254,744,True
2,0.70004,0.682301,453,False
3,0.33669,0.479832,101,True
4,0.718215,0.016161,447,False
5,0.332428,0.328378,47,False
6,0.363658,0.741393,687,False
7,0.711123,0.470017,782,False
8,0.819687,0.597774,721,False
9,0.11555,0.156107,782,True


In [13]:
result_on_validation_df = search_metadata["result_on_validation_df"]
result_on_validation_df.sort_values(by="MAP")
result_on_validation_df

Unnamed: 0_level_0,Unnamed: 1_level_0,PRECISION,PRECISION_RECALL_MIN_DEN,RECALL,MAP,MAP_MIN_DEN,MRR,NDCG,F1,HIT_RATE,ARHR_ALL_HITS,...,COVERAGE_USER,COVERAGE_USER_HIT,USERS_IN_GT,DIVERSITY_GINI,SHANNON_ENTROPY,RATIO_DIVERSITY_HERFINDAHL,RATIO_DIVERSITY_GINI,RATIO_SHANNON_ENTROPY,RATIO_AVERAGE_POPULARITY,RATIO_NOVELTY
Unnamed: 0_level_1,cutoff,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,10,0.036975,0.071602,0.068794,0.01646,0.031512,0.124354,0.071043,0.048099,0.276524,0.142486,...,0.963391,0.266401,0.963391,0.021774,7.911757,0.985266,0.043069,0.586489,5.442634,0.275554
1,10,0.017457,0.029518,0.027708,0.007376,0.012542,0.057233,0.030786,0.021419,0.13517,0.064674,...,0.963391,0.130222,0.963391,0.244767,12.664238,0.999814,0.484134,0.938785,0.388292,0.403696
2,10,0.032313,0.060093,0.057473,0.013836,0.025568,0.104587,0.063052,0.041368,0.242289,0.119804,...,0.963391,0.233419,0.963391,0.124635,10.408317,0.995086,0.246521,0.771556,2.746226,0.32863
3,10,0.038192,0.072121,0.069117,0.017223,0.032177,0.126883,0.073267,0.049199,0.279092,0.147129,...,0.963391,0.268875,0.963391,0.067653,9.872136,0.995952,0.133813,0.73181,3.267087,0.306979
4,10,0.036058,0.06952,0.066748,0.015406,0.029317,0.116368,0.067994,0.046822,0.270191,0.13338,...,0.963391,0.260299,0.963391,0.006914,7.327586,0.984435,0.013675,0.543185,5.686934,0.268115
5,10,0.038212,0.072096,0.069089,0.017345,0.032611,0.128279,0.073036,0.049208,0.280289,0.148479,...,0.963391,0.270028,0.963391,0.084044,10.306973,0.997017,0.166234,0.764044,2.934551,0.314228
6,10,0.030921,0.056157,0.053557,0.013352,0.024052,0.100249,0.057436,0.039206,0.230794,0.115225,...,0.963391,0.222345,0.963391,0.18475,11.509425,0.998266,0.365424,0.85318,1.753932,0.348895
7,10,0.035429,0.067576,0.064825,0.015318,0.028829,0.114905,0.069013,0.045818,0.26478,0.132133,...,0.963391,0.255087,0.963391,0.074462,8.935912,0.987887,0.14728,0.662408,4.640385,0.296333
8,10,0.033056,0.061509,0.05882,0.014011,0.025808,0.10511,0.064104,0.042326,0.246752,0.120875,...,0.963391,0.237719,0.963391,0.111378,9.94724,0.992819,0.2203,0.737377,3.39338,0.320439
9,10,0.035255,0.06842,0.065683,0.015464,0.029795,0.117915,0.066045,0.045883,0.267124,0.13449,...,0.963391,0.257345,0.963391,0.009969,7.231467,0.980868,0.019718,0.53606,6.1534,0.265402


This are the best hyperparameters found by the bayesian search -> We will train our model using these

In [14]:
best_hyperparameters = search_metadata["hyperparameters_best"]
best_hyperparameters

{'alpha': 0.3848682627100833,
 'beta': 0.36189136978424513,
 'topK': 64,
 'implicit': True}

### Recommender Testing

In [15]:
recommender = RP3betaRecommender(URM_all)
recommender.fit()

RP3betaRecommender: Similarity column 24507 (100.0%), 457.72 column/sec. Elapsed time 53.54 sec


### Submissions

In [17]:
test_users = pd.read_csv('Dataset/data_target_users_test.csv')

In [18]:
user_id = test_users['user_id']
recommendations = []
for user in user_id:
    recommendations.append(recommender.recommend(user, cutoff=10))

In [20]:
for index in range(len(recommendations)):
    recommendations[index]=np.array(recommendations[index])

test_users['item_list']= recommendations
test_users['item_list'] = pd.DataFrame([str(line).strip('[').strip(']').replace("'","") for line in test_users['item_list']])
test_users.to_csv('Submissions\Submission_04_RP3Beta_MixedRatings.csv', index=False)