In [2]:
import pandas as pd
import numpy as np

from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Utils.DataReader import load_urm, load_icm, load_target

In [3]:
URM_all = load_urm()

URM_train, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.85)
URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_train, train_percentage = 0.85)

evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])
evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

EvaluatorHoldout: Ignoring 1530 ( 3.7%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 920 ( 2.2%) Users that have less than 1 test interactions


## RP3 Beta Recommender

In [4]:
from Recommenders.GraphBased.RP3betaRecommender import RP3betaRecommender

recommender_class = RP3betaRecommender

In [5]:
import os

output_folder_path = "Experiments/"

# If directory does not exist, create
if not os.path.exists(output_folder_path):
    os.makedirs(output_folder_path)

n_cases = 50
n_random_starts = int(n_cases*0.3)
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

### Hyperparameter Tuning

In [6]:
from skopt.space import Real, Integer, Categorical

hyperparameters_range_dictionary = {
    "alpha": Real(low=0, high=1, prior='uniform'),
    "beta": Real(low=0, high=1, prior='uniform'),
    "topK": Integer(1, 800),
    "implicit": Categorical([True, False])
}

We create a bayesian optimizer object, we pass the recommender and the evaluator

In [7]:
from HyperparameterTuning.SearchBayesianSkopt import SearchBayesianSkopt

hyperparameterSearch = SearchBayesianSkopt(recommender_class,
                                           evaluator_validation=evaluator_validation)

We provide data needed to create the instance of the model, one on the URM_train, the other on URM_all

In [8]:
from HyperparameterTuning.SearchAbstractClass import SearchInputRecommenderArgs

recommender_input_args = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS = [URM_train],
    CONSTRUCTOR_KEYWORD_ARGS = {},
    FIT_POSITIONAL_ARGS = [],
    FIT_KEYWORD_ARGS = {}
)

In [9]:
recommender_input_args_last_test = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS = [URM_all],
    CONSTRUCTOR_KEYWORD_ARGS = {},
    FIT_POSITIONAL_ARGS = [],
    FIT_KEYWORD_ARGS = {}
)

### Bayesian Search

In [10]:
hyperparameterSearch.search(recommender_input_args,
                            recommender_input_args_last_test = recommender_input_args_last_test,
                            hyperparameter_search_space = hyperparameters_range_dictionary,
                            n_cases = n_cases,
                            n_random_starts = n_random_starts,
                            save_model = "last",
                            output_folder_path = output_folder_path, # Where to save the results
                            output_file_name_root = recommender_class.RECOMMENDER_NAME, # How to call the files
                            metric_to_optimize = metric_to_optimize,
                            cutoff_to_optimize = cutoff_to_optimize,
                            )

Iteration No: 1 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'alpha': 0.20438355455369966, 'beta': 0.8407190029250152, 'topK': 613, 'implicit': True}
RP3betaRecommender: Similarity column 24507 (100.0%), 598.48 column/sec. Elapsed time 40.95 sec
EvaluatorHoldout: Processed 40099 (100.0%) in 48.41 sec. Users per second: 828
SearchBayesianSkopt: New best config found. Config 0: {'alpha': 0.20438355455369966, 'beta': 0.8407190029250152, 'topK': 613, 'implicit': True} - results: PRECISION: 0.0215841, PRECISION_RECALL_MIN_DEN: 0.0381383, RECALL: 0.0362233, MAP: 0.0087803, MAP_MIN_DEN: 0.0154429, MRR: 0.0669532, NDCG: 0.0334296, F1: 0.0270501, HIT_RATE: 0.1657149, ARHR_ALL_HITS: 0.0764101, NOVELTY: 0.0057371, AVERAGE_POPULARITY: 0.0281429, DIVERSITY_MEAN_INTER_LIST: 0.9927870, DIVERSITY_HERFINDAHL: 0.9992762, COVERAGE_ITEM: 0.8526543, COVERAGE_ITEM_HIT: 0.0701840, ITEMS_IN_GT: 0.9864937, COVERAGE_USER: 0.9632468, COVERAGE_USER_HIT: 0.1596243, USERS_IN_G

### Search Results

In [11]:
from Recommenders.DataIO import DataIO

#explore the results of the search
data_loader = DataIO(folder_path = output_folder_path)
search_metadata = data_loader.load_data(recommender_class.RECOMMENDER_NAME + "_metadata.zip")

search_metadata.keys()

dict_keys(['hyperparameters_df', 'time_on_validation_avg', 'exception_list', 'time_on_test_total', 'hyperparameters_best_index', 'result_on_test_best', 'result_on_earlystopping_df', 'time_on_test_avg', 'algorithm_name_recommender', 'hyperparameters_best', 'time_on_validation_total', 'time_on_train_total', 'time_df', 'result_on_test_df', 'result_on_validation_best', 'time_on_last_df', 'result_on_last', 'time_on_train_avg', 'algorithm_name_search', 'metric_to_optimize', 'result_on_validation_df', 'cutoff_to_optimize'])

In [12]:
hyperparameters_df = search_metadata["hyperparameters_df"]
hyperparameters_df

Unnamed: 0,alpha,beta,topK,implicit
0,0.204384,0.840719,613,True
1,0.827766,0.856954,350,False
2,0.984696,0.151919,237,True
3,0.276274,0.553395,625,True
4,0.620667,0.674237,239,True
5,0.181416,0.801565,578,True
6,0.006501,0.968413,154,False
7,0.869245,0.647591,182,False
8,0.591912,0.27908,695,True
9,0.857673,0.895768,168,False


In [13]:
result_on_validation_df = search_metadata["result_on_validation_df"]
result_on_validation_df

Unnamed: 0_level_0,Unnamed: 1_level_0,PRECISION,PRECISION_RECALL_MIN_DEN,RECALL,MAP,MAP_MIN_DEN,MRR,NDCG,F1,HIT_RATE,ARHR_ALL_HITS,...,COVERAGE_USER,COVERAGE_USER_HIT,USERS_IN_GT,DIVERSITY_GINI,SHANNON_ENTROPY,RATIO_DIVERSITY_HERFINDAHL,RATIO_DIVERSITY_GINI,RATIO_SHANNON_ENTROPY,RATIO_AVERAGE_POPULARITY,RATIO_NOVELTY
Unnamed: 0_level_1,cutoff,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,10,0.021584,0.038138,0.036223,0.00878,0.015443,0.066953,0.03343,0.02705,0.165715,0.07641,...,0.963247,0.159624,0.963247,0.24057,12.471257,0.999606,0.475683,0.924407,0.556848,0.386247
1,10,0.021108,0.035406,0.033255,0.008757,0.014507,0.063964,0.03158,0.025824,0.155415,0.074491,...,0.963247,0.149703,0.963247,0.247355,12.677779,0.999817,0.489099,0.939715,0.499711,0.40103
2,10,0.038649,0.07464,0.071809,0.017105,0.032561,0.127392,0.065685,0.050252,0.285768,0.147091,...,0.963247,0.275265,0.963247,0.030453,8.108083,0.987472,0.060215,0.600996,5.284984,0.2777
3,10,0.038405,0.072606,0.069664,0.017363,0.032426,0.127371,0.065033,0.049514,0.279982,0.148125,...,0.963247,0.269692,0.963247,0.11113,9.897171,0.994209,0.219738,0.733608,3.665178,0.308826
4,10,0.036809,0.06855,0.065669,0.016462,0.030159,0.119954,0.061178,0.047175,0.267812,0.140011,...,0.963247,0.257969,0.963247,0.133853,10.832357,0.997456,0.264669,0.802927,2.501157,0.330707
5,10,0.024868,0.044711,0.04263,0.010284,0.018469,0.077758,0.03927,0.031412,0.188932,0.089107,...,0.963247,0.181989,0.963247,0.220559,12.205688,0.999387,0.436115,0.904723,0.812563,0.373463
6,10,0.014419,0.025686,0.024327,0.005746,0.010248,0.046012,0.022485,0.018106,0.116736,0.051252,...,0.963247,0.112446,0.963247,0.164233,11.972298,0.999382,0.32474,0.887423,0.300473,0.394845
7,10,0.037784,0.070374,0.067395,0.017131,0.031371,0.124717,0.063281,0.048421,0.273373,0.145591,...,0.963247,0.263326,0.963247,0.128103,10.659106,0.996705,0.253299,0.790085,2.882787,0.327746
8,10,0.036861,0.071146,0.068451,0.016198,0.030982,0.121785,0.062649,0.047918,0.276466,0.140006,...,0.963247,0.266305,0.963247,0.041371,7.900854,0.982885,0.081803,0.585635,5.864757,0.274893
9,10,0.018482,0.031589,0.029765,0.007654,0.012958,0.057394,0.028147,0.022804,0.139654,0.065987,...,0.963247,0.134522,0.963247,0.205732,12.438339,0.999777,0.406796,0.921967,0.396959,0.406457


This are the best hyperparameters found by the bayesian search -> We will train our model using these

In [14]:
best_hyperparameters = search_metadata["hyperparameters_best"]
best_hyperparameters

{'alpha': 0.3196900461986604,
 'beta': 0.3530045735373658,
 'topK': 44,
 'implicit': False}

### Recommender Testing

In [15]:
recommender = RP3betaRecommender(URM_train)
recommender.fit(alpha=1.0, beta= 0.4520495673133021, topK=167, implicit=True)

RP3betaRecommender: Similarity column 24507 (100.0%), 943.71 column/sec. Elapsed time 25.97 sec


In [16]:
result_df, _ = evaluator_test.evaluateRecommender(recommender)
print("rp3Beta Recommender - MAP: {}".format(result_df.loc[10]["MAP"]))

EvaluatorHoldout: Processed 40709 (100.0%) in 36.92 sec. Users per second: 1103
rp3Beta Recommender - MAP: 0.021503547049187645


### Submissions

In [17]:
test_users = pd.read_csv('/kaggle/working/Recommender-Systems-Challenge-2022/Dataset/data_target_users_test.csv')

In [18]:
user_id = test_users['user_id']
recommendations = []
for user in user_id:
    recommendations.append(recommender.recommend(user, cutoff=10))

In [19]:
for index in range(len(recommendations)):
    recommendations[index]=np.array(recommendations[index])

test_users['item_list']= recommendations
test_users['item_list'] = pd.DataFrame([str(line).strip('[').strip(']').replace("'","") for line in test_users['item_list']])
test_users.to_csv('Submissions\Submission_RP3Beta_New.csv', index=False)