# RP3Beta and SLIM Elastic Net Hybrid

In [48]:
# Data import
from utils.data_util import load_URM, create_submission, write_submission
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

# Model
from Recommenders.GraphBased.RP3betaRecommender import RP3betaRecommender
from Recommenders.DataIO import DataIO

# Hybrid
from utils.Hybrids.MergeModelsByScores import MergeTwoModelsByScoresLinear

from HyperparameterTuning.SearchBayesianSkopt import SearchBayesianSkopt

# Evaluator
from Evaluation.Evaluator import EvaluatorHoldout

In [49]:
# Load the complete interaction matrix through the project's data helper;
# it is split into train / validation / test portions further below.
URM_all = load_URM()

interactions_and_impressions found!


## Split into train, validation and test sets

In [50]:
# First hold out 15% of all interactions as the test set, then hold out 15% of
# what remains as the validation set. The intermediate (train + validation)
# matrix gets its own name so that URM_train is only bound once.
URM_train_and_validation, URM_test = split_train_in_two_percentage_global_sample(
    URM_all,
    train_percentage=0.85,
)
URM_train, URM_validation = split_train_in_two_percentage_global_sample(
    URM_train_and_validation,
    train_percentage=0.85,
)

# One evaluator per held-out split, both reporting metrics at cutoff 10
evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])
evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

EvaluatorHoldout: Ignoring 1535 ( 3.7%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 970 ( 2.3%) Users that have less than 1 test interactions


### Training Vars

In [51]:
# Folder where the search results are written and later read back from
output_folder_path = "../hyper_param_tuning/"

# Bayesian search budget: total number of configurations to evaluate, of which
# the first 30% are sampled at random before the surrogate model takes over
n_cases = 200
n_random_starts = int(n_cases*0.3)

# Objective of the search: MAP at cutoff 10
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

# Helper used to load the saved search metadata from the output folder
data_loader = DataIO(folder_path=output_folder_path)

## Load and fit the recommenders

In [52]:
from Recommenders.SLIM.SLIMElasticNetRecommender import SLIMElasticNetRecommender
# Re-import the RP3beta class under an alias on every run of this cell.
# The name `RP3betaRecommender` is rebound to the fitted *instance* below
# (later cells reference it under that name), so without the alias a second
# execution of this cell would try to call the instance and raise a TypeError.
from Recommenders.GraphBased.RP3betaRecommender import RP3betaRecommender as RP3betaRecommenderClass

# Instantiate both base recommenders on the training split
RP3betaRecommender = RP3betaRecommenderClass(URM_train)
SLIMERecommender = SLIMElasticNetRecommender(URM_train)

# Fit RP3beta with the best hyperparameters found by an earlier tuning run
RP3betaRecommender.fit(
    topK=81, alpha=0.7446283980562399, beta=0.3485498934203173, implicit=True, min_rating=1
)

# Fit SLIM ElasticNet with the best hyperparameters found by an earlier tuning run
# (this is the slow step: see the progress log printed below the cell)
SLIMERecommender.fit(
    topK=2781, l1_ratio=0.03797690559694552, alpha=0.0014753426395604076
)

RP3betaRecommender: Similarity column 24507 (100.0%), 4595.83 column/sec. Elapsed time 5.33 sec
SLIMElasticNetRecommender: Processed 8986 (36.7%) in 5.00 min. Items per second: 29.95
SLIMElasticNetRecommender: Processed 18305 (74.7%) in 10.00 min. Items per second: 30.51
SLIMElasticNetRecommender: Processed 24507 (100.0%) in 13.17 min. Items per second: 31.01


## Merge Two Models

In [53]:
from skopt.space import Real
from HyperparameterTuning.SearchAbstractClass import SearchInputRecommenderArgs


# Search space: `alpha` and `beta` are the two keyword arguments later passed to
# MergeTwoModelsByScoresLinear.fit, each explored over the interval [0, 1].
# NOTE(review): presumably these are the score weights of the two merged models - confirm.
hyperparameters_range_dictionary = {
    "alpha": Real(0, 1.0),
    "beta": Real(0, 1.0),
}

# The two already-fitted base models that the hybrid combines
recommenders = [RP3betaRecommender, SLIMERecommender]

# Constructor / fit arguments used for every configuration tried during the search
recommender_input_args = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS = [URM_train, recommenders],     # Data needed in the fit during hyperTuning
    CONSTRUCTOR_KEYWORD_ARGS = {},
    FIT_POSITIONAL_ARGS = [],
    FIT_KEYWORD_ARGS = {},
    EARLYSTOPPING_KEYWORD_ARGS = {},     # Additional hyperparameters for the fit function
)

# Arguments for the final model that is evaluated on the test split.
# It is built on train + validation interactions (not on the validation split
# alone), so that every interaction known before the test split is available
# to the hybrid, e.g. for excluding already-seen items.
# The base recommenders themselves are still the ones fitted on URM_train.
recommender_input_args_last_test = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS = [URM_train + URM_validation, recommenders],     # Data needed for the last step evaluation
    CONSTRUCTOR_KEYWORD_ARGS = {},
    FIT_POSITIONAL_ARGS = [],
    FIT_KEYWORD_ARGS = {},
    EARLYSTOPPING_KEYWORD_ARGS = {},     # Additional hyperparameters for the fit function
)

# Bayesian optimisation of the hybrid: configurations are scored on the
# validation evaluator, the test evaluator is used for the reported test results
hyperparameterSearch = SearchBayesianSkopt(MergeTwoModelsByScoresLinear,
                                         evaluator_validation=evaluator_validation,
                                         evaluator_test=evaluator_test)

hyperparameterSearch.search(recommender_input_args,
                       recommender_input_args_last_test = recommender_input_args_last_test,
                       hyperparameter_search_space = hyperparameters_range_dictionary,
                       n_cases = n_cases,
                       n_random_starts = n_random_starts,
                       save_model = "best",
                       output_folder_path = output_folder_path, # Where to save the results
                       output_file_name_root = MergeTwoModelsByScoresLinear.RECOMMENDER_NAME+"_RP3_SLIME_Merge", # How to call the files
                       metric_to_optimize = metric_to_optimize,
                       cutoff_to_optimize = cutoff_to_optimize,
                      )

Iteration No: 1 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'alpha': 0.6132935074139038, 'beta': 0.7604559382824847}
EvaluatorHoldout: Processed 40094 (100.0%) in 26.73 sec. Users per second: 1500
SearchBayesianSkopt: New best config found. Config 0: {'alpha': 0.6132935074139038, 'beta': 0.7604559382824847} - results: PRECISION: 0.0437796, PRECISION_RECALL_MIN_DEN: 0.0835249, RECALL: 0.0801780, MAP: 0.0200419, MAP_MIN_DEN: 0.0377822, MRR: 0.1448224, NDCG: 0.0748204, F1: 0.0566349, HIT_RATE: 0.3124158, ARHR_ALL_HITS: 0.1696350, NOVELTY: 0.0043318, AVERAGE_POPULARITY: 0.1971517, DIVERSITY_MEAN_INTER_LIST: 0.9420630, DIVERSITY_HERFINDAHL: 0.9942040, COVERAGE_ITEM: 0.2475619, COVERAGE_ITEM_HIT: 0.0516179, ITEMS_IN_GT: 0.9862488, COVERAGE_USER: 0.9631267, COVERAGE_USER_HIT: 0.3008960, USERS_IN_GT: 0.9631267, DIVERSITY_GINI: 0.0243689, SHANNON_ENTROPY: 9.1039177, RATIO_DIVERSITY_HERFINDAHL: 0.9945329, RATIO_DIVERSITY_GINI: 0.0482013, RATIO_SHANNON_ENTR

In [54]:
# Read the search metadata archive from the output folder (presumably the one
# written by the hyperparameter search) and display the best configuration
metadata_file_name = MergeTwoModelsByScoresLinear.RECOMMENDER_NAME + "_RP3_SLIME_Merge_metadata.zip"
search_metadata = data_loader.load_data(metadata_file_name)
best_hyperparameters = search_metadata["hyperparameters_best"]
best_hyperparameters

{'alpha': 0.8287403188553244, 'beta': 0.9853707417008009}

In [55]:
# Rebuild the hybrid on the training split and fit it with the best
# alpha / beta values loaded from the search metadata
merged = MergeTwoModelsByScoresLinear(
    URM_train=URM_train,
    recommenders=recommenders,
)
best_alpha = best_hyperparameters['alpha']
best_beta = best_hyperparameters['beta']
merged.fit(alpha=best_alpha, beta=best_beta)

In [56]:
# Evaluate the fitted hybrid on the held-out test split and print the
# row of metrics for cutoff 10
result_df, _ = evaluator_test.evaluateRecommender(merged)
metrics_at_cutoff_10 = result_df.loc[10]
print(metrics_at_cutoff_10)

EvaluatorHoldout: Processed 40659 (100.0%) in 19.75 sec. Users per second: 2058
PRECISION                     0.050491
PRECISION_RECALL_MIN_DEN      0.084198
RECALL                        0.078977
MAP                           0.023689
MAP_MIN_DEN                   0.038974
MRR                           0.163451
NDCG                          0.079633
F1                              0.0616
HIT_RATE                       0.34354
ARHR_ALL_HITS                 0.195891
NOVELTY                       0.004333
AVERAGE_POPULARITY            0.196918
DIVERSITY_MEAN_INTER_LIST       0.9422
DIVERSITY_HERFINDAHL          0.994218
COVERAGE_ITEM                 0.253642
COVERAGE_ITEM_HIT             0.055127
ITEMS_IN_GT                   0.993594
COVERAGE_USER                 0.976699
COVERAGE_USER_HIT             0.335535
USERS_IN_GT                   0.976699
DIVERSITY_GINI                0.024587
SHANNON_ENTROPY               9.110481
RATIO_DIVERSITY_HERFINDAHL    0.994547
RATIO_DIVERSITY_GINI   

In [57]:
# Produce the submission from the fitted hybrid and write it out, tagging the
# file name with the test MAP at cutoff 10 so runs can be told apart
submission = create_submission(merged)
map_at_10_text = str(result_df.loc[10]['MAP'])
submission_file_name = 'Hybrid_RP3Beta_SLIME_HT_' + '[' + map_at_10_text + ']'
write_submission(submission=submission, file_name=submission_file_name)

data_target_users_test found!
