# RP3 Beta Recommender Implementation

In [6]:
# Data import
from utils.data_util import load_URM, combine_matrices, create_submission, write_submission
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

# Model
from Recommenders.GraphBased.RP3betaRecommender import RP3betaRecommender

from Recommenders.DataIO import DataIO

# Tuning
from HyperparameterTuning.SearchBayesianSkopt import SearchBayesianSkopt

# Evaluator
from Evaluation.Evaluator import EvaluatorHoldout

In [7]:
# Load the full URM through the project helper `load_URM` (called with its
# defaults); the train/validation/test splits below are all derived from it.
URM_all = load_URM()

interactions_and_impressions found!


## Split into training and test set
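We first split the data into 85% training and 15% test. The training part is then split again (85% / 15%) into the final training set and a validation set used for hyperparameter tuning.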

In [8]:
# Seed the RNG before splitting. The tuning results are persisted to disk and
# reloaded later, so the held-out sets must be the same on every
# "Restart & Run All"; otherwise reloaded hyperparameters would be scored on a
# test split that may overlap the data they were tuned on.
# NOTE(review): assumes the split helper samples from NumPy's global RNG —
# confirm against Data_manager.split_functions.
import numpy as np

SPLIT_SEED = 42
np.random.seed(SPLIT_SEED)

# Step 1: hold out 15% as the test set (85% stays in training).
URM_train, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage=0.85)
# Step 2: hold out 15% of the remaining training data as the validation set.
URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_train, train_percentage=0.85)

# One evaluator per held-out set, both restricted to cutoff 10.
evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])
evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

EvaluatorHoldout: Ignoring 1597 ( 3.8%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 892 ( 2.1%) Users that have less than 1 test interactions


In [9]:
# Folder the search writes its results to; `data_loader` reads from the same place.
output_folder_path = "../hyper_param_tuning/"

# Search budget: 50 cases in total, of which 30% (15) are passed to the search
# as `n_random_starts`.
n_cases = 50
n_random_starts = int(n_cases * 0.3)

# Objective handed to the search: MAP, with cutoff 10.
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

data_loader = DataIO(folder_path=output_folder_path)

In [10]:
from skopt.space import Real, Integer, Categorical
from HyperparameterTuning.SearchAbstractClass import SearchInputRecommenderArgs


# Search space handed to the hyperparameter search for RP3beta.
hyperparameters_range_dictionary = dict(
    alpha=Real(0.1, 1.0),
    beta=Real(0.1, 1.0),
    topK=Integer(10, 1000),
    implicit=Categorical([True]),  # single choice, i.e. effectively fixed
    min_rating=Categorical([0, 1]),
)


# Constructor/fit arguments used during the search: the recommender is built
# from URM_train only, with no extra constructor, fit or early-stopping options.
recommender_input_args = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
    CONSTRUCTOR_KEYWORD_ARGS={},
    FIT_POSITIONAL_ARGS=[],
    FIT_KEYWORD_ARGS={},
    EARLYSTOPPING_KEYWORD_ARGS={},
)

# Constructor/fit arguments for the final run that is scored on the test set.
# That model must be trained on every interaction outside the test set, i.e.
# training plus validation data. Passing URM_validation alone would fit the
# final model on only the small validation slice.
# NOTE(review): assumes both matrices have the same shape and support `+`
# (they come from the same split helper) — confirm.
recommender_input_args_last_test = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS=[URM_train + URM_validation],
    CONSTRUCTOR_KEYWORD_ARGS={},
    FIT_POSITIONAL_ARGS=[],
    FIT_KEYWORD_ARGS={},
    EARLYSTOPPING_KEYWORD_ARGS={},
)

# Build the search object for RP3beta with the validation and test evaluators.
hyperparameterSearch = SearchBayesianSkopt(
    RP3betaRecommender,
    evaluator_validation=evaluator_validation,
    evaluator_test=evaluator_test,
)

# Keyword options for the run, collected in one place for readability.
search_settings = {
    "recommender_input_args_last_test": recommender_input_args_last_test,
    "hyperparameter_search_space": hyperparameters_range_dictionary,
    "n_cases": n_cases,
    "n_random_starts": n_random_starts,
    "save_model": "best",
    "output_folder_path": output_folder_path,  # where the results are saved
    "output_file_name_root": RP3betaRecommender.RECOMMENDER_NAME,  # prefix of the saved files
    "metric_to_optimize": metric_to_optimize,
    "cutoff_to_optimize": cutoff_to_optimize,
}

hyperparameterSearch.search(recommender_input_args, **search_settings)

Iteration No: 1 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'alpha': 0.5955046574019568, 'beta': 0.3031813475742793, 'topK': 947, 'implicit': True, 'min_rating': 1}
RP3betaRecommender: Similarity column 24507 (100.0%), 463.78 column/sec. Elapsed time 52.84 sec
EvaluatorHoldout: Processed 40032 (100.0%) in 57.62 sec. Users per second: 695
SearchBayesianSkopt: New best config found. Config 0: {'alpha': 0.5955046574019568, 'beta': 0.3031813475742793, 'topK': 947, 'implicit': True, 'min_rating': 1} - results: PRECISION: 0.0372002, PRECISION_RECALL_MIN_DEN: 0.0721598, RECALL: 0.0694428, MAP: 0.0163895, MAP_MIN_DEN: 0.0315446, MRR: 0.1237528, NDCG: 0.0635505, F1: 0.0484474, HIT_RATE: 0.2788519, ARHR_ALL_HITS: 0.1418944, NOVELTY: 0.0041354, AVERAGE_POPULARITY: 0.2888151, DIVERSITY_MEAN_INTER_LIST: 0.8343631, DIVERSITY_HERFINDAHL: 0.9834342, COVERAGE_ITEM: 0.7346472, COVERAGE_ITEM_HIT: 0.0339495, ITEMS_IN_GT: 0.9864937, COVERAGE_USER: 0.9616373, COVERAGE

KeyboardInterrupt: 

In [None]:
# Reload the search metadata from the output folder (presumably the file the
# search saved under the recommender's name) and extract the best configuration.
metadata_file_name = RP3betaRecommender.RECOMMENDER_NAME + "_metadata.zip"
search_metadata = data_loader.load_data(metadata_file_name)
best_hyperparameters = search_metadata["hyperparameters_best"]
best_hyperparameters

## Create and train the model
In this notebook we create an RP3beta recommender, loaded from the course repository, and fit it with the best hyperparameters found by the search.

In [None]:
# Build the model on the training split and fit it with the tuned values.
# Only these five hyperparameters are forwarded, looked up by name.
recommender = RP3betaRecommender(URM_train)

fit_param_names = ("alpha", "beta", "topK", "implicit", "min_rating")
tuned_fit_kwargs = {name: best_hyperparameters[name] for name in fit_param_names}
recommender.fit(**tuned_fit_kwargs)  # Train the model

## Evaluate the model

In [None]:
# Score the fitted model with the test evaluator; only the first returned
# element (the results table) is used afterwards.
result_df, _ = evaluator_test.evaluateRecommender(recommender)

# Row labelled 10 — presumably the cutoff requested via cutoff_list=[10].
metrics_row = result_df.loc[10]
print(metrics_row)

## Create and save the submission
We can now create the submission and save it as csv.

In [None]:
# Build the submission from the fitted recommender.
submission = create_submission(recommender)

# Embed the test MAP (row 10 of the results table) in the file name so that
# different runs can be told apart.
map_score = result_df.loc[10]['MAP']
submission_file_name = 'RP3Beta_HT_' + '[' + str(map_score) + ']'
write_submission(submission=submission, file_name=submission_file_name)