In [2]:
import numpy as np
import pandas as pd
import scipy.sparse as sps

In [3]:
from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Utils.DataReader import load_urm, load_icm, load_target

In [4]:
URM_all = load_urm()

URM_train, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.85)
URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_train, train_percentage = 0.85)

evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])
evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

EvaluatorHoldout: Ignoring 1580 ( 3.8%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 955 ( 2.3%) Users that have less than 1 test interactions


## Slim BPR

In [5]:
from Recommenders.SLIM.Cython.SLIM_BPR_Cython import SLIM_BPR_Cython

#try a SLIM BPR model
recommender_class = SLIM_BPR_Cython

In [6]:
import os

output_folder_path = "Experiments/"

# If directory does not exist, create
if not os.path.exists(output_folder_path):
    os.makedirs(output_folder_path)

n_cases = 10
n_random_starts = int(n_cases*0.3)
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

In [7]:
from skopt.space import Real, Integer, Categorical

hyperparameters_range_dictionary = {
    "epochs": Categorical([700]),
    "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
    "topK": Integer(1, 800),
    "lambda_i": Real(low = 1e-4, high = 1e-1, prior = 'log-uniform'),
    "lambda_j": Real(low = 1e-4, high = 1e-1, prior = 'log-uniform'),
    "learning_rate": Real(low = 1e-4, high = 1e-1, prior = 'log-uniform')
}

In [8]:
earlystopping_keywargs = {"validation_every_n": 15,
                          "stop_on_validation": True,
                          "evaluator_object": evaluator_validation,
                          "lower_validations_allowed": 5,
                          "validation_metric": metric_to_optimize,
                          }

In [9]:
from HyperparameterTuning.SearchBayesianSkopt import SearchBayesianSkopt

#create a bayesian optimizer object, we pass the recommender and the evaluator
hyperparameterSearch = SearchBayesianSkopt(recommender_class,
                                           evaluator_validation=evaluator_validation)

In [10]:
from HyperparameterTuning.SearchAbstractClass import SearchInputRecommenderArgs

#provide data needed to create instance of model (one on URM_train, the other on URM_all)
recommender_input_args = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS = [URM_train],     # For a CBF model simply put [URM_train, ICM_train]
    CONSTRUCTOR_KEYWORD_ARGS = {},
    FIT_POSITIONAL_ARGS = [],
    FIT_KEYWORD_ARGS = earlystopping_keywargs
)
recommender_input_args_last_test = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS = [URM_all],
    CONSTRUCTOR_KEYWORD_ARGS = {},
    FIT_POSITIONAL_ARGS = [],
    FIT_KEYWORD_ARGS = earlystopping_keywargs
)

In [12]:
!python run_compile_all_cython.py

In [13]:
#let's run the bayesian search
hyperparameterSearch.search(recommender_input_args,
                            recommender_input_args_last_test = recommender_input_args_last_test,
                            hyperparameter_search_space = hyperparameters_range_dictionary,
                            n_cases = n_cases,
                            n_random_starts = n_random_starts,
                            save_model = "last",
                            output_folder_path = output_folder_path, # Where to save the results
                            output_file_name_root = recommender_class.RECOMMENDER_NAME, # How to call the files
                            metric_to_optimize = metric_to_optimize,
                            cutoff_to_optimize = cutoff_to_optimize,
                            )

Iteration No: 1 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'epochs': 700, 'sgd_mode': 'sgd', 'topK': 73, 'lambda_i': 0.00902834426038083, 'lambda_j': 0.03782342660009318, 'learning_rate': 0.001592760049267801}
SearchBayesianSkopt: Config 0 Exception. Config: {'epochs': 700, 'sgd_mode': 'sgd', 'topK': 73, 'lambda_i': 0.00902834426038083, 'lambda_j': 0.03782342660009318, 'learning_rate': 0.001592760049267801} - Exception: Traceback (most recent call last):
  File "/kaggle/working/Recommender-Systems-Challenge-2022/HyperparameterTuning/SearchAbstractClass.py", line 468, in _objective_function
    result_df, recommender_instance = self._evaluate_on_validation(current_fit_hyperparameters_dict, was_already_evaluated_flag, was_already_evaluated_index)
  File "/kaggle/working/Recommender-Systems-Challenge-2022/HyperparameterTuning/SearchAbstractClass.py", line 326, in _evaluate_on_validation
    recommender_instance, train_time = self._fit_model(current

Traceback (most recent call last):
  File "/kaggle/working/Recommender-Systems-Challenge-2022/HyperparameterTuning/SearchAbstractClass.py", line 468, in _objective_function
    result_df, recommender_instance = self._evaluate_on_validation(current_fit_hyperparameters_dict, was_already_evaluated_flag, was_already_evaluated_index)
  File "/kaggle/working/Recommender-Systems-Challenge-2022/HyperparameterTuning/SearchAbstractClass.py", line 326, in _evaluate_on_validation
    recommender_instance, train_time = self._fit_model(current_fit_hyperparameters)
  File "/kaggle/working/Recommender-Systems-Challenge-2022/HyperparameterTuning/SearchAbstractClass.py", line 300, in _fit_model
    **current_fit_hyperparameters)
  File "/kaggle/working/Recommender-Systems-Challenge-2022/Recommenders/SLIM/Cython/SLIM_BPR_Cython.py", line 76, in fit
    from Recommenders.SLIM.Cython.SLIM_BPR_Cython_Epoch import SLIM_BPR_Cython_Epoch
ModuleNotFoundError: No module named 'Recommenders.SLIM.Cython.SLIM_BPR_C

Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.2650
Function value obtained: 65504.0000
Current minimum: 65504.0000
Iteration No: 2 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'epochs': 700, 'sgd_mode': 'adam', 'topK': 585, 'lambda_i': 0.056611648521484814, 'lambda_j': 0.00031613430674430246, 'learning_rate': 0.0033139572601774757}
SearchBayesianSkopt: Config 1 Exception. Config: {'epochs': 700, 'sgd_mode': 'adam', 'topK': 585, 'lambda_i': 0.056611648521484814, 'lambda_j': 0.00031613430674430246, 'learning_rate': 0.0033139572601774757} - Exception: Traceback (most recent call last):
  File "/kaggle/working/Recommender-Systems-Challenge-2022/HyperparameterTuning/SearchAbstractClass.py", line 468, in _objective_function
    result_df, recommender_instance = self._evaluate_on_validation(current_fit_hyperparameters_dict, was_already_evaluated_flag, was_already_evaluated_index)
  File "/kaggle/working/Recommender-Systems-Challen

Traceback (most recent call last):
  File "/kaggle/working/Recommender-Systems-Challenge-2022/HyperparameterTuning/SearchAbstractClass.py", line 468, in _objective_function
    result_df, recommender_instance = self._evaluate_on_validation(current_fit_hyperparameters_dict, was_already_evaluated_flag, was_already_evaluated_index)
  File "/kaggle/working/Recommender-Systems-Challenge-2022/HyperparameterTuning/SearchAbstractClass.py", line 326, in _evaluate_on_validation
    recommender_instance, train_time = self._fit_model(current_fit_hyperparameters)
  File "/kaggle/working/Recommender-Systems-Challenge-2022/HyperparameterTuning/SearchAbstractClass.py", line 300, in _fit_model
    **current_fit_hyperparameters)
  File "/kaggle/working/Recommender-Systems-Challenge-2022/Recommenders/SLIM/Cython/SLIM_BPR_Cython.py", line 76, in fit
    from Recommenders.SLIM.Cython.SLIM_BPR_Cython_Epoch import SLIM_BPR_Cython_Epoch
ModuleNotFoundError: No module named 'Recommenders.SLIM.Cython.SLIM_BPR_C

In [14]:
from Recommenders.DataIO import DataIO

#explore the results of the search
data_loader = DataIO(folder_path = output_folder_path)
search_metadata = data_loader.load_data(recommender_class.RECOMMENDER_NAME + "_metadata.zip")

search_metadata.keys()

dict_keys(['result_on_validation_best', 'result_on_test_best', 'result_on_last', 'cutoff_to_optimize', 'algorithm_name_recommender', 'time_on_last_df', 'result_on_validation_df', 'time_on_validation_total', 'hyperparameters_df', 'hyperparameters_best_index', 'time_on_train_total', 'hyperparameters_best', 'result_on_earlystopping_df', 'algorithm_name_search', 'time_on_train_avg', 'exception_list', 'time_on_test_avg', 'result_on_test_df', 'metric_to_optimize', 'time_on_validation_avg', 'time_on_test_total', 'time_df'])

In [15]:
hyperparameters_df = search_metadata["hyperparameters_df"]
hyperparameters_df

Unnamed: 0,epochs,sgd_mode,topK,lambda_i,lambda_j,learning_rate
0,700.0,sgd,73.0,0.009028,0.037823,0.001593
1,700.0,adam,585.0,0.056612,0.000316,0.003314
2,700.0,sgd,639.0,0.005172,0.041616,0.000383
3,,,,,,
4,,,,,,
5,,,,,,
6,,,,,,
7,,,,,,
8,,,,,,
9,,,,,,


In [16]:
result_on_validation_df = search_metadata["result_on_validation_df"]
result_on_validation_df

In [17]:
best_hyperparameters = search_metadata["hyperparameters_best"]
best_hyperparameters

In [18]:
recommender = SLIM_BPR_Cython(URM_all)
#recommender.fit(epochs=462, sgd_mode = "sgd", topK = 50, lambda_i = 0.0002100158148046903, lambda_j = 0.00021427617376060016, learning_rate = 0.02543769736452639)

In [19]:
print("MAP of the starting models")

result_df, _ = evaluator_test.evaluateRecommender(recommender)
print("SLIM BPR - MAP: {}".format(result_df.loc[10]["MAP"]))

MAP of the starting models


AttributeError: 'SLIM_BPR_Cython' object has no attribute 'W_sparse'