In [1]:
import pandas as pd
import numpy as np
import scipy.sparse as sps
import matplotlib.pyplot as pyplot

from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Utils.DataReader import load_urm, load_icm, load_target

In [2]:
URM_all = load_urm()

# Two successive hold-out splits, each called with train_percentage=0.85:
# the first sets URM_test aside, the second takes URM_validation out of what is left.
# NOTE(review): no seed is passed here, so the partitions may differ between runs — confirm.
URM_train, URM_test = split_train_in_two_percentage_global_sample(
    URM_all,
    train_percentage=0.85,
)
URM_train, URM_validation = split_train_in_two_percentage_global_sample(
    URM_train,
    train_percentage=0.85,
)

# One evaluator per held-out matrix (validation first, then test), both with cutoff_list=[10].
evaluator_validation, evaluator_test = (
    EvaluatorHoldout(URM_heldout, cutoff_list=[10])
    for URM_heldout in (URM_validation, URM_test)
)

EvaluatorHoldout: Ignoring 4339 (10.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 3084 ( 7.4%) Users that have less than 1 test interactions


## MF-BPR

In [3]:
from Recommenders.MatrixFactorization.Cython.MatrixFactorization_Cython import MatrixFactorization_BPR_Cython

# Alias for the model under tuning; it is handed to the search object and its
# RECOMMENDER_NAME is used as the root of the output file names.
recommender_class = MatrixFactorization_BPR_Cython

In [4]:
import os

output_folder_path = "result_experiments/"

# Create the results folder if needed. exist_ok=True replaces the separate
# os.path.exists() check, so there is no gap between checking and creating,
# and a regular file sitting on that path is reported instead of silently ignored.
os.makedirs(output_folder_path, exist_ok=True)

# Search budget: total number of configurations, 30% of which are random starts.
n_cases = 20
n_random_starts = int(n_cases*0.3)

# Metric and cutoff passed to the hyperparameter search; the metric is also
# used as the early-stopping validation metric.
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

### Hyperparameter Tuning

In [14]:
from skopt.space import Real, Integer, Categorical
# MF-BPR is a machine-learning model trained by gradient descent, so its learning
# process is driven by hyperparameters. The ranges below define the space the
# search samples from.
hyperparameters_range_dictionary = {
    "epochs": Categorical([100]),
    "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
    "num_factors": Integer(1, 200),
    # Powers of two from 1 up to 1024.
    "batch_size": Categorical([2 ** exponent for exponent in range(11)]),
    "user_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
    "learning_rate": Real(low=1e-4, high=1e-1, prior='log-uniform'),
}

Early Stopping Setup

In [15]:
# Keyword arguments forwarded to fit() to enable early stopping.
# Validation uses evaluator_validation and the same metric the search optimizes.
earlystopping_keywargs = {
    "validation_every_n": 10,
    "stop_on_validation": True,
    "evaluator_object": evaluator_validation,
    "lower_validations_allowed": 10,
    "validation_metric": metric_to_optimize,
}

We create a Bayesian optimizer object, passing it the recommender class and the validation evaluator.

In [7]:
from HyperparameterTuning.SearchBayesianSkopt import SearchBayesianSkopt

# Search object bound to the recommender class; candidates are scored with
# the validation evaluator.
hyperparameterSearch = SearchBayesianSkopt(
    recommender_class,
    evaluator_validation=evaluator_validation,
)

We provide the data needed to create two instances of the model: one built on `URM_train`, the other on `URM_all`.

In [8]:
from HyperparameterTuning.SearchAbstractClass import SearchInputRecommenderArgs

# Constructor and fit arguments for the two model instances the search builds:
# one on URM_train, one on URM_all.
# Both reference the earlystopping_keywargs object that exists when this cell
# runs, so re-run this cell after redefining that dictionary.
recommender_input_args = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],  # For a CBF model simply put [URM_train, ICM_train]
    CONSTRUCTOR_KEYWORD_ARGS={},
    FIT_POSITIONAL_ARGS=[],
    FIT_KEYWORD_ARGS=earlystopping_keywargs,
)

# NOTE(review): this instance is built on URM_all but still early-stops on
# evaluator_validation, whose interactions are part of URM_all — confirm this is intended.
recommender_input_args_last_test = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS=[URM_all],
    CONSTRUCTOR_KEYWORD_ARGS={},
    FIT_POSITIONAL_ARGS=[],
    FIT_KEYWORD_ARGS=earlystopping_keywargs,
)

### Cython

In [9]:
import pyximport
# Install pyximport's import hook; the call returns the importer objects shown
# in the cell output.
pyximport.install()

(None, <pyximport.pyximport.PyxImporter at 0x21fa2622670>)

In [10]:
# Compile the project's Cython files by running the helper script in a shell.
# `python` is resolved by the shell, so check the interpreter path printed in
# the output matches the kernel's environment.
!python run_compile_all_cython.py

run_compile_all_cython: Found 10 Cython files in 4 folders...
run_compile_all_cython: All files will be compiled using your current python environment: 'C:\Users\Luca\miniconda3\python.exe'
Compiling [1/10]: MatrixFactorizationImpressions_Cython_Epoch.pyx... 
Compiling [1/10]: MatrixFactorizationImpressions_Cython_Epoch.pyx... PASS

Compiling [2/10]: MatrixFactorization_Cython_Epoch.pyx... 
Compiling [2/10]: MatrixFactorization_Cython_Epoch.pyx... PASS

Compiling [3/10]: Compute_Similarity_Cython.pyx... 
Compiling [3/10]: Compute_Similarity_Cython.pyx... PASS

Compiling [4/10]: SLIM_BPR_Cython_Epoch.pyx... 
Compiling [4/10]: SLIM_BPR_Cython_Epoch.pyx... PASS

Compiling [5/10]: Sparse_Matrix_Tree_CSR.pyx... 
Compiling [5/10]: Sparse_Matrix_Tree_CSR.pyx... PASS

Compiling [6/10]: Triangular_Matrix.pyx... 
Compiling [6/10]: Triangular_Matrix.pyx... PASS

Compiling [7/10]: CFW_DVV_Similarity_Cython_SGD.pyx... 
Compiling [7/10]: CFW_DVV_Similarity_Cython_SGD.pyx... PASS

Compiling [8/10]: C

  tree = Parsing.p_module(s, pxd, full_module_name)
  tree = Parsing.p_module(s, pxd, full_module_name)
  tree = Parsing.p_module(s, pxd, full_module_name)
  tree = Parsing.p_module(s, pxd, full_module_name)
  tree = Parsing.p_module(s, pxd, full_module_name)
  tree = Parsing.p_module(s, pxd, full_module_name)
  tree = Parsing.p_module(s, pxd, full_module_name)
  tree = Parsing.p_module(s, pxd, full_module_name)
  tree = Parsing.p_module(s, pxd, full_module_name)
  tree = Parsing.p_module(s, pxd, full_module_name)


### Bayesian Search

In [16]:
# Launch the Bayesian search. Results are saved under output_folder_path and
# the files are named after the recommender's RECOMMENDER_NAME.
hyperparameterSearch.search(
    recommender_input_args,
    recommender_input_args_last_test=recommender_input_args_last_test,
    hyperparameter_search_space=hyperparameters_range_dictionary,
    n_cases=n_cases,
    n_random_starts=n_random_starts,
    save_model="last",
    output_folder_path=output_folder_path,
    output_file_name_root=recommender_class.RECOMMENDER_NAME,
    metric_to_optimize=metric_to_optimize,
    cutoff_to_optimize=cutoff_to_optimize,
)

Iteration No: 1 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'epochs': 100, 'sgd_mode': 'adam', 'num_factors': 17, 'batch_size': 4, 'user_reg': 0.00015002679963372703, 'learning_rate': 0.0003162906336872413}
MatrixFactorization_BPR_Cython_Recommender: URM Detected 1 ( 0.0%) users with no interactions.
MF_BPR: Processed 41632 (100.0%) in 0.69 sec. MSE loss 3.41E-03. Sample per second: 60074
MF_BPR: Epoch 1 of 100. Elapsed time 0.51 sec
MF_BPR: Processed 41632 (100.0%) in 0.96 sec. MSE loss 3.39E-03. Sample per second: 43581
MF_BPR: Epoch 2 of 100. Elapsed time 0.77 sec
MF_BPR: Processed 41632 (100.0%) in 1.28 sec. MSE loss 3.40E-03. Sample per second: 32640
MF_BPR: Epoch 3 of 100. Elapsed time 1.10 sec
MF_BPR: Processed 41632 (100.0%) in 0.53 sec. MSE loss 3.41E-03. Sample per second: 79036
MF_BPR: Epoch 4 of 100. Elapsed time 1.35 sec
MF_BPR: Processed 41632 (100.0%) in 0.83 sec. MSE loss 3.40E-03. Sample per second: 50282
MF_BPR: Validation begin

KeyboardInterrupt: 

Now we will fit the model with the hyperparameters obtained from the previous Bayesian search and evaluate it on the validation set.

## Recommender Testing

In [None]:
# Best hyperparameters reported by the Bayesian search.
# Replace every None with the tuned value before running this cell. The
# original template left the keyword values blank, which is a SyntaxError.
best_hyperparameters = {
    "epochs": None,
    "sgd_mode": None,
    "num_factors": None,
    "batch_size": None,
    "user_reg": None,
    "learning_rate": None,
}

# Fail early with a readable message instead of training with missing values.
unset_hyperparameters = [name for name, value in best_hyperparameters.items() if value is None]
if unset_hyperparameters:
    raise ValueError("Set the tuned value for: " + ", ".join(unset_hyperparameters))

# Final model is built on the full interaction matrix.
recommender = MatrixFactorization_BPR_Cython(URM_all)
recommender.fit(**best_hyperparameters)

## Submissions

In [None]:
# Target users for the submission; the 'user_id' column is read further down.
test_users = pd.read_csv(filepath_or_buffer='../Dataset/data_target_users_test.csv')

In [None]:
# Ask the fitted recommender for 10 items per target user, in file order.
user_id = test_users['user_id']
recommendations = [recommender.recommend(user, cutoff=10) for user in user_id]

In [None]:
# Serialise each user's recommendations as space-separated item ids.
# Joining the ids explicitly avoids relying on numpy's array repr, which pads
# numbers to a common width and can wrap long rows across lines.
test_users['item_list'] = [
    " ".join(str(item) for item in items)
    for items in recommendations
]

# Forward slashes work on every platform and match the path used to read the
# target users; the previous backslash path contained an invalid '\S' escape.
test_users.to_csv('../Submissions/Submission_04_10933934_MF_BPR.csv', index=False)