In [3]:
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as pyplot
%matplotlib inline

In [4]:
from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Utils.DataReader import load_urm, load_icm, load_target

In [5]:
# Fix the RNG seed so the random hold-out splits below can be reproduced on a
# fresh kernel.
# NOTE(review): this assumes the split helper samples through NumPy's global
# RNG -- confirm against Data_manager.split_functions.
SPLIT_SEED = 42
np.random.seed(SPLIT_SEED)

URM_all = load_urm()

# First split: 85% train+validation / 15% test.
URM_train_validation, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.85)
# Second split: 85% of the remaining interactions for training, 15% for validation.
# The intermediate matrix keeps its own name so URM_train is bound exactly once.
URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_train_validation, train_percentage = 0.85)

# Both evaluators report metrics at cutoff 10 only.
evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])
evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

EvaluatorHoldout: Ignoring 4243 (10.2%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 3069 ( 7.4%) Users that have less than 1 test interactions


## SLIM BPR

In [8]:
from Recommenders.SLIM.Cython.SLIM_BPR_Cython import SLIM_BPR_Cython

# Model to tune: the SLIM BPR Cython recommender imported above.
# Only the class is stored here; no instance is created in this cell.
recommender_class = SLIM_BPR_Cython

In [9]:
import os

# Folder where the hyperparameter search writes its result files.
output_folder_path = "Experiments/"

# exist_ok=True makes the call a no-op when the folder is already there, which
# avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs(output_folder_path, exist_ok=True)

# Search budget: total number of configurations to try, 30% of which are the
# initial random starts (int() truncates towards zero).
n_cases = 50
n_random_starts = int(n_cases*0.3)

# Metric and cutoff the search optimises.
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

In [10]:
from skopt.space import Real, Integer, Categorical

# Search space, one skopt dimension per hyperparameter name.
# `epochs` is a single-value categorical, so it stays fixed at 700; the three
# Real dimensions share the same log-uniform range [1e-4, 1e-1].
hyperparameters_range_dictionary = dict(
    epochs=Categorical([700]),
    sgd_mode=Categorical(["sgd", "adagrad", "adam"]),
    topK=Integer(low=5, high=700),
    lambda_i=Real(1e-4, 1e-1, prior="log-uniform"),
    lambda_j=Real(1e-4, 1e-1, prior="log-uniform"),
    learning_rate=Real(1e-4, 1e-1, prior="log-uniform"),
)

In [11]:
# Keyword arguments later handed over as FIT_KEYWORD_ARGS.
# NOTE(review): going by the key names, validation runs every 15 epochs on
# evaluator_validation and training stops after 5 non-improving validations
# of the optimised metric -- confirm against the recommender's fit().
earlystopping_keywargs = dict(
    validation_every_n=15,
    stop_on_validation=True,
    evaluator_object=evaluator_validation,
    lower_validations_allowed=5,
    validation_metric=metric_to_optimize,
)

In [12]:
from HyperparameterTuning.SearchBayesianSkopt import SearchBayesianSkopt

# Bayesian optimiser for the chosen recommender class; candidate
# configurations are scored with the validation evaluator.
hyperparameterSearch = SearchBayesianSkopt(
    recommender_class,
    evaluator_validation=evaluator_validation,
)

In [13]:
from HyperparameterTuning.SearchAbstractClass import SearchInputRecommenderArgs

def _build_search_input_args(urm):
    """Bundle constructor/fit arguments for a model built on the given URM."""
    return SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[urm],     # For a CBF model simply put [URM_train, ICM_train]
        CONSTRUCTOR_KEYWORD_ARGS={},
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS=earlystopping_keywargs,
    )


# Two bundles with identical fit arguments: one builds the model on URM_train
# (used during the search), the other on URM_all (used for the last fit).
# NOTE(review): the URM_all bundle reuses the early-stopping arguments, whose
# evaluator is built from URM_validation, a subset of URM_all -- confirm that
# this is intended.
recommender_input_args = _build_search_input_args(URM_train)
recommender_input_args_last_test = _build_search_input_args(URM_all)

In [14]:
# Shell escape: runs run_compile_all_cython.py with whichever `python` is first on PATH.
!python run_compile_all_cython.py

run_compile_all_cython: Found 10 Cython files in 4 folders...
run_compile_all_cython: All files will be compiled using your current python environment: 'C:\Users\Luca\miniconda3\python.exe'
Compiling [1/10]: MatrixFactorizationImpressions_Cython_Epoch.pyx... 
Compiling [1/10]: MatrixFactorizationImpressions_Cython_Epoch.pyx... PASS

Compiling [2/10]: MatrixFactorization_Cython_Epoch.pyx... 
Compiling [2/10]: MatrixFactorization_Cython_Epoch.pyx... PASS

Compiling [3/10]: Compute_Similarity_Cython.pyx... 
Compiling [3/10]: Compute_Similarity_Cython.pyx... PASS

Compiling [4/10]: SLIM_BPR_Cython_Epoch.pyx... 
Compiling [4/10]: SLIM_BPR_Cython_Epoch.pyx... PASS

Compiling [5/10]: Sparse_Matrix_Tree_CSR.pyx... 
Compiling [5/10]: Sparse_Matrix_Tree_CSR.pyx... PASS

Compiling [6/10]: Triangular_Matrix.pyx... 
Compiling [6/10]: Triangular_Matrix.pyx... PASS

Compiling [7/10]: CFW_DVV_Similarity_Cython_SGD.pyx... 
Compiling [7/10]: CFW_DVV_Similarity_Cython_SGD.pyx... PASS

Compiling [8/10]: C

  tree = Parsing.p_module(s, pxd, full_module_name)
  tree = Parsing.p_module(s, pxd, full_module_name)
  tree = Parsing.p_module(s, pxd, full_module_name)
  tree = Parsing.p_module(s, pxd, full_module_name)
  tree = Parsing.p_module(s, pxd, full_module_name)
  tree = Parsing.p_module(s, pxd, full_module_name)
  tree = Parsing.p_module(s, pxd, full_module_name)
  tree = Parsing.p_module(s, pxd, full_module_name)
  tree = Parsing.p_module(s, pxd, full_module_name)
  tree = Parsing.p_module(s, pxd, full_module_name)


In [15]:
# Launch the Bayesian search with the budget, search space and target metric
# configured in the cells above.
hyperparameterSearch.search(
    recommender_input_args,
    recommender_input_args_last_test=recommender_input_args_last_test,
    hyperparameter_search_space=hyperparameters_range_dictionary,
    n_cases=n_cases,
    n_random_starts=n_random_starts,
    save_model="last",
    # Result files go to output_folder_path and are named after the recommender.
    output_folder_path=output_folder_path,
    output_file_name_root=recommender_class.RECOMMENDER_NAME,
    metric_to_optimize=metric_to_optimize,
    cutoff_to_optimize=cutoff_to_optimize,
)

Iteration No: 1 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'epochs': 700, 'sgd_mode': 'adam', 'topK': 564, 'lambda_i': 0.002253105796215214, 'lambda_j': 0.002086612185449316, 'learning_rate': 0.0007713387033313245}
SLIM_BPR_Recommender: URM Detected 1 ( 0.0%) users with no interactions.
Unable to read memory status: list index out of range
SLIM_BPR_Recommender: Automatic selection of fastest train mode. Unable to get current RAM status, you may be using a non-Linux operating system. Using dense matrix.


KeyboardInterrupt: 

In [None]:
from Recommenders.DataIO import DataIO

# Reload the metadata archive from the output folder; its file name is the
# recommender name followed by "_metadata.zip".
data_loader = DataIO(folder_path=output_folder_path)
search_metadata = data_loader.load_data(
    recommender_class.RECOMMENDER_NAME + "_metadata.zip"
)

# Show which entries the metadata contains.
search_metadata.keys()

In [None]:
# Pull the "hyperparameters_df" entry out of the loaded metadata and display it.
hyperparameters_df = search_metadata["hyperparameters_df"]
hyperparameters_df

In [None]:
# Pull the "result_on_validation_df" entry out of the loaded metadata and display it.
result_on_validation_df = search_metadata["result_on_validation_df"]
result_on_validation_df

In [None]:
# Pull the "hyperparameters_best" entry out of the loaded metadata and display it.
best_hyperparameters = search_metadata["hyperparameters_best"]
best_hyperparameters

In [None]:
# Train the final model on the full interaction matrix using the best
# configuration recorded by the search. The previous call left every keyword
# value blank (`epochs=, sgd_mode=, ...`), which is a SyntaxError.
# NOTE(review): assumes search_metadata["hyperparameters_best"] is a dict keyed
# by fit() argument names -- confirm against the saved metadata.
if best_hyperparameters is None:
    raise ValueError("No best hyperparameters available: run the hyperparameter search to completion first.")

recommender = SLIM_BPR_Cython(URM_all)
recommender.fit(**best_hyperparameters)

## Submissions

In [31]:
# Load the target users for the submission.
# Forward slashes work on Windows as well as POSIX; the previous backslash
# literal also contained the invalid escape sequences "\D" and "\d".
test_users = pd.read_csv('../Dataset/data_target_users_test.csv')
test_users

FileNotFoundError: [Errno 2] No such file or directory: 'Dataset\\data_target_users_test.csv'

In [None]:
# Build one top-10 recommendation list per target user, in file order.
# NOTE(review): the list length is passed as `cutoff`, the keyword used by the
# course framework's BaseRecommender.recommend; the previous `at=10` is not an
# accepted keyword there -- confirm against the installed framework version.
user_id = test_users['user_id']
recommendations = [recommender.recommend(user, cutoff=10) for user in user_id]

In [None]:
# Keep each recommendation list as a NumPy array, as the original cell did.
recommendations = [np.array(items) for items in recommendations]

# Serialise every list as space-separated item ids. Joining the elements
# explicitly avoids relying on NumPy's array repr, which pads numbers to a
# common width (e.g. "[ 12   3 456]") and can wrap long arrays, leaving stray
# or repeated spaces in the written item_list.
test_users['item_list'] = [
    " ".join(str(item) for item in items) for items in recommendations
]
test_users.to_csv('Submission_10933934.csv', index=False)