In [1]:
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as pyplot
%matplotlib inline

In [2]:
from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Utils.DataReader import load_urm, load_icm, load_target

In [3]:
# Load the complete URM; every split below is derived from it.
URM_all = load_urm()

# Two nested random hold-outs, both called with train_percentage=0.85:
# the first separates the test set, the second separates the validation set
# from what is left.
URM_train_validation, URM_test = split_train_in_two_percentage_global_sample(
    URM_all, train_percentage=0.85
)
URM_train, URM_validation = split_train_in_two_percentage_global_sample(
    URM_train_validation, train_percentage=0.85
)

# One evaluator per held-out matrix, both reporting metrics at cutoff 10 only.
evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])
evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

EvaluatorHoldout: Ignoring 4356 (10.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 3151 ( 7.6%) Users that have less than 1 test interactions


## SLIM BPR

In [4]:
from Recommenders.SLIM.Cython.SLIM_BPR_Cython import SLIM_BPR_Cython

# Recommender class to tune: the Cython SLIM BPR implementation imported above.
# The class itself (not an instance) is stored because the hyperparameter search
# receives it as its first argument and RECOMMENDER_NAME is read from it later.
recommender_class = SLIM_BPR_Cython

In [12]:
import os

# Folder where the search writes its result files.
output_folder_path = "Experiments/"

# Create the folder if it is missing. exist_ok=True replaces the separate
# existence check, so there is no window between "check" and "create" and the
# cell can be re-run safely.
os.makedirs(output_folder_path, exist_ok=True)

# Search budget: total number of configurations to evaluate, and how many of
# them (30%) are passed to the search as n_random_starts.
n_cases = 10
n_random_starts = int(n_cases * 0.3)

# Metric and cutoff passed to the search as the optimization target.
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

In [13]:
from skopt.space import Real, Integer, Categorical

# Search space handed to the Bayesian optimizer, one entry per tuned argument.
hyperparameters_range_dictionary = dict(
    # Single-choice dimension: the epoch value is always 700 in sampled configs.
    epochs=Categorical([700]),
    sgd_mode=Categorical(["sgd", "adagrad", "adam"]),
    topK=Integer(5, 700),
    # The three continuous dimensions share the same log-uniform range.
    lambda_i=Real(low=1e-4, high=1e-1, prior="log-uniform"),
    lambda_j=Real(low=1e-4, high=1e-1, prior="log-uniform"),
    learning_rate=Real(low=1e-4, high=1e-1, prior="log-uniform"),
)

In [14]:
# Early-stopping options; this dictionary is later passed as FIT_KEYWORD_ARGS.
# Validation uses the validation evaluator and the same metric the search
# optimizes.
earlystopping_keywargs = {
    "validation_every_n": 15,
    "stop_on_validation": True,
    "evaluator_object": evaluator_validation,
    "lower_validations_allowed": 5,
    "validation_metric": metric_to_optimize,
}

In [15]:
from HyperparameterTuning.SearchBayesianSkopt import SearchBayesianSkopt

# Bayesian search object bound to the recommender class under tuning and to the
# evaluator that scores each candidate configuration on the validation split.
hyperparameterSearch = SearchBayesianSkopt(
    recommender_class,
    evaluator_validation=evaluator_validation,
)

In [16]:
from HyperparameterTuning.SearchAbstractClass import SearchInputRecommenderArgs

# Arguments used to build and fit each candidate model during the search.
# The model is constructed on URM_train only
# (for a CBF model the positional args would be [URM_train, ICM_train]).
recommender_input_args = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
    CONSTRUCTOR_KEYWORD_ARGS={},
    FIT_POSITIONAL_ARGS=[],
    FIT_KEYWORD_ARGS=earlystopping_keywargs,
)

# Same structure for the "last" model, which is constructed on URM_all instead.
recommender_input_args_last_test = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS=[URM_all],
    CONSTRUCTOR_KEYWORD_ARGS={},
    FIT_POSITIONAL_ARGS=[],
    FIT_KEYWORD_ARGS=earlystopping_keywargs,
)

In [17]:
# Compile the project's Cython sources by running the compile script in a shell.
# NOTE(review): the recorded output shows SLIM_BPR_Cython_Epoch.pyx failing at
# link time (link.exe exit code 1104). Presumably the previously built extension
# was in use by this kernel; consider compiling before the recommender is
# imported or fitted. TODO confirm.
!python run_compile_all_cython.py

run_compile_all_cython: Found 10 Cython files in 4 folders...
run_compile_all_cython: All files will be compiled using your current python environment: 'C:\Users\Luca\miniconda3\python.exe'
Compiling [1/10]: MatrixFactorizationImpressions_Cython_Epoch.pyx... 
Compiling [1/10]: MatrixFactorizationImpressions_Cython_Epoch.pyx... PASS

Compiling [2/10]: MatrixFactorization_Cython_Epoch.pyx... 
Compiling [2/10]: MatrixFactorization_Cython_Epoch.pyx... PASS

Compiling [3/10]: Compute_Similarity_Cython.pyx... 
Compiling [3/10]: Compute_Similarity_Cython.pyx... PASS

Compiling [4/10]: SLIM_BPR_Cython_Epoch.pyx... 
Compiling [4/10]: SLIM_BPR_Cython_Epoch.pyx... FAIL: Command 'C:\Users\Luca\miniconda3\python.exe C:\Users\Luca\DataspellProjects\Recommender-Systems-Challenge-2022/CythonCompiler/compile_script.py SLIM_BPR_Cython_Epoch.pyx build_ext --inplace' returned non-zero exit status 1.

Compiling [5/10]: Sparse_Matrix_Tree_CSR.pyx... 
Compiling [5/10]: Sparse_Matrix_Tree_CSR.pyx... PASS

Com

  tree = Parsing.p_module(s, pxd, full_module_name)
  tree = Parsing.p_module(s, pxd, full_module_name)
  tree = Parsing.p_module(s, pxd, full_module_name)
error: command 'C:\\Program Files\\Microsoft Visual Studio\\2022\\Community\\VC\\Tools\\MSVC\\14.32.31326\\bin\\HostX86\\x64\\link.exe' failed with exit code 1104
Traceback (most recent call last):
  File "C:\Users\Luca\DataspellProjects\Recommender-Systems-Challenge-2022\run_compile_all_cython.py", line 59, in <module>
    run_compile_subprocess(file_path, [file_name])
  File "C:\Users\Luca\DataspellProjects\Recommender-Systems-Challenge-2022\CythonCompiler\run_compile_subprocess.py", line 51, in run_compile_subprocess
    raise exc
  File "C:\Users\Luca\DataspellProjects\Recommender-Systems-Challenge-2022\CythonCompiler\run_compile_subprocess.py", line 32, in run_compile_subprocess
    output = subprocess.check_output(' '.join(command),
  File "C:\Users\Luca\miniconda3\lib\subprocess.py", line 424, in check_output
    return run(*

In [18]:
# Keyword settings for the Bayesian search, collected in one place so the call
# below stays short.
search_settings = dict(
    recommender_input_args_last_test=recommender_input_args_last_test,
    hyperparameter_search_space=hyperparameters_range_dictionary,
    n_cases=n_cases,
    n_random_starts=n_random_starts,
    save_model="last",
    output_folder_path=output_folder_path,  # where the results are saved
    output_file_name_root=recommender_class.RECOMMENDER_NAME,  # file name prefix
    metric_to_optimize=metric_to_optimize,
    cutoff_to_optimize=cutoff_to_optimize,
)

# Run the search; progress is printed while configurations are evaluated.
hyperparameterSearch.search(recommender_input_args, **search_settings)

Iteration No: 1 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'epochs': 700, 'sgd_mode': 'adam', 'topK': 162, 'lambda_i': 0.02893179462978207, 'lambda_j': 0.024830665584446056, 'learning_rate': 0.00022273197535303317}
SLIM_BPR_Recommender: URM Detected 2 ( 0.0%) users with no interactions.
Unable to read memory status: list index out of range
SLIM_BPR_Recommender: Automatic selection of fastest train mode. Unable to get current RAM status, you may be using a non-Linux operating system. Using dense matrix.
Processed 41629 (100.0%) in 0.40 sec. BPR loss is 9.60E-04. Sample per second: 102933
SLIM_BPR_Recommender: Epoch 1 of 700. Elapsed time 0.12 sec
Processed 41629 (100.0%) in 0.54 sec. BPR loss is 4.36E-03. Sample per second: 76692
SLIM_BPR_Recommender: Epoch 2 of 700. Elapsed time 0.26 sec
Processed 41629 (100.0%) in 0.68 sec. BPR loss is 8.61E-03. Sample per second: 61067
SLIM_BPR_Recommender: Epoch 3 of 700. Elapsed time 0.40 sec
Processed 41629

In [19]:
from Recommenders.DataIO import DataIO

# Reload the metadata archive from the search output folder. Its name is the
# recommender name followed by "_metadata.zip".
data_loader = DataIO(folder_path=output_folder_path)
metadata_file_name = recommender_class.RECOMMENDER_NAME + "_metadata.zip"
search_metadata = data_loader.load_data(metadata_file_name)

# Show which entries are available.
search_metadata.keys()

dict_keys(['algorithm_name_recommender', 'algorithm_name_search', 'cutoff_to_optimize', 'exception_list', 'hyperparameters_best', 'hyperparameters_best_index', 'hyperparameters_df', 'metric_to_optimize', 'result_on_earlystopping_df', 'result_on_last', 'result_on_test_best', 'result_on_test_df', 'result_on_validation_best', 'result_on_validation_df', 'time_df', 'time_on_last_df', 'time_on_test_avg', 'time_on_test_total', 'time_on_train_avg', 'time_on_train_total', 'time_on_validation_avg', 'time_on_validation_total'])

In [20]:
# Table of the hyperparameter configurations stored by the search, one row per
# evaluated case; displayed as the cell output.
hyperparameters_df = search_metadata["hyperparameters_df"]
hyperparameters_df

Unnamed: 0,epochs,sgd_mode,topK,lambda_i,lambda_j,learning_rate
0,15,adam,162,0.028932,0.024831,0.000223
1,690,adagrad,203,0.013059,0.053613,0.00021
2,690,sgd,248,0.001175,0.008156,0.004101
3,45,sgd,237,0.098615,0.004788,0.009941
4,690,sgd,260,0.000864,0.006736,0.004865
5,135,adagrad,284,0.0001,0.005777,0.00049
6,15,adam,174,0.0001,0.002028,0.002223
7,150,adagrad,116,0.099904,0.001133,0.001331
8,15,adagrad,590,0.1,0.0001,0.048334
9,540,adagrad,5,0.0001,0.055215,0.002598


In [21]:
# Validation metrics stored by the search for each evaluated case, indexed by
# case number and cutoff; displayed as the cell output.
result_on_validation_df = search_metadata["result_on_validation_df"]
result_on_validation_df

Unnamed: 0_level_0,Unnamed: 1_level_0,PRECISION,PRECISION_RECALL_MIN_DEN,RECALL,MAP,MAP_MIN_DEN,MRR,NDCG,F1,HIT_RATE,ARHR_ALL_HITS,...,COVERAGE_USER,COVERAGE_USER_HIT,USERS_IN_GT,DIVERSITY_GINI,SHANNON_ENTROPY,RATIO_DIVERSITY_HERFINDAHL,RATIO_DIVERSITY_GINI,RATIO_SHANNON_ENTROPY,RATIO_AVERAGE_POPULARITY,RATIO_NOVELTY
Unnamed: 0_level_1,cutoff,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,10,0.029042,0.075548,0.074617,0.01265,0.033033,0.101065,0.060334,0.041811,0.230113,0.112795,...,0.895361,0.206034,0.895361,0.026106,7.191279,0.975711,0.059577,0.545582,5.963483,0.389159
1,10,0.030768,0.079168,0.078173,0.013758,0.035275,0.108266,0.064066,0.044156,0.240201,0.121725,...,0.895361,0.215066,0.895361,0.028121,7.489129,0.980137,0.064175,0.568179,5.539833,0.395767
2,10,0.032063,0.083292,0.082319,0.014368,0.037373,0.113851,0.067544,0.046151,0.251254,0.127643,...,0.895361,0.224963,0.895361,0.02718,8.062424,0.986952,0.062029,0.611673,4.833712,0.405138
3,10,0.030124,0.077981,0.077035,0.012918,0.033111,0.102927,0.061533,0.043311,0.239664,0.115075,...,0.895361,0.214586,0.895361,0.025836,8.370642,0.99153,0.058961,0.635056,3.977819,0.414112
4,10,0.032249,0.083742,0.082749,0.014433,0.037521,0.11432,0.067835,0.04641,0.252623,0.128201,...,0.895361,0.226188,0.895361,0.027862,8.142371,0.987686,0.063585,0.617738,4.740049,0.406911
5,10,0.030362,0.078117,0.07713,0.013399,0.034426,0.105433,0.062781,0.043572,0.236874,0.118537,...,0.895361,0.212088,0.895361,0.027304,7.3486,0.978078,0.062312,0.557517,5.749305,0.392777
6,10,0.029684,0.077371,0.076429,0.012904,0.033628,0.102652,0.061526,0.04276,0.234379,0.114821,...,0.895361,0.209854,0.895361,0.02762,7.299853,0.976671,0.063034,0.553819,5.892495,0.390964
7,10,0.032544,0.082636,0.081524,0.014815,0.036954,0.114405,0.067235,0.046518,0.249242,0.129813,...,0.895361,0.223162,0.895361,0.032102,8.184916,0.989669,0.073262,0.620966,4.400237,0.410667
8,10,0.021101,0.050217,0.049316,0.008369,0.019673,0.066795,0.038941,0.029556,0.169935,0.074571,...,0.895361,0.152154,0.895361,0.084752,10.684975,0.998996,0.193417,0.810638,1.339368,0.484435
9,10,0.030032,0.07494,0.073942,0.013429,0.033869,0.106274,0.061752,0.042715,0.234674,0.119194,...,0.895361,0.210118,0.895361,0.091922,9.966349,0.995125,0.20978,0.756118,3.044067,0.465576


In [22]:
# Configuration stored under "hyperparameters_best" in the search metadata;
# a plain dict keyed by hyperparameter name, displayed as the cell output.
best_hyperparameters = search_metadata["hyperparameters_best"]
best_hyperparameters

{'epochs': 150,
 'sgd_mode': 'adagrad',
 'topK': 116,
 'lambda_i': 0.0999035041910438,
 'lambda_j': 0.001133491359312735,
 'learning_rate': 0.0013313596244767724}

In [23]:
# Train the final model on the full URM using the configuration selected by the
# search. Unpacking best_hyperparameters (keys: epochs, sgd_mode, topK,
# lambda_i, lambda_j, learning_rate) keeps this cell in sync with the search
# results instead of relying on hand-copied values that can go stale.
recommender = SLIM_BPR_Cython(URM_all)
recommender.fit(**best_hyperparameters)

Unable to read memory status: list index out of range
SLIM_BPR_Recommender: Automatic selection of fastest train mode. Unable to get current RAM status, you may be using a non-Linux operating system. Using dense matrix.
Processed 41629 (100.0%) in 0.46 sec. BPR loss is 3.04E-04. Sample per second: 89830
SLIM_BPR_Recommender: Epoch 1 of 150. Elapsed time 0.16 sec
Processed 41629 (100.0%) in 0.66 sec. BPR loss is 9.97E-04. Sample per second: 63385
SLIM_BPR_Recommender: Epoch 2 of 150. Elapsed time 0.35 sec
Processed 41629 (100.0%) in 0.83 sec. BPR loss is 1.69E-03. Sample per second: 50404
SLIM_BPR_Recommender: Epoch 3 of 150. Elapsed time 0.52 sec
Processed 41629 (100.0%) in 1.00 sec. BPR loss is 2.24E-03. Sample per second: 41494
SLIM_BPR_Recommender: Epoch 4 of 150. Elapsed time 0.70 sec
Processed 41629 (100.0%) in 0.19 sec. BPR loss is 3.06E-03. Sample per second: 213253
SLIM_BPR_Recommender: Epoch 5 of 150. Elapsed time 0.89 sec
Processed 41629 (100.0%) in 0.37 sec. BPR loss is 3.72

## Submissions

In [26]:
# Load the target users for the submission.
# A forward slash is used as the separator: the previous backslash formed the
# invalid escape sequence "\d" and only resolved as a path on Windows.
test_users = pd.read_csv('Dataset/data_target_users_test.csv')
test_users

Unnamed: 0,user_id
0,0
1,1
2,2
3,3
4,4
...,...
41111,41624
41112,41625
41113,41626
41114,41627


In [28]:
# Ask the trained recommender for 10 items per target user, keeping the order
# of the target-user table.
user_id = test_users['user_id']
recommendations = [recommender.recommend(user, cutoff=10) for user in user_id]

In [29]:
# Normalize every recommendation list to a NumPy array.
recommendations = [np.asarray(items) for items in recommendations]

# Serialize each list as item ids separated by a single space. Joining the ids
# explicitly avoids relying on the printed form of a NumPy array, which pads
# elements to a common width and may wrap or truncate depending on print
# options.
test_users['item_list'] = [
    " ".join(str(item) for item in items) for items in recommendations
]

# Write the submission file with the user_id and item_list columns, no index.
test_users.to_csv('Submission_10933934.csv', index=False)