In [2]:
import os
import random

import numpy as np
import pandas as pd
import scipy.sparse as sps
import matplotlib.pyplot as plt
import seaborn as snb

# Random seed for reproducibility
SEED = 42
random.seed(SEED)
os.environ['PYTHONHASHSEED'] = str(SEED)
np.random.seed(SEED)

In [3]:
from data_manager import DatasetLoader, DatasetSplitter, URMGenerator

dataset_loader = DatasetLoader()
dataset_splitter = DatasetSplitter(dataset_loader)
dataset_train, dataset_val = dataset_splitter.load_train_val()
URM_generator = URMGenerator(dataset_train, dataset_val)
URM_train, URM_val = URM_generator.generate_implicit_URM()
URM_all = URM_train + URM_val

Loading previusly generated splits...
Generating implicit URM...


In [4]:
from evaluation.evaluator import EvaluatorHoldout

evaluator = EvaluatorHoldout(URM_val, cutoff_list=[10])

In [5]:
from Recommenders.SLIM.Cython.SLIM_BPR_Cython import (
    SLIM_BPR_Cython,
)
from skopt.space import Real, Integer, Categorical
from HyperparameterTuning.SearchBayesianSkopt import SearchBayesianSkopt
from HyperparameterTuning.SearchAbstractClass import SearchInputRecommenderArgs

output_folder_path = "result_experiments/SLIM_BPR/"
recommender_class = SLIM_BPR_Cython
n_cases = 10
n_random_starts = int(n_cases * 0.3)
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

# If directory does not exist, create
if not os.path.exists(output_folder_path):
    os.makedirs(output_folder_path)

# Define hyperparameters
hyperparameters_range_dictionary = {
    "topK": Integer(100, 2000),
    "epochs": Categorical([1500]),
    "symmetric": Categorical([True, False]),
    "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
    "lambda_i": Real(low=1e-5, high=1e-2, prior="log-uniform"),
    "lambda_j": Real(low=1e-5, high=1e-2, prior="log-uniform"),
    "learning_rate": Real(low=1e-4, high=1e-1, prior="log-uniform"),
}

hyperparameter_search = SearchBayesianSkopt(
    recommender_class,
    evaluator_validation=evaluator,
)

recommender_input_args = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS=[
        URM_train,
    ],
    CONSTRUCTOR_KEYWORD_ARGS={},
    FIT_POSITIONAL_ARGS=[],
    FIT_KEYWORD_ARGS={},
    EARLYSTOPPING_KEYWORD_ARGS={},
)


In [5]:
hyperparameter_search.search(
    recommender_input_args,
    hyperparameter_search_space=hyperparameters_range_dictionary,
    n_cases=n_cases,
    n_random_starts=n_random_starts,
    save_model="best",
    output_folder_path=output_folder_path,  # Where to save the results
    output_file_name_root=recommender_class.RECOMMENDER_NAME,  # How to call the files
    metric_to_optimize=metric_to_optimize,
    cutoff_to_optimize=cutoff_to_optimize,
)

Iteration No: 1 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'topK': 166, 'epochs': 1500, 'symmetric': True, 'sgd_mode': 'adam', 'lambda_i': 0.0001235574369479305, 'lambda_j': 0.0016934349366472816, 'learning_rate': 0.00010280946828529104}
Unable to read memory status: list index out of range
SLIM_BPR_Recommender: Automatic selection of fastest train mode. Unable to get current RAM status, you may be using a non-Linux operating system. Using dense matrix.
Processed 41629 (100.0%) in 0.77 sec. BPR loss is 3.86E-04. Sample per second: 54217
SLIM_BPR_Recommender: Epoch 1 of 1500. Elapsed time 0.23 sec
Processed 41629 (100.0%) in 1.04 sec. BPR loss is 1.84E-03. Sample per second: 40102
SLIM_BPR_Recommender: Epoch 2 of 1500. Elapsed time 0.50 sec
Processed 41629 (100.0%) in 0.28 sec. BPR loss is 3.73E-03. Sample per second: 150625
SLIM_BPR_Recommender: Epoch 3 of 1500. Elapsed time 0.74 sec
Processed 41629 (100.0%) in 0.51 sec. BPR loss is 6.03E-03. Sa

Traceback (most recent call last):
  File "d:\Developing\RecSys_PoliMi_challenge_2020\HyperparameterTuning\SearchAbstractClass.py", line 604, in _objective_function
    result_df, recommender_instance = self._evaluate_on_validation(
  File "d:\Developing\RecSys_PoliMi_challenge_2020\HyperparameterTuning\SearchAbstractClass.py", line 395, in _evaluate_on_validation
    self.model_counter: recommender_instance.get_validation_summary_table()
  File "d:\Developing\RecSys_PoliMi_challenge_2020\Recommenders\Incremental_Training_Early_Stopping.py", line 110, in get_validation_summary_table
    return self._earlystopping_validation_summary_df.copy()
AttributeError: 'NoneType' object has no attribute 'copy'


Iteration No: 1 ended. Evaluation done at random point.
Time taken: 438.3244
Function value obtained: 65504.0000
Current minimum: 65504.0000
Iteration No: 2 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'topK': 1324, 'epochs': 1500, 'symmetric': False, 'sgd_mode': 'adagrad', 'lambda_i': 3.20416988227697e-05, 'lambda_j': 0.0063093592449212895, 'learning_rate': 0.016490691898869144}
Unable to read memory status: list index out of range
SLIM_BPR_Recommender: Automatic selection of fastest train mode. Unable to get current RAM status, you may be using a non-Linux operating system. Using dense matrix.
Processed 41629 (100.0%) in 0.80 sec. BPR loss is 1.08E-02. Sample per second: 52355
SLIM_BPR_Recommender: Epoch 1 of 1500. Elapsed time 0.47 sec
Processed 41629 (100.0%) in 0.97 sec. BPR loss is 2.70E-02. Sample per second: 43137
SLIM_BPR_Recommender: Epoch 2 of 1500. Elapsed time 0.64 sec
Processed 41629 (100.0%) in 1.09 sec. BPR loss is 4.25E-02. Sample

Traceback (most recent call last):
  File "d:\Developing\RecSys_PoliMi_challenge_2020\HyperparameterTuning\SearchAbstractClass.py", line 604, in _objective_function
    result_df, recommender_instance = self._evaluate_on_validation(
  File "d:\Developing\RecSys_PoliMi_challenge_2020\HyperparameterTuning\SearchAbstractClass.py", line 395, in _evaluate_on_validation
    self.model_counter: recommender_instance.get_validation_summary_table()
  File "d:\Developing\RecSys_PoliMi_challenge_2020\Recommenders\Incremental_Training_Early_Stopping.py", line 110, in get_validation_summary_table
    return self._earlystopping_validation_summary_df.copy()
AttributeError: 'NoneType' object has no attribute 'copy'


Iteration No: 2 ended. Evaluation done at random point.
Time taken: 384.0709
Function value obtained: 65504.0000
Current minimum: 65504.0000
Iteration No: 3 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'topK': 1191, 'epochs': 1500, 'symmetric': False, 'sgd_mode': 'adam', 'lambda_i': 0.002197173369041251, 'lambda_j': 2.672880303082952e-05, 'learning_rate': 0.0011704837541929795}
Unable to read memory status: list index out of range
SLIM_BPR_Recommender: Automatic selection of fastest train mode. Unable to get current RAM status, you may be using a non-Linux operating system. Using dense matrix.
Processed 41629 (100.0%) in 0.78 sec. BPR loss is 1.17E-02. Sample per second: 53578
SLIM_BPR_Recommender: Epoch 1 of 1500. Elapsed time 0.10 sec
Processed 41629 (100.0%) in 0.92 sec. BPR loss is 5.64E-02. Sample per second: 45412
SLIM_BPR_Recommender: Epoch 2 of 1500. Elapsed time 0.24 sec
Processed 41629 (100.0%) in 1.02 sec. BPR loss is 1.14E-01. Sample p

Traceback (most recent call last):
  File "d:\Developing\RecSys_PoliMi_challenge_2020\HyperparameterTuning\SearchAbstractClass.py", line 604, in _objective_function
    result_df, recommender_instance = self._evaluate_on_validation(
  File "d:\Developing\RecSys_PoliMi_challenge_2020\HyperparameterTuning\SearchAbstractClass.py", line 395, in _evaluate_on_validation
    self.model_counter: recommender_instance.get_validation_summary_table()
  File "d:\Developing\RecSys_PoliMi_challenge_2020\Recommenders\Incremental_Training_Early_Stopping.py", line 110, in get_validation_summary_table
    return self._earlystopping_validation_summary_df.copy()
AttributeError: 'NoneType' object has no attribute 'copy'


SearchBayesianSkopt: Search interrupted. No valid config was found during the initial random initialization



In [6]:
from Recommenders.DataIO import DataIO

data_loader = DataIO(folder_path=output_folder_path)
search_metadata = data_loader.load_data(
    recommender_class.RECOMMENDER_NAME + "_metadata.zip"
)

result_on_validation_df = search_metadata["result_on_validation_df"]
result_on_validation_df

In [7]:
hyperparameters_df = search_metadata["hyperparameters_df"]
hyperparameters_df

Unnamed: 0,topK,epochs,symmetric,sgd_mode,lambda_i,lambda_j,learning_rate
0,166.0,1499.0,True,adam,0.000124,0.001693,0.000103
1,1324.0,1499.0,False,adagrad,3.2e-05,0.006309,0.016491
2,1191.0,1499.0,False,adam,0.002197,2.7e-05,0.00117
3,,,,,,,
4,,,,,,,
5,,,,,,,
6,,,,,,,
7,,,,,,,
8,,,,,,,
9,,,,,,,


In [8]:
best_hyperparameters = search_metadata["hyperparameters_best"]
best_hyperparameters

In [9]:
exception_list = search_metadata["exception_list"]
exception_list

['Traceback (most recent call last):\n  File "d:\\Developing\\RecSys_PoliMi_challenge_2020\\HyperparameterTuning\\SearchAbstractClass.py", line 604, in _objective_function\n    result_df, recommender_instance = self._evaluate_on_validation(\n  File "d:\\Developing\\RecSys_PoliMi_challenge_2020\\HyperparameterTuning\\SearchAbstractClass.py", line 395, in _evaluate_on_validation\n    self.model_counter: recommender_instance.get_validation_summary_table()\n  File "d:\\Developing\\RecSys_PoliMi_challenge_2020\\Recommenders\\Incremental_Training_Early_Stopping.py", line 110, in get_validation_summary_table\n    return self._earlystopping_validation_summary_df.copy()\nAttributeError: \'NoneType\' object has no attribute \'copy\'\n',
 'Traceback (most recent call last):\n  File "d:\\Developing\\RecSys_PoliMi_challenge_2020\\HyperparameterTuning\\SearchAbstractClass.py", line 604, in _objective_function\n    result_df, recommender_instance = self._evaluate_on_validation(\n  File "d:\\Developin

In [10]:
recommender = recommender_class(URM_all)
recommender.fit(**best_hyperparameters)
recommender.save_model(
    folder_path=output_folder_path,
    file_name=recommender_class.RECOMMENDER_NAME
    + "_best_model_trained_on_everything.zip",
)

TypeError: fit() argument after ** must be a mapping, not NoneType

In [None]:
from utils.create_submission import create_submission

create_submission(recommender)