In [1]:
import os
import random

import numpy as np
import pandas as pd
import scipy.sparse as sps
import matplotlib.pyplot as plt
import seaborn as snb

# Random seed for reproducibility
SEED = 42
random.seed(SEED)
os.environ['PYTHONHASHSEED'] = str(SEED)
np.random.seed(SEED)

In [2]:
from data_manager import DatasetLoader, DatasetSplitter, URMGenerator

dataset_loader = DatasetLoader()
dataset_splitter = DatasetSplitter(dataset_loader)
dataset_train, dataset_val = dataset_splitter.load_train_val()
URM_generator = URMGenerator(dataset_train, dataset_val)
URM_train, URM_val = URM_generator.generate_explicit_URM()
URM_all = URM_train + URM_val

Loading previusly generated splits...
Generating explicit URM...


In [3]:
from evaluation.evaluator import EvaluatorHoldout

evaluator = EvaluatorHoldout(URM_val, cutoff_list=[10])

In [8]:
from Recommenders.GraphBased.RP3betaRecommender import (
    RP3betaRecommender,
)
from skopt.space import Real, Integer, Categorical
from HyperparameterTuning.SearchBayesianSkopt import SearchBayesianSkopt
from HyperparameterTuning.SearchAbstractClass import SearchInputRecommenderArgs

output_folder_path = "result_experiments/RP3betaRecommender_explicit/"
recommender_class = RP3betaRecommender
n_cases = 10
n_random_starts = int(n_cases * 0.3)
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

# If directory does not exist, create
if not os.path.exists(output_folder_path):
    os.makedirs(output_folder_path)

# Define hyperparameters
hyperparameters_range_dictionary = {
    "topK": Integer(5, 1000),
    "alpha": Real(low=0, high=2, prior="uniform"),
    "beta": Real(low=0, high=2, prior="uniform"),
    "normalize_similarity": Categorical([True, False]),
}

hyperparameter_search = SearchBayesianSkopt(
    recommender_class,
    evaluator_validation=evaluator,
)

recommender_input_args = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS=[
        URM_train,
    ],
    CONSTRUCTOR_KEYWORD_ARGS={},
    FIT_POSITIONAL_ARGS=[],
    FIT_KEYWORD_ARGS={},
    EARLYSTOPPING_KEYWORD_ARGS={},
)


In [9]:
hyperparameter_search.search(
    recommender_input_args,
    hyperparameter_search_space=hyperparameters_range_dictionary,
    n_cases=n_cases,
    n_random_starts=n_random_starts,
    save_model="best",
    output_folder_path=output_folder_path,  # Where to save the results
    output_file_name_root=recommender_class.RECOMMENDER_NAME,  # How to call the files
    metric_to_optimize=metric_to_optimize,
    cutoff_to_optimize=cutoff_to_optimize,
)

Iteration No: 1 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'topK': 584, 'alpha': 1.4875779670057994, 'beta': 1.4920670083680772, 'normalize_similarity': True}
RP3betaRecommender: Similarity column 24507 (100.0%), 431.00 column/sec. Elapsed time 56.86 sec
EvaluatorHoldout: Processed 41629 (100.0%) in 1.27 min. Users per second: 545
SearchBayesianSkopt: New best config found. Config 0: {'topK': 584, 'alpha': 1.4875779670057994, 'beta': 1.4920670083680772, 'normalize_similarity': True} - results: PRECISION: 0.0003243, PRECISION_RECALL_MIN_DEN: 0.0004766, RECALL: 0.0003927, MAP: 0.0000835, MAP_MIN_DEN: 0.0001189, MRR: 0.0006928, NDCG: 0.0003329, F1: 0.0003552, HIT_RATE: 0.0028586, ARHR_ALL_HITS: 0.0007592, NOVELTY: 0.0065139, AVERAGE_POPULARITY: 0.0026377, DIVERSITY_MEAN_INTER_LIST: 0.9962453, DIVERSITY_HERFINDAHL: 0.9996221, COVERAGE_ITEM: 0.4284082, COVERAGE_ITEM_HIT: 0.0044885, ITEMS_IN_GT: 0.9925736, COVERAGE_USER: 1.0000000, COVERAGE_USER_HIT: 

In [10]:
from Recommenders.DataIO import DataIO

data_loader = DataIO(folder_path=output_folder_path)
search_metadata = data_loader.load_data(
    recommender_class.RECOMMENDER_NAME + "_metadata.zip"
)

result_on_validation_df = search_metadata["result_on_validation_df"]
result_on_validation_df

Unnamed: 0_level_0,Unnamed: 1_level_0,PRECISION,PRECISION_RECALL_MIN_DEN,RECALL,MAP,MAP_MIN_DEN,MRR,NDCG,F1,HIT_RATE,ARHR_ALL_HITS,...,COVERAGE_USER,COVERAGE_USER_HIT,USERS_IN_GT,DIVERSITY_GINI,SHANNON_ENTROPY,RATIO_DIVERSITY_HERFINDAHL,RATIO_DIVERSITY_GINI,RATIO_SHANNON_ENTROPY,RATIO_AVERAGE_POPULARITY,RATIO_NOVELTY
Unnamed: 0_level_1,cutoff,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,10,0.000324,0.000477,0.000393,8.4e-05,0.000119,0.000693,0.000333,0.000355,0.002859,0.000759,...,1.0,0.002859,1.0,0.135454,12.084331,0.999952,0.266271,0.895541,0.052121,0.372687
1,10,0.042514,0.068011,0.064236,0.020603,0.032753,0.145801,0.077463,0.051165,0.296764,0.172489,...,1.0,0.296764,1.0,0.062906,9.778231,0.994849,0.123658,0.724641,3.342692,0.270012
2,10,0.00515,0.007088,0.006338,0.002146,0.002872,0.015201,0.00793,0.005683,0.038243,0.017932,...,1.0,0.038243,1.0,0.173046,12.441135,1.000039,0.340166,0.921983,0.072741,0.36829
3,10,0.043136,0.070844,0.067041,0.020395,0.033227,0.145821,0.075616,0.052495,0.305844,0.171528,...,1.0,0.305844,1.0,0.027866,8.216271,0.98865,0.054779,0.608888,4.854045,0.239004
4,10,0.040589,0.066103,0.062457,0.018906,0.030467,0.135219,0.071116,0.049203,0.289101,0.158981,...,1.0,0.289101,1.0,0.044035,9.167429,0.993883,0.086562,0.679376,3.280305,0.255426
5,10,0.039112,0.062693,0.05919,0.018061,0.028517,0.129628,0.068797,0.047101,0.279325,0.152376,...,1.0,0.279325,1.0,0.056034,9.21373,0.99096,0.11015,0.682808,4.171347,0.259962
6,10,0.04439,0.073583,0.069615,0.021039,0.034659,0.150183,0.073438,0.054212,0.314564,0.176744,...,1.0,0.314564,1.0,0.027636,8.119163,0.986437,0.054325,0.601692,5.327537,0.236541
7,10,0.000283,0.000597,0.000575,5.8e-05,0.000127,0.000584,0.000392,0.00038,0.002835,0.000584,...,1.0,0.002835,1.0,0.349675,13.418974,1.000222,0.687377,0.994448,0.103712,0.359444
8,10,0.044957,0.072969,0.068936,0.02167,0.035018,0.152673,0.079654,0.054422,0.312619,0.180829,...,1.0,0.312619,1.0,0.027526,8.763106,0.993867,0.05411,0.649413,3.834819,0.247274
9,10,0.041094,0.067677,0.064102,0.018909,0.030666,0.136762,0.072098,0.050082,0.295659,0.160138,...,1.0,0.295659,1.0,0.016376,7.671735,0.985428,0.032192,0.568534,5.398011,0.232966


In [11]:
hyperparameters_df = search_metadata["hyperparameters_df"]
hyperparameters_df

Unnamed: 0,topK,alpha,beta,normalize_similarity
0,584,1.487578,1.492067,True
1,85,1.579137,0.215863,True
2,391,1.326802,1.064339,True
3,96,0.823334,0.0,False
4,1000,0.210768,0.681386,False
5,1000,1.725199,0.091251,True
6,772,0.0,0.307223,True
7,5,2.0,0.337019,False
8,146,0.874504,0.0,True
9,1000,1.319225,0.0,True


In [8]:
best_hyperparameters = search_metadata["hyperparameters_best"]
best_hyperparameters

In [9]:
exception_list = search_metadata["exception_list"]
exception_list

['Traceback (most recent call last):\n  File "d:\\Developing\\RecSys_PoliMi_challenge_2020\\HyperparameterTuning\\SearchAbstractClass.py", line 604, in _objective_function\n    result_df, recommender_instance = self._evaluate_on_validation(\n  File "d:\\Developing\\RecSys_PoliMi_challenge_2020\\HyperparameterTuning\\SearchAbstractClass.py", line 395, in _evaluate_on_validation\n    self.model_counter: recommender_instance.get_validation_summary_table()\n  File "d:\\Developing\\RecSys_PoliMi_challenge_2020\\Recommenders\\Incremental_Training_Early_Stopping.py", line 110, in get_validation_summary_table\n    return self._earlystopping_validation_summary_df.copy()\nAttributeError: \'NoneType\' object has no attribute \'copy\'\n',
 'Traceback (most recent call last):\n  File "d:\\Developing\\RecSys_PoliMi_challenge_2020\\HyperparameterTuning\\SearchAbstractClass.py", line 604, in _objective_function\n    result_df, recommender_instance = self._evaluate_on_validation(\n  File "d:\\Developin

In [10]:
recommender = recommender_class(URM_all)
recommender.fit(**best_hyperparameters)
recommender.save_model(
    folder_path=output_folder_path,
    file_name=recommender_class.RECOMMENDER_NAME
    + "_best_model_trained_on_everything.zip",
)

TypeError: fit() argument after ** must be a mapping, not NoneType

In [None]:
from utils.create_submission import create_submission

create_submission(recommender)