In [1]:
import os
import random

import numpy as np
import pandas as pd
import scipy.sparse as sps
import matplotlib.pyplot as plt
import seaborn as snb

# Pin every random source used by this notebook to one seed so that a
# Restart & Run All produces the same numbers.
SEED = 42

# NOTE: Python reads PYTHONHASHSEED at interpreter start-up, so assigning it
# here is only seen by child processes launched afterwards; the hash
# randomisation of the already-running kernel is not changed by this line.
os.environ['PYTHONHASHSEED'] = f"{SEED}"

# Seed the stdlib generator and NumPy's legacy global generator.
random.seed(SEED)
np.random.seed(SEED)

In [2]:
from data_manager import DatasetLoader, DatasetSplitter, URMGenerator

# Load the train/validation split (the recorded output shows it being read
# from previously generated splits) and build the explicit URMs from it.
dataset_loader = DatasetLoader()
dataset_splitter = DatasetSplitter(dataset_loader)
dataset_train, dataset_val = dataset_splitter.load_train_val()

URM_generator = URMGenerator(dataset_train, dataset_val)
URM_train, URM_val = URM_generator.generate_explicit_URM(
    log_base=17,
    views_weight=1,
    details_weight=1,
)

# Train + validation interactions combined; used further down to refit the
# final model once the hyperparameters have been chosen.
URM_all = URM_train + URM_val

Loading previusly generated splits...
Generating explicit URM...


In [3]:
from evaluation.evaluator import EvaluatorHoldout

# Evaluator built on the validation URM with a single cutoff of 10; it is
# handed to the hyperparameter search as `evaluator_validation`.
evaluator = EvaluatorHoldout(URM_val, cutoff_list=[10])

In [6]:
from Recommenders.EASE_R.EASE_R_Recommender import (
    EASE_R_Recommender,
)
from skopt.space import Real, Integer, Categorical
from HyperparameterTuning.SearchBayesianSkopt import SearchBayesianSkopt
from HyperparameterTuning.SearchAbstractClass import SearchInputRecommenderArgs

# --- Search configuration ---------------------------------------------------
# Folder that receives the search metadata and the saved models.
# NOTE(review): the saved-model log recorded later in this notebook reports
# 'result_experiments/EASE_R_Recommender/' (without the trailing "2"), so the
# stored outputs were probably produced with a different folder - rerun the
# notebook top to bottom to confirm the outputs match this setting.
output_folder_path = "result_experiments/EASE_R_Recommender2/"
recommender_class = EASE_R_Recommender

# Number of configurations the search evaluates, and how many of them are
# passed as `n_random_starts` (30% of the cases, truncated: 3 of 10).
n_cases = 10
n_random_starts = int(n_cases * 0.3)

# Metric and cutoff passed to the search as the optimisation target.
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

# Create the output folder, including missing parents. `exist_ok=True` makes
# the call a no-op when the folder is already there and avoids the
# check-then-create race of `if not os.path.exists(...): os.makedirs(...)`.
os.makedirs(output_folder_path, exist_ok=True)

# Search space: `topK` and `normalize_matrix` are pinned to a single value
# each, so the only dimension actually explored is `l2_norm`, sampled
# log-uniformly between 1e0 and 1e7.
hyperparameters_range_dictionary = dict(
    topK=Categorical([None]),
    normalize_matrix=Categorical([False]),
    l2_norm=Real(low=1e0, high=1e7, prior="log-uniform"),
)

# Bayesian search driver for the chosen recommender class, scored with the
# validation evaluator created earlier.
hyperparameter_search = SearchBayesianSkopt(recommender_class, evaluator_validation=evaluator)

# Arguments the search uses to build and fit each candidate model: the
# training URM is the only constructor argument, every other group is empty.
recommender_input_args = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
    CONSTRUCTOR_KEYWORD_ARGS=dict(),
    FIT_POSITIONAL_ARGS=list(),
    FIT_KEYWORD_ARGS=dict(),
    EARLYSTOPPING_KEYWORD_ARGS=dict(),
)


In [7]:
# Run the search. This is the expensive cell: the recorded log shows one
# EASE_R fit per tested configuration.
hyperparameter_search.search(
    recommender_input_args,
    # What to explore and for how long
    hyperparameter_search_space=hyperparameters_range_dictionary,
    n_cases=n_cases,
    n_random_starts=n_random_starts,
    # What to optimise
    metric_to_optimize=metric_to_optimize,
    cutoff_to_optimize=cutoff_to_optimize,
    # What to persist, and where
    save_model="best",
    output_folder_path=output_folder_path,
    output_file_name_root=recommender_class.RECOMMENDER_NAME,
)

Iteration No: 1 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'topK': None, 'normalize_matrix': False, 'l2_norm': 4.41782047647468}
EASE_R_Recommender: Fitting model... 


In [6]:
from Recommenders.DataIO import DataIO

# Reload the metadata archive written by the search into the output folder.
data_loader = DataIO(folder_path=output_folder_path)
search_metadata = data_loader.load_data(recommender_class.RECOMMENDER_NAME + "_metadata.zip")

# Validation metrics, one row per evaluated configuration.
result_on_validation_df = search_metadata["result_on_validation_df"]
result_on_validation_df

Unnamed: 0_level_0,Unnamed: 1_level_0,PRECISION,PRECISION_RECALL_MIN_DEN,RECALL,MAP,MAP_MIN_DEN,MRR,NDCG,F1,HIT_RATE,ARHR_ALL_HITS,...,COVERAGE_USER,COVERAGE_USER_HIT,USERS_IN_GT,DIVERSITY_GINI,SHANNON_ENTROPY,RATIO_DIVERSITY_HERFINDAHL,RATIO_DIVERSITY_GINI,RATIO_SHANNON_ENTROPY,RATIO_AVERAGE_POPULARITY,RATIO_NOVELTY
Unnamed: 0_level_1,cutoff,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,10,0.048894,0.079706,0.075291,0.023861,0.038333,0.164898,0.088539,0.059287,0.333349,0.19725,...,1.0,0.333349,1.0,0.013883,8.437827,0.992855,0.027291,0.625307,4.229244,0.240302
1,10,0.047657,0.076892,0.072517,0.023309,0.03719,0.161043,0.086087,0.057515,0.325278,0.19272,...,1.0,0.325278,1.0,0.014937,8.516477,0.993184,0.029363,0.631136,4.148085,0.241117
2,10,0.034836,0.054413,0.051037,0.016329,0.025314,0.118425,0.061092,0.041408,0.251555,0.138433,...,1.0,0.251555,1.0,0.012603,8.289924,0.993168,0.024774,0.614347,4.146882,0.237864
3,10,0.039247,0.066416,0.063139,0.018339,0.030523,0.135449,0.070234,0.048405,0.288525,0.156909,...,1.0,0.288525,1.0,0.004119,6.690634,0.97921,0.008097,0.495827,6.303658,0.220256
4,10,0.030777,0.053826,0.051412,0.013383,0.023076,0.104523,0.05353,0.038504,0.240722,0.117861,...,1.0,0.240722,1.0,0.00119,5.022816,0.949751,0.002338,0.372229,8.582979,0.204999
5,10,0.03131,0.054719,0.052266,0.013721,0.023641,0.106859,0.054713,0.039161,0.244013,0.120655,...,1.0,0.244013,1.0,0.001286,5.118995,0.951772,0.002528,0.379357,8.469404,0.205735
6,10,0.048164,0.079063,0.074785,0.023412,0.037824,0.163515,0.087271,0.058592,0.3315,0.194613,...,1.0,0.3315,1.0,0.0116,8.208905,0.99196,0.022802,0.608342,4.434296,0.237532
7,10,0.03082,0.053889,0.051471,0.013419,0.023128,0.104806,0.053636,0.038554,0.24101,0.118179,...,1.0,0.24101,1.0,0.001198,5.031903,0.949941,0.002355,0.372902,8.572098,0.205068
8,10,0.048908,0.079815,0.075419,0.023838,0.03834,0.165054,0.088558,0.059337,0.333878,0.197266,...,1.0,0.333878,1.0,0.013563,8.409328,0.992745,0.026662,0.623195,4.25543,0.239965
9,10,0.031346,0.054789,0.052335,0.013746,0.023673,0.107042,0.054802,0.039208,0.244277,0.120872,...,1.0,0.244277,1.0,0.001296,5.128718,0.951973,0.002549,0.380077,8.457989,0.20581


In [7]:
# Hyperparameter values stored in the search metadata; the recorded output
# lists one row per case (topK, normalize_matrix, l2_norm).
hyperparameters_df = search_metadata["hyperparameters_df"]
hyperparameters_df

Unnamed: 0,topK,normalize_matrix,l2_norm
0,,False,79.264014
1,,False,31.030268
2,,False,1.289425
3,,False,4482.123797
4,,False,9999656.678602
5,,False,167435.051367
6,,False,227.18694
7,,False,1497062.080804
8,,False,96.03677
9,,False,151652.025424


In [8]:
# Best configuration recorded by the search; it is unpacked into `fit()`
# when the final model is trained.
best_hyperparameters = search_metadata["hyperparameters_best"]
best_hyperparameters

{'topK': None, 'normalize_matrix': False, 'l2_norm': 79.26401388961496}

In [9]:
# Tracebacks collected in the search metadata.
# NOTE(review): the stored output contains AttributeError tracebacks raised in
# get_validation_summary_table ('NoneType' object has no attribute 'copy').
# Confirm whether they belong to this search or to an earlier run, and
# whether the affected cases were still evaluated.
exception_list = search_metadata["exception_list"]
exception_list

['Traceback (most recent call last):\n  File "d:\\Developing\\RecSys_PoliMi_challenge_2020\\HyperparameterTuning\\SearchAbstractClass.py", line 604, in _objective_function\n    result_df, recommender_instance = self._evaluate_on_validation(\n  File "d:\\Developing\\RecSys_PoliMi_challenge_2020\\HyperparameterTuning\\SearchAbstractClass.py", line 395, in _evaluate_on_validation\n    self.model_counter: recommender_instance.get_validation_summary_table()\n  File "d:\\Developing\\RecSys_PoliMi_challenge_2020\\Recommenders\\Incremental_Training_Early_Stopping.py", line 110, in get_validation_summary_table\n    return self._earlystopping_validation_summary_df.copy()\nAttributeError: \'NoneType\' object has no attribute \'copy\'\n',
 'Traceback (most recent call last):\n  File "d:\\Developing\\RecSys_PoliMi_challenge_2020\\HyperparameterTuning\\SearchAbstractClass.py", line 604, in _objective_function\n    result_df, recommender_instance = self._evaluate_on_validation(\n  File "d:\\Developin

In [9]:
# Refit the model with the selected hyperparameters on train + validation
# data, then store it next to the search results.
recommender = recommender_class(URM_all)
recommender.fit(**best_hyperparameters)
recommender.save_model(
    file_name=recommender_class.RECOMMENDER_NAME + "_best_model_trained_on_everything.zip",
    folder_path=output_folder_path,
)

EASE_R_Recommender: Fitting model... 
EASE_R_Recommender: Fitting model... done in 8.01 min
EASE_R_Recommender: Saving model in file 'result_experiments/EASE_R_Recommender/EASE_R_Recommender_best_model_trained_on_everything.zip'
EASE_R_Recommender: Saving complete


In [None]:
from utils.create_submission import create_submission

# Hand the model refitted on URM_all to the submission helper.
# NOTE(review): create_submission lives in utils.create_submission and is not
# visible here - presumably it writes the submission file; confirm the
# output location before relying on it.
create_submission(recommender)