In [1]:
import os
import random

import numpy as np
import pandas as pd
import scipy.sparse as sps
import matplotlib.pyplot as plt
import seaborn as snb

# Random seed for reproducibility
SEED = 42
random.seed(SEED)
os.environ['PYTHONHASHSEED'] = str(SEED)
np.random.seed(SEED)

In [2]:
from data_manager import DatasetLoader, DatasetSplitter, URMGenerator

dataset_loader = DatasetLoader()
dataset_splitter = DatasetSplitter(dataset_loader)
dataset_train, dataset_val = dataset_splitter.load_train_val()
URM_generator = URMGenerator(dataset_train, dataset_val)
URM_train, URM_val = URM_generator.generate_implicit_URM()
URM_all = URM_train + URM_val

Loading previusly generated splits...
Generating implicit URM...


In [3]:
from evaluation.evaluator import EvaluatorHoldout

evaluator = EvaluatorHoldout(URM_val, cutoff_list=[10])

In [6]:
from Recommenders.KNN.ItemKNNSimilarityHybridRecommender import (
    ItemKNNSimilarityHybridRecommender,
)
from Recommenders.GraphBased.RP3betaRecommender import (
    RP3betaRecommender,
)
from Recommenders.KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
from skopt.space import Real, Integer, Categorical
from HyperparameterTuning.SearchBayesianSkopt import SearchBayesianSkopt
from HyperparameterTuning.SearchAbstractClass import SearchInputRecommenderArgs

output_folder_path = "result_experiments/ItemKNNSimilarityHybridRecommender/"
recommender_class = ItemKNNSimilarityHybridRecommender
n_cases = 10
n_random_starts = int(n_cases * 0.3)
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

# If directory does not exist, create
if not os.path.exists(output_folder_path):
    os.makedirs(output_folder_path)

# Define hyperparameters
hyperparameters_range_dictionary = {
    "topK": Integer(5, 2000),
    "alpha": Real(0.0, 1.0),
}

knn = ItemKNNCFRecommender(URM_train)
knn.load_model(
    folder_path="result_experiments/ItemKNNCFRecommender/",
    file_name=ItemKNNCFRecommender.RECOMMENDER_NAME + "_best_model.zip",
)
rp3 = RP3betaRecommender(URM_train)
rp3.load_model(
    folder_path="result_experiments/RP3betaRecommender/",
    file_name=RP3betaRecommender.RECOMMENDER_NAME + "_best_model.zip",
)

hyperparameter_search = SearchBayesianSkopt(
    recommender_class,
    evaluator_validation=evaluator,
)

recommender_input_args = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS=[
        URM_train,
        knn.W_sparse,
        rp3.W_sparse,
    ],
    CONSTRUCTOR_KEYWORD_ARGS={},
    FIT_POSITIONAL_ARGS=[],
    FIT_KEYWORD_ARGS={},
    EARLYSTOPPING_KEYWORD_ARGS={},
)


ItemKNNCFRecommender: Loading model from file 'result_experiments/ItemKNNCFRecommender/ItemKNNCFRecommender_best_model.zip'
ItemKNNCFRecommender: Loading complete
RP3betaRecommender: Loading model from file 'result_experiments/RP3betaRecommender/RP3betaRecommender_best_model.zip'
RP3betaRecommender: Loading complete


In [7]:
hyperparameter_search.search(
    recommender_input_args,
    hyperparameter_search_space=hyperparameters_range_dictionary,
    n_cases=n_cases,
    n_random_starts=n_random_starts,
    save_model="best",
    output_folder_path=output_folder_path,  # Where to save the results
    output_file_name_root=recommender_class.RECOMMENDER_NAME,  # How to call the files
    metric_to_optimize=metric_to_optimize,
    cutoff_to_optimize=cutoff_to_optimize,
)

Iteration No: 1 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'topK': 133, 'alpha': 0.19002161632387143}
EvaluatorHoldout: Processed 41629 (100.0%) in 43.17 sec. Users per second: 964
SearchBayesianSkopt: New best config found. Config 0: {'topK': 133, 'alpha': 0.19002161632387143} - results: PRECISION: 0.0496457, PRECISION_RECALL_MIN_DEN: 0.0812767, RECALL: 0.0769279, MAP: 0.0241158, MAP_MIN_DEN: 0.0391318, MRR: 0.1662943, NDCG: 0.0796729, F1: 0.0603465, HIT_RATE: 0.3378414, ARHR_ALL_HITS: 0.1990715, NOVELTY: 0.0044821, AVERAGE_POPULARITY: 0.1731950, DIVERSITY_MEAN_INTER_LIST: 0.9511248, DIVERSITY_HERFINDAHL: 0.9951102, COVERAGE_ITEM: 0.5063043, COVERAGE_ITEM_HIT: 0.0594524, ITEMS_IN_GT: 0.9925736, COVERAGE_USER: 1.0000000, COVERAGE_USER_HIT: 0.3378414, USERS_IN_GT: 1.0000000, DIVERSITY_GINI: 0.0431038, SHANNON_ENTROPY: 9.4703120, RATIO_DIVERSITY_HERFINDAHL: 0.9954387, RATIO_DIVERSITY_GINI: 0.0847317, RATIO_SHANNON_ENTROPY: 0.7018222, RATIO_AVERAGE

In [8]:
from Recommenders.DataIO import DataIO

data_loader = DataIO(folder_path=output_folder_path)
search_metadata = data_loader.load_data(
    recommender_class.RECOMMENDER_NAME + "_metadata.zip"
)

result_on_validation_df = search_metadata["result_on_validation_df"]
result_on_validation_df

Unnamed: 0_level_0,Unnamed: 1_level_0,PRECISION,PRECISION_RECALL_MIN_DEN,RECALL,MAP,MAP_MIN_DEN,MRR,NDCG,F1,HIT_RATE,ARHR_ALL_HITS,...,COVERAGE_USER,COVERAGE_USER_HIT,USERS_IN_GT,DIVERSITY_GINI,SHANNON_ENTROPY,RATIO_DIVERSITY_HERFINDAHL,RATIO_DIVERSITY_GINI,RATIO_SHANNON_ENTROPY,RATIO_AVERAGE_POPULARITY,RATIO_NOVELTY
Unnamed: 0_level_1,cutoff,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,10,0.049646,0.081277,0.076928,0.024116,0.039132,0.166294,0.079673,0.060347,0.337841,0.199072,...,1.0,0.337841,1.0,0.043104,9.470312,0.995439,0.084732,0.701822,3.422366,0.256438
1,10,0.045307,0.074159,0.070195,0.021887,0.035449,0.153839,0.072794,0.05507,0.315189,0.182537,...,1.0,0.315189,1.0,0.037386,9.013278,0.992463,0.073493,0.667953,3.913616,0.254207
2,10,0.045845,0.075296,0.071307,0.022223,0.036141,0.156414,0.074006,0.05581,0.319249,0.185454,...,1.0,0.319249,1.0,0.034196,8.902093,0.992004,0.067222,0.659713,4.068011,0.251926
3,10,0.049216,0.079335,0.074938,0.02386,0.038386,0.163821,0.078267,0.059412,0.332965,0.196593,...,1.0,0.332965,1.0,0.086583,10.52714,0.99781,0.170201,0.780141,2.588467,0.274311
4,10,0.048742,0.08007,0.075846,0.023623,0.038429,0.163902,0.078387,0.059346,0.334502,0.195626,...,1.0,0.334502,1.0,0.03551,9.101169,0.993664,0.069805,0.674466,3.838576,0.252014
5,10,0.04119,0.065641,0.06186,0.019811,0.031885,0.142557,0.065895,0.049452,0.292512,0.167336,...,1.0,0.292512,1.0,0.112926,11.467335,0.999294,0.221985,0.849817,1.503789,0.297623
6,10,0.049574,0.081277,0.076951,0.024149,0.039203,0.166503,0.079739,0.0603,0.337673,0.199367,...,1.0,0.337673,1.0,0.042957,9.440451,0.995288,0.084443,0.699609,3.467035,0.255961
7,10,0.050083,0.081912,0.077544,0.024293,0.039472,0.167268,0.080301,0.060859,0.340604,0.200377,...,1.0,0.340604,1.0,0.046022,9.461895,0.995182,0.090469,0.701198,3.523157,0.255897
8,10,0.049357,0.080971,0.076695,0.023977,0.039001,0.165959,0.079398,0.060062,0.337265,0.198329,...,1.0,0.337265,1.0,0.038269,9.205144,0.994186,0.075228,0.682171,3.744309,0.252835
9,10,0.046446,0.075123,0.070924,0.022264,0.035821,0.154576,0.073654,0.056133,0.319657,0.184433,...,1.0,0.319657,1.0,0.043692,9.843116,0.9968,0.085887,0.72945,2.86527,0.26452


In [7]:
hyperparameters_df = search_metadata["hyperparameters_df"]
hyperparameters_df

Unnamed: 0,topK,epochs,symmetric,sgd_mode,lambda_i,lambda_j,learning_rate
0,166.0,1499.0,True,adam,0.000124,0.001693,0.000103
1,1324.0,1499.0,False,adagrad,3.2e-05,0.006309,0.016491
2,1191.0,1499.0,False,adam,0.002197,2.7e-05,0.00117
3,,,,,,,
4,,,,,,,
5,,,,,,,
6,,,,,,,
7,,,,,,,
8,,,,,,,
9,,,,,,,


In [9]:
best_hyperparameters = search_metadata["hyperparameters_best"]
best_hyperparameters

{'topK': 1975, 'alpha': 0.14188000443238327}

In [9]:
exception_list = search_metadata["exception_list"]
exception_list

['Traceback (most recent call last):\n  File "d:\\Developing\\RecSys_PoliMi_challenge_2020\\HyperparameterTuning\\SearchAbstractClass.py", line 604, in _objective_function\n    result_df, recommender_instance = self._evaluate_on_validation(\n  File "d:\\Developing\\RecSys_PoliMi_challenge_2020\\HyperparameterTuning\\SearchAbstractClass.py", line 395, in _evaluate_on_validation\n    self.model_counter: recommender_instance.get_validation_summary_table()\n  File "d:\\Developing\\RecSys_PoliMi_challenge_2020\\Recommenders\\Incremental_Training_Early_Stopping.py", line 110, in get_validation_summary_table\n    return self._earlystopping_validation_summary_df.copy()\nAttributeError: \'NoneType\' object has no attribute \'copy\'\n',
 'Traceback (most recent call last):\n  File "d:\\Developing\\RecSys_PoliMi_challenge_2020\\HyperparameterTuning\\SearchAbstractClass.py", line 604, in _objective_function\n    result_df, recommender_instance = self._evaluate_on_validation(\n  File "d:\\Developin

In [10]:
recommender = recommender_class(URM_all)
recommender.fit(**best_hyperparameters)
recommender.save_model(
    folder_path=output_folder_path,
    file_name=recommender_class.RECOMMENDER_NAME
    + "_best_model_trained_on_everything.zip",
)

TypeError: fit() argument after ** must be a mapping, not NoneType

In [None]:
from utils.create_submission import create_submission

create_submission(recommender)