In [1]:
import os
import random

import numpy as np
import pandas as pd
import scipy.sparse as sps
import matplotlib.pyplot as plt
import seaborn as snb

# Pin every random number generator used by this notebook to a single seed so
# that repeated runs draw the same random numbers.
SEED = 42
# NOTE: PYTHONHASHSEED is read when the interpreter starts, so exporting it here
# cannot change hashing in the already-running kernel; it is only inherited by
# processes launched afterwards.
os.environ.update(PYTHONHASHSEED=str(SEED))
np.random.seed(SEED)
random.seed(SEED)

In [2]:
from data_manager import DatasetLoader, DatasetSplitter, URMGenerator

# Load the train/validation splits and turn them into implicit-feedback URMs.
dataset_loader = DatasetLoader()
dataset_splitter = DatasetSplitter(dataset_loader)
dataset_train, dataset_val = dataset_splitter.load_train_val()
URM_generator = URMGenerator(dataset_train, dataset_val)
URM_train, URM_val = URM_generator.generate_implicit_URM()
# Element-wise sum of both matrices; the final model is refit on it further down.
# NOTE(review): assumes train and validation contain disjoint interactions,
# otherwise overlapping entries are added together - confirm in DatasetSplitter.
URM_all = URM_train + URM_val

Loading previusly generated splits...
Generating implicit URM...


In [3]:
from evaluation.evaluator import EvaluatorHoldout

# Evaluator built on the validation URM with a single cutoff of 10; it is passed
# to the hyperparameter search as its validation evaluator.
evaluator = EvaluatorHoldout(URM_val, cutoff_list=[10])

In [4]:
from Recommenders.GraphBased.RP3betaRecommender import (
    RP3betaRecommender,
)
from skopt.space import Real, Integer, Categorical
from HyperparameterTuning.SearchBayesianSkopt import SearchBayesianSkopt
from HyperparameterTuning.SearchAbstractClass import SearchInputRecommenderArgs

# Search configuration: where results are written, which recommender is tuned,
# how many configurations are tried, and which metric/cutoff is optimised.
output_folder_path = "result_experiments/RP3betaRecommender/"
recommender_class = RP3betaRecommender
n_cases = 50
n_random_starts = int(n_cases * 0.3)  # 30% of the budget, i.e. 15 cases
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

# Create the output directory if needed. exist_ok=True makes this a single
# idempotent call instead of a separate existence check followed by a create,
# which could fail if the directory appears between the two steps.
os.makedirs(output_folder_path, exist_ok=True)

# Search space handed to the Bayesian optimiser: one skopt dimension per
# hyperparameter that is passed to the recommender's fit().
hyperparameters_range_dictionary = {
    "topK": Integer(5, 1000),
    "alpha": Real(low=0, high=2, prior="uniform"),
    "beta": Real(low=0, high=2, prior="uniform"),
    "normalize_similarity": Categorical([True, False]),
}

# Search object; each tested configuration is scored with the validation evaluator.
hyperparameter_search = SearchBayesianSkopt(
    recommender_class,
    evaluator_validation=evaluator,
)

# Arguments used to build and fit the recommender for every configuration:
# only the training URM is passed to the constructor, everything else is empty.
recommender_input_args = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS=[
        URM_train,
    ],
    CONSTRUCTOR_KEYWORD_ARGS={},
    FIT_POSITIONAL_ARGS=[],
    FIT_KEYWORD_ARGS={},
    EARLYSTOPPING_KEYWORD_ARGS={},
)


In [15]:
# Run the Bayesian search: n_cases configurations in total, the first
# n_random_starts of which are passed as random starts. Results are written
# under output_folder_path.
# NOTE(review): save_model="best" presumably keeps only the best model on disk -
# confirm against SearchBayesianSkopt.
hyperparameter_search.search(
    recommender_input_args,
    hyperparameter_search_space=hyperparameters_range_dictionary,
    n_cases=n_cases,
    n_random_starts=n_random_starts,
    save_model="best",
    output_folder_path=output_folder_path,  # Folder the result files are written to
    output_file_name_root=recommender_class.RECOMMENDER_NAME,  # Prefix of the result file names
    metric_to_optimize=metric_to_optimize,
    cutoff_to_optimize=cutoff_to_optimize,
)

Iteration No: 1 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'topK': 360, 'alpha': 0.8850371049339127, 'beta': 1.0245009192138066, 'normalize_similarity': False}
RP3betaRecommender: Similarity column 24507 (100.0%), 1343.75 column/sec. Elapsed time 18.24 sec
EvaluatorHoldout: Processed 41629 (100.0%) in 48.03 sec. Users per second: 867
SearchBayesianSkopt: New best config found. Config 0: {'topK': 360, 'alpha': 0.8850371049339127, 'beta': 1.0245009192138066, 'normalize_similarity': False} - results: PRECISION: 0.0052223, PRECISION_RECALL_MIN_DEN: 0.0067621, RECALL: 0.0058212, MAP: 0.0023450, MAP_MIN_DEN: 0.0030033, MRR: 0.0163788, NDCG: 0.0069300, F1: 0.0055055, HIT_RATE: 0.0371856, ARHR_ALL_HITS: 0.0194794, NOVELTY: 0.0065025, AVERAGE_POPULARITY: 0.0029683, DIVERSITY_MEAN_INTER_LIST: 0.9977669, DIVERSITY_HERFINDAHL: 0.9997743, COVERAGE_ITEM: 0.7220794, COVERAGE_ITEM_HIT: 0.0349288, ITEMS_IN_GT: 0.9925736, COVERAGE_USER: 1.0000000, COVERAGE_USER_H

In [16]:
from Recommenders.DataIO import DataIO

# Reload the metadata archive from the search output folder and show the
# validation metrics stored in it.
data_loader = DataIO(folder_path=output_folder_path)
metadata_file_name = recommender_class.RECOMMENDER_NAME + "_metadata.zip"
search_metadata = data_loader.load_data(metadata_file_name)

result_on_validation_df = search_metadata["result_on_validation_df"]
result_on_validation_df

Unnamed: 0_level_0,Unnamed: 1_level_0,PRECISION,PRECISION_RECALL_MIN_DEN,RECALL,MAP,MAP_MIN_DEN,MRR,NDCG,F1,HIT_RATE,ARHR_ALL_HITS,...,COVERAGE_USER,COVERAGE_USER_HIT,USERS_IN_GT,DIVERSITY_GINI,SHANNON_ENTROPY,RATIO_DIVERSITY_HERFINDAHL,RATIO_DIVERSITY_GINI,RATIO_SHANNON_ENTROPY,RATIO_AVERAGE_POPULARITY,RATIO_NOVELTY
Unnamed: 0_level_1,cutoff,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,10,0.005222,0.006762,0.005821,0.002345,0.003003,0.016379,0.00693,0.005506,0.037186,0.019479,...,1.0,0.037186,1.0,0.251756,12.939111,1.000104,0.494892,0.958887,0.058655,0.372033
1,10,0.00018,0.000304,0.000253,5.3e-05,8.9e-05,0.000473,0.000238,0.00021,0.001633,0.000502,...,1.0,0.001633,1.0,0.124814,11.927064,0.999868,0.245354,0.883886,0.046095,0.376639
2,10,0.019883,0.027217,0.024584,0.008748,0.011951,0.058887,0.026884,0.021985,0.135531,0.071354,...,1.0,0.135531,1.0,0.269112,12.987393,1.000067,0.529009,0.962465,0.23831,0.35562
3,10,0.004209,0.006029,0.005452,0.002132,0.003071,0.01838,0.006834,0.00475,0.036129,0.019761,...,1.0,0.036129,1.0,0.095041,11.494154,0.999677,0.186828,0.851804,0.053531,0.375319
4,10,0.022405,0.0315,0.028785,0.00982,0.013835,0.069114,0.031109,0.025198,0.15876,0.081907,...,1.0,0.15876,1.0,0.283023,13.009505,1.00003,0.556355,0.964103,0.264485,0.350027
5,10,0.03124,0.049176,0.046352,0.013625,0.021477,0.099472,0.046841,0.037324,0.229335,0.115837,...,1.0,0.229335,1.0,0.19209,11.960051,0.999265,0.377603,0.886331,0.8316,0.311184
6,10,0.000449,0.000632,0.000507,0.000138,0.000184,0.00114,0.000524,0.000476,0.003795,0.001252,...,1.0,0.003795,1.0,0.082769,11.219671,0.999433,0.162705,0.831463,0.042033,0.379736
7,10,0.03536,0.055933,0.052817,0.015669,0.024771,0.113088,0.053468,0.042361,0.255783,0.132417,...,1.0,0.255783,1.0,0.186336,11.796585,0.999092,0.366291,0.874217,1.105997,0.304007
8,10,0.045339,0.069484,0.065115,0.022379,0.034324,0.151995,0.0705,0.053456,0.301953,0.183367,...,1.0,0.301953,1.0,0.249142,11.789132,0.9983,0.489754,0.873664,2.17249,0.297681
9,10,0.042864,0.06456,0.060287,0.020723,0.030934,0.139139,0.064711,0.050104,0.284033,0.168853,...,1.0,0.284033,1.0,0.222091,11.986546,0.998813,0.436579,0.888294,1.839749,0.308556


In [17]:
# Table of hyperparameter values stored in the search metadata (one column per
# tuned hyperparameter); displayed as the cell output.
hyperparameters_df = search_metadata["hyperparameters_df"]
hyperparameters_df

Unnamed: 0,topK,alpha,beta,normalize_similarity
0,360,0.885037,1.024501,False
1,856,1.430801,1.657735,False
2,685,1.19875,0.896754,True
3,19,0.704016,1.46537,True
4,286,0.614292,0.887789,False
5,767,0.132635,0.812158,True
6,695,1.190963,1.659808,True
7,758,0.23211,0.782584,True
8,197,0.836781,0.434672,False
9,812,1.163582,0.696736,True


In [18]:
# Best configuration recorded in the search metadata; it is unpacked into fit()
# when the final model is trained.
best_hyperparameters = search_metadata["hyperparameters_best"]
best_hyperparameters

{'topK': 28,
 'alpha': 1.1223901997267467,
 'beta': 0.5367651340548403,
 'normalize_similarity': True}

In [19]:
# Exceptions recorded in the search metadata.
# NOTE(review): presumably one entry per evaluated configuration, with None
# meaning that no exception was raised - confirm against the search class.
exception_list = search_metadata["exception_list"]
exception_list

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]

In [20]:
# Refit the recommender with the best hyperparameters on train + validation
# interactions and store the fitted model in the output folder.
final_model_file_name = (
    recommender_class.RECOMMENDER_NAME + "_best_model_trained_on_everything.zip"
)
recommender = recommender_class(URM_all)
recommender.fit(**best_hyperparameters)
recommender.save_model(
    folder_path=output_folder_path, file_name=final_model_file_name
)

RP3betaRecommender: Similarity column 24507 (100.0%), 2824.68 column/sec. Elapsed time 8.68 sec
RP3betaRecommender: Saving model in file 'result_experiments/RP3betaRecommender/RP3betaRecommender_best_model_trained_on_everything.zip'
RP3betaRecommender: Saving complete


In [5]:
# Rebuild the recommender on the full URM and restore the previously saved
# model from disk instead of fitting it again.
final_model_file_name = (
    recommender_class.RECOMMENDER_NAME + "_best_model_trained_on_everything.zip"
)
recommender = recommender_class(URM_all)
recommender.load_model(
    folder_path=output_folder_path, file_name=final_model_file_name
)

RP3betaRecommender: Loading model from file 'result_experiments/RP3betaRecommender/RP3betaRecommender_best_model_trained_on_everything.zip'
RP3betaRecommender: Loading complete


In [6]:
from utils.create_submission import create_submission

# Generate recommendations with the loaded model and write them to a CSV
# submission file (the file name is reported in the cell output).
create_submission(recommender)

Generating recomendations...
Writing to RP3betaRecommender-2022_12_15_15_33_53.csv...
