In [1]:
import os
import random

import numpy as np
import pandas as pd
import scipy.sparse as sps
import matplotlib.pyplot as plt
import seaborn as snb

# Random seed for reproducibility
SEED = 42
random.seed(SEED)
os.environ['PYTHONHASHSEED'] = str(SEED)
np.random.seed(SEED)

In [2]:
from data_manager import DatasetLoader, DatasetSplitter, URMGenerator

dataset_loader = DatasetLoader()
dataset_splitter = DatasetSplitter(dataset_loader)
dataset_train, dataset_val = dataset_splitter.load_train_val()
URM_generator = URMGenerator(dataset_train, dataset_val)
URM_train, URM_val = URM_generator.generate_implicit_URM()
URM_all = URM_train + URM_val

Loading previusly generated splits...
Generating implicit URM...


In [3]:
from evaluation.evaluator import EvaluatorHoldout

evaluator = EvaluatorHoldout(URM_val, cutoff_list=[10])

In [4]:
from Recommenders.KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
from skopt.space import Real, Integer, Categorical
from HyperparameterTuning.SearchBayesianSkopt import SearchBayesianSkopt
from HyperparameterTuning.SearchAbstractClass import SearchInputRecommenderArgs

output_folder_path = "result_experiments/ItemKNNCFRecommender_implicit/"
recommender_class = ItemKNNCFRecommender
n_cases = 15
n_random_starts = int(n_cases * 0.3)
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

# If directory does not exist, create
if not os.path.exists(output_folder_path):
    os.makedirs(output_folder_path)

# Define hyperparameters
hyperparameters_range_dictionary = {
    "topK": Integer(5, 2000),
    "shrink": Integer(0, 1000),
    "similarity": Categorical(["cosine", "jaccard"]),
    "normalize": Categorical([True, False]),
}

hyperparameter_search = SearchBayesianSkopt(
    recommender_class,
    evaluator_validation=evaluator,
)

recommender_input_args = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS=[
        URM_train,
    ],
    CONSTRUCTOR_KEYWORD_ARGS={},
    FIT_POSITIONAL_ARGS=[],
    FIT_KEYWORD_ARGS={},
    EARLYSTOPPING_KEYWORD_ARGS={},
)


In [5]:
hyperparameter_search.search(
    recommender_input_args,
    hyperparameter_search_space=hyperparameters_range_dictionary,
    n_cases=n_cases,
    n_random_starts=n_random_starts,
    save_model="best",
    output_folder_path=output_folder_path,  # Where to save the results
    output_file_name_root=recommender_class.RECOMMENDER_NAME,  # How to call the files
    metric_to_optimize=metric_to_optimize,
    cutoff_to_optimize=cutoff_to_optimize,
)

Iteration No: 1 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'topK': 1444, 'shrink': 158, 'similarity': 'cosine', 'normalize': True}
Similarity column 24507 (100.0%), 1705.53 column/sec. Elapsed time 14.37 sec
EvaluatorHoldout: Processed 41629 (100.0%) in 1.49 min. Users per second: 465
SearchBayesianSkopt: New best config found. Config 0: {'topK': 1444, 'shrink': 158, 'similarity': 'cosine', 'normalize': True} - results: PRECISION: 0.0424031, PRECISION_RECALL_MIN_DEN: 0.0710910, RECALL: 0.0673407, MAP: 0.0202406, MAP_MIN_DEN: 0.0335933, MRR: 0.1461785, NDCG: 0.0691522, F1: 0.0520386, HIT_RATE: 0.3037066, ARHR_ALL_HITS: 0.1710519, NOVELTY: 0.0039505, AVERAGE_POPULARITY: 0.2927468, DIVERSITY_MEAN_INTER_LIST: 0.8333346, DIVERSITY_HERFINDAHL: 0.9833315, COVERAGE_ITEM: 0.1281267, COVERAGE_ITEM_HIT: 0.0277880, ITEMS_IN_GT: 0.9925736, COVERAGE_USER: 1.0000000, COVERAGE_USER_HIT: 0.3037066, USERS_IN_GT: 1.0000000, DIVERSITY_GINI: 0.0069009, SHANNON_ENTRO

In [6]:
from Recommenders.DataIO import DataIO

data_loader = DataIO(folder_path=output_folder_path)
search_metadata = data_loader.load_data(
    recommender_class.RECOMMENDER_NAME + "_metadata.zip"
)

result_on_validation_df = search_metadata["result_on_validation_df"]
result_on_validation_df

Unnamed: 0_level_0,Unnamed: 1_level_0,PRECISION,PRECISION_RECALL_MIN_DEN,RECALL,MAP,MAP_MIN_DEN,MRR,NDCG,F1,HIT_RATE,ARHR_ALL_HITS,...,COVERAGE_USER,COVERAGE_USER_HIT,USERS_IN_GT,DIVERSITY_GINI,SHANNON_ENTROPY,RATIO_DIVERSITY_HERFINDAHL,RATIO_DIVERSITY_GINI,RATIO_SHANNON_ENTROPY,RATIO_AVERAGE_POPULARITY,RATIO_NOVELTY
Unnamed: 0_level_1,cutoff,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,10,0.042403,0.071091,0.067341,0.020241,0.033593,0.146178,0.069152,0.052039,0.303707,0.171052,...,1.0,0.303707,1.0,0.006901,7.193678,0.983656,0.013566,0.533106,5.784731,0.226022
1,10,0.033791,0.058068,0.055263,0.015093,0.025858,0.116139,0.054854,0.041939,0.25881,0.131856,...,1.0,0.25881,1.0,0.005176,6.039486,0.964929,0.010175,0.447572,7.697395,0.212611
2,10,0.041973,0.069835,0.066111,0.019853,0.032771,0.143189,0.067799,0.051347,0.300824,0.16768,...,1.0,0.300824,1.0,0.009603,7.727774,0.989133,0.018877,0.572687,4.63696,0.233681
3,10,0.039028,0.066171,0.062823,0.018314,0.030748,0.135537,0.063867,0.048146,0.286459,0.156759,...,1.0,0.286459,1.0,0.004223,6.479268,0.975637,0.008302,0.480163,6.82701,0.217355
4,10,0.04045,0.067661,0.064087,0.019115,0.031608,0.139098,0.065598,0.049596,0.29232,0.162202,...,1.0,0.29232,1.0,0.006159,7.115314,0.984453,0.012107,0.527299,5.549101,0.225353
5,10,0.0396,0.067073,0.063666,0.018663,0.031321,0.137703,0.064883,0.048829,0.289438,0.159492,...,1.0,0.289438,1.0,0.00398,6.495305,0.976352,0.007825,0.481352,6.765647,0.217703
6,10,0.042977,0.071012,0.067313,0.02029,0.033359,0.144969,0.068983,0.05246,0.305844,0.170606,...,1.0,0.305844,1.0,0.028359,8.812486,0.992994,0.055747,0.653072,3.679318,0.250827
7,10,0.042972,0.07094,0.067236,0.020286,0.033322,0.144864,0.068923,0.052433,0.3057,0.170532,...,1.0,0.3057,1.0,0.028676,8.819562,0.993009,0.05637,0.653597,3.668886,0.251032
8,10,0.043422,0.071966,0.068142,0.020684,0.034069,0.148106,0.070168,0.053043,0.308991,0.174015,...,1.0,0.308991,1.0,0.015208,8.280592,0.991543,0.029895,0.613655,4.11324,0.241297
9,10,0.040515,0.067832,0.064254,0.01913,0.031638,0.139274,0.065695,0.049695,0.292945,0.162352,...,1.0,0.292945,1.0,0.006201,7.118337,0.984435,0.012189,0.527523,5.553061,0.225368


In [7]:
hyperparameters_df = search_metadata["hyperparameters_df"]
hyperparameters_df

Unnamed: 0,topK,shrink,similarity,normalize
0,594,913,cosine,False
1,624,959,cosine,False
2,1508,200,cosine,False
3,603,17,cosine,True
4,819,358,cosine,False
5,1218,732,cosine,False
6,1296,34,cosine,False
7,530,259,cosine,True
8,1043,418,cosine,True
9,2000,0,cosine,True


In [8]:
best_hyperparameters = search_metadata["hyperparameters_best"]
best_hyperparameters

{'topK': 500, 'shrink': 0, 'similarity': 'cosine', 'normalize': True}

In [9]:
exception_list = search_metadata["exception_list"]
exception_list

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]

In [10]:
recommender = recommender_class(URM_all)
recommender.fit(**best_hyperparameters)
recommender.save_model(
    folder_path=output_folder_path,
    file_name=recommender_class.RECOMMENDER_NAME
    + "_best_model_trained_on_everything.zip",
)

Similarity column 24507 (100.0%), 3450.89 column/sec. Elapsed time 7.10 sec
ItemKNNCFRecommender: Saving model in file 'result_experiments/ItemKNNCFRecommender/ItemKNNCFRecommender_best_model_trained_on_everything.zip'
ItemKNNCFRecommender: Saving complete


In [None]:
from utils.create_submission import create_submission

create_submission(recommender)