In [24]:
import numpy as np
import pandas as pd
import scipy.sparse as sp
import FileManager

In [25]:
# Load the interaction data and the target users via the project's FileManager helper.
ratings = FileManager.load_data()
# Wrap the targets in an ndarray and drop any length-1 axes.
users_to_recommend = np.squeeze(np.array(FileManager.load_target()))

# split_data yields three objects, unpacked here as (all, train, validation).
# NOTE(review): presumably user-rating matrices (URMs) — confirm against FileManager.
urm_all, urm_train, urm_validation = FileManager.split_data(ratings)
# Train and validation combined with `+`.
urm_train_validation = urm_train + urm_validation

> Importing file...
> Importing file... Completed!
> Importing file...
> Importing file... Completed!


In [26]:
from Evaluation.Evaluator import EvaluatorHoldout

# Evaluator built on the validation split, with a single cutoff of 10.
# NOTE(review): this literal should stay in sync with `cutoff_to_optimize`
# that is passed to the hyperparameter search.
evaluator_validation = EvaluatorHoldout(
    urm_validation,
    cutoff_list=[10],
)

EvaluatorHoldout: Ignoring 13644 ( 0.0%) Users that have less than 1 test interactions


In [27]:
from skopt.space import Real, Integer, Categorical

# Search space for the optimizer, keyed by hyperparameter name.
# `topK` and `shrink` are integer ranges; `similarity` and `normalize` are
# single-choice categoricals, so they are effectively held fixed.
hyperparameters_range_dictionary = dict(
    topK=Integer(200, 250),
    shrink=Integer(1, 5),
    similarity=Categorical(["cosine"]),
    normalize=Categorical([True]),
)

In [28]:
from Recommenders.KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
from HyperparameterTuning.SearchBayesianSkopt import SearchBayesianSkopt
from HyperparameterTuning.SearchAbstractClass import SearchInputRecommenderArgs

# Model whose hyperparameters are being tuned.
recommender_class = ItemKNNCFRecommender

# Search object, scored with the validation evaluator.
hyperparameterSearch = SearchBayesianSkopt(
    recommender_class,
    evaluator_validation=evaluator_validation,
)

# Two argument bundles that differ only in the matrix handed to the
# recommender constructor: `urm_train` for the search itself, and
# `urm_train_validation` for the "last test" arguments.
recommender_input_args, recommender_input_args_last_test = (
    SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[urm],
        CONSTRUCTOR_KEYWORD_ARGS={},
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS={},
    )
    for urm in (urm_train, urm_train_validation)
)

In [29]:
import os

# Folder that receives the search results.
output_folder_path = "result_experiments/"

# Create the folder if it is missing. `exist_ok=True` makes this one idempotent
# call with no gap between an existence check and the creation, and it raises
# immediately if the path is occupied by a regular file.
os.makedirs(output_folder_path, exist_ok=True)

n_cases = 10  # NUMBER OF CASES
# 30% of the cases, truncated to an integer (3 when n_cases is 10).
n_random_starts = int(n_cases*0.3)
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

In [30]:
# Launch the hyperparameter search with the settings prepared in the earlier cells.
hyperparameterSearch.search(
    recommender_input_args,
    recommender_input_args_last_test=recommender_input_args_last_test,
    hyperparameter_search_space=hyperparameters_range_dictionary,
    # Search budget.
    n_cases=n_cases,
    n_random_starts=n_random_starts,
    # Objective.
    metric_to_optimize=metric_to_optimize,
    cutoff_to_optimize=cutoff_to_optimize,
    # Persistence: destination folder and file-name root for the saved results.
    save_model="last",
    output_folder_path=output_folder_path,
    output_file_name_root=recommender_class.RECOMMENDER_NAME,
)

Iteration No: 1 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'topK': 241, 'shrink': 1, 'similarity': 'cosine', 'normalize': True}
Similarity column 18059 (100.0%), 1037.85 column/sec. Elapsed time 17.40 sec
EvaluatorHoldout: Processed 13644 (100.0%) in 29.43 sec. Users per second: 464
SearchBayesianSkopt: New best config found. Config 0: {'topK': 241, 'shrink': 1, 'similarity': 'cosine', 'normalize': True} - results: PRECISION: 0.3463574, PRECISION_RECALL_MIN_DEN: 0.3476477, RECALL: 0.0590467, MAP: 0.2094378, MAP_MIN_DEN: 0.2100382, MRR: 0.6098080, NDCG: 0.3644994, F1: 0.1008932, HIT_RATE: 0.9500147, ARHR_ALL_HITS: 1.1117055, NOVELTY: 0.0053048, AVERAGE_POPULARITY: 0.6654809, DIVERSITY_MEAN_INTER_LIST: 0.7990288, DIVERSITY_HERFINDAHL: 0.9798970, COVERAGE_ITEM: 0.0532698, COVERAGE_ITEM_CORRECT: 0.0285730, COVERAGE_USER: 0.9995604, COVERAGE_USER_CORRECT: 0.9495971, DIVERSITY_GINI: 0.0037915, SHANNON_ENTROPY: 6.3108431, RATIO_DIVERSITY_HERFINDAHL: 0.

In [31]:
from Recommenders.DataIO import DataIO

# Reload the metadata archive from the output folder. The file name is the
# recommender name (the same root passed to the search as
# `output_file_name_root`) followed by "_metadata.zip".
data_loader = DataIO(folder_path = output_folder_path)
search_metadata = data_loader.load_data(recommender_class.RECOMMENDER_NAME + "_metadata.zip")

# Table of the hyperparameter configurations stored in the metadata;
# left as the last expression so the notebook displays it.
hyperparameters_df = search_metadata["hyperparameters_df"]
hyperparameters_df

Unnamed: 0,topK,shrink,similarity,normalize
0,241,1,cosine,True
1,236,4,cosine,True
2,205,3,cosine,True
3,245,1,cosine,True
4,217,3,cosine,True
5,203,5,cosine,True
6,250,5,cosine,True
7,227,1,cosine,True
8,209,3,cosine,True
9,225,5,cosine,True


In [32]:
# Validation results stored in the search metadata; the bare name on the
# last line makes the notebook display the table.
result_on_validation_df = search_metadata["result_on_validation_df"]
result_on_validation_df

Unnamed: 0_level_0,Unnamed: 1_level_0,PRECISION,PRECISION_RECALL_MIN_DEN,RECALL,MAP,MAP_MIN_DEN,MRR,NDCG,F1,HIT_RATE,ARHR_ALL_HITS,...,COVERAGE_ITEM_CORRECT,COVERAGE_USER,COVERAGE_USER_CORRECT,DIVERSITY_GINI,SHANNON_ENTROPY,RATIO_DIVERSITY_HERFINDAHL,RATIO_DIVERSITY_GINI,RATIO_SHANNON_ENTROPY,RATIO_AVERAGE_POPULARITY,RATIO_NOVELTY
Unnamed: 0_level_1,cutoff,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,10,0.346357,0.347648,0.059047,0.209438,0.210038,0.609808,0.364499,0.100893,0.950015,1.111706,...,0.028573,0.99956,0.949597,0.003792,6.310843,0.980275,0.015287,0.509326,3.276105,0.024961
1,10,0.346489,0.347804,0.059065,0.209629,0.210245,0.611043,0.36492,0.100925,0.949721,1.113767,...,0.028628,0.99956,0.949304,0.003801,6.315361,0.980343,0.015324,0.50969,3.275339,0.024958
2,10,0.347222,0.348535,0.059229,0.210669,0.211334,0.616691,0.36636,0.101196,0.950894,1.120161,...,0.028905,0.99956,0.950476,0.003982,6.386984,0.981351,0.016054,0.515471,3.251382,0.024991
3,10,0.346826,0.348104,0.059064,0.20962,0.210208,0.611707,0.364939,0.100939,0.950308,1.113314,...,0.028407,0.99956,0.94989,0.003786,6.307683,0.980212,0.015265,0.509071,3.277581,0.024959
4,10,0.346782,0.348124,0.059271,0.210579,0.211243,0.616372,0.366166,0.101239,0.951481,1.120159,...,0.028684,0.99956,0.951062,0.003908,6.355163,0.980916,0.015755,0.512903,3.26235,0.024977
5,10,0.346856,0.348192,0.059084,0.210835,0.211504,0.616509,0.366292,0.100969,0.950528,1.120643,...,0.028628,0.99956,0.95011,0.003984,6.391426,0.98145,0.016062,0.515829,3.250148,0.02499
6,10,0.34646,0.34776,0.059008,0.209405,0.21,0.613828,0.364965,0.100841,0.95214,1.114563,...,0.027964,0.99956,0.951722,0.003708,6.284724,0.979982,0.014949,0.507218,3.284032,0.024944
7,10,0.346489,0.347795,0.059233,0.210095,0.21071,0.613482,0.365392,0.10117,0.950381,1.116622,...,0.028684,0.99956,0.949963,0.003873,6.340674,0.980689,0.015613,0.511733,3.266887,0.024973
8,10,0.347054,0.348344,0.059202,0.210928,0.2116,0.61716,0.366514,0.101149,0.950821,1.121442,...,0.028795,0.99956,0.950403,0.003954,6.374166,0.981179,0.015941,0.514436,3.255572,0.024986
9,10,0.346651,0.347966,0.059221,0.210361,0.210992,0.613936,0.365661,0.10116,0.949868,1.117659,...,0.028185,0.99956,0.949451,0.003844,6.336621,0.980679,0.015499,0.511406,3.268837,0.024965


In [33]:
# Best configuration recorded in the search metadata; displayed as the cell output.
best_hyperparameters = search_metadata["hyperparameters_best"]
best_hyperparameters

{'topK': 209, 'shrink': 3, 'similarity': 'cosine', 'normalize': True}