In [2]:
import pandas as pd
import numpy as np

from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Utils.DataReader import load_urm, load_icm, load_target
from Recommenders.EASE_R.EASE_R_Recommender import EASE_R_Recommender

In [3]:
# Load the full user-item interaction matrix.
URM_all = load_urm()

# Two nested random holdouts, each called with train_percentage=0.85:
# first URM_all -> (URM_train, URM_test), then that URM_train -> (URM_train, URM_validation).
URM_train, URM_test = split_train_in_two_percentage_global_sample(
    URM_all,
    train_percentage=0.85,
)
URM_train, URM_validation = split_train_in_two_percentage_global_sample(
    URM_train,
    train_percentage=0.85,
)

# One evaluator per held-out split (validation first, then test), both at cutoff 10.
evaluator_validation, evaluator_test = (
    EvaluatorHoldout(holdout_urm, cutoff_list=[10])
    for holdout_urm in (URM_validation, URM_test)
)

EvaluatorHoldout: Ignoring 1523 ( 3.7%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 948 ( 2.3%) Users that have less than 1 test interactions


In [4]:
# Recommender class handed to the hyperparameter search; its RECOMMENDER_NAME
# is also used as the root of the search output file names.
recommender_class = EASE_R_Recommender

In [5]:
import os

# Folder where the hyperparameter search writes its results.
output_folder_path = "Experiments/"

# Create the folder if needed; exist_ok avoids the check-then-create race
# and makes the cell safe to re-run.
os.makedirs(output_folder_path, exist_ok=True)

# Search budget: total number of configurations to evaluate, of which the
# first 30% (rounded down) are random starts.
n_cases = 10
n_random_starts = int(n_cases*0.3)

# Metric and recommendation-list cutoff the search optimises.
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

In [6]:
from skopt.space import Real, Integer, Categorical

# Search space: a single hyperparameter, l2_norm, drawn with a log-uniform
# prior between 1 and 1e7.
hyperparameters_range_dictionary = dict(
    l2_norm=Real(low=1.0, high=1e7, prior="log-uniform"),
)

In [7]:
# Early-stopping settings keyed by option name; they reuse the validation
# evaluator and the metric chosen for the search.
# NOTE(review): no other cell visible in this notebook reads this dictionary
# (it is not passed to the search below) - confirm whether it is still needed.
earlystopping_keywargs = dict(
    validation_every_n=15,
    stop_on_validation=True,
    evaluator_object=evaluator_validation,
    lower_validations_allowed=5,
    validation_metric=metric_to_optimize,
)

In [8]:
from HyperparameterTuning.SearchBayesianSkopt import SearchBayesianSkopt

# Bayesian search object: receives the recommender class to tune and the
# evaluator used to score each candidate configuration.
hyperparameterSearch = SearchBayesianSkopt(
    recommender_class,
    evaluator_validation=evaluator_validation,
)

In [9]:
from HyperparameterTuning.SearchAbstractClass import SearchInputRecommenderArgs


def _constructor_only_args(urm):
    """Build search input args whose only content is `urm` as positional constructor argument."""
    return SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[urm],
        CONSTRUCTOR_KEYWORD_ARGS={},
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS={},
    )


# Arguments used while searching: the recommender is built on the training split.
recommender_input_args = _constructor_only_args(URM_train)

# Arguments for the "last test" step: the recommender is built on all interactions.
recommender_input_args_last_test = _constructor_only_args(URM_all)

In [10]:
# Collect every search option in one place, then launch the search.
search_settings = dict(
    recommender_input_args=recommender_input_args,
    recommender_input_args_last_test=recommender_input_args_last_test,
    hyperparameter_search_space=hyperparameters_range_dictionary,
    n_cases=n_cases,
    n_random_starts=n_random_starts,
    save_model="last",
    output_folder_path=output_folder_path,  # Where to save the results
    output_file_name_root=recommender_class.RECOMMENDER_NAME,  # How to call the files
    metric_to_optimize=metric_to_optimize,
    cutoff_to_optimize=cutoff_to_optimize,
)

hyperparameterSearch.search(**search_settings)

Iteration No: 1 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'l2_norm': 736137.6311073073}
EASE_R_Recommender: Fitting model... 
EASE_R_Recommender: Fitting model... done in 10.55 min
EvaluatorHoldout: Processed 40106 (100.0%) in 55.33 sec. Users per second: 725
SearchBayesianSkopt: New best config found. Config 0: {'l2_norm': 736137.6311073073} - results: PRECISION: 0.0261457, PRECISION_RECALL_MIN_DEN: 0.0521288, RECALL: 0.0503586, MAP: 0.0110257, MAP_MIN_DEN: 0.0217706, MRR: 0.0893565, NDCG: 0.0450436, F1: 0.0344206, HIT_RATE: 0.2100683, ARHR_ALL_HITS: 0.0990413, NOVELTY: 0.0035316, AVERAGE_POPULARITY: 0.4625847, DIVERSITY_MEAN_INTER_LIST: 0.4587089, DIVERSITY_HERFINDAHL: 0.9458697, COVERAGE_ITEM: 0.0203615, COVERAGE_ITEM_HIT: 0.0082017, ITEMS_IN_GT: 0.9869833, COVERAGE_USER: 0.9634149, COVERAGE_USER_HIT: 0.2023830, USERS_IN_GT: 0.9634149, DIVERSITY_GINI: 0.0010041, SHANNON_ENTROPY: 4.8263749, RATIO_DIVERSITY_HERFINDAHL: 0.9461821, RATIO_DIVERSI

In [None]:
from Recommenders.DataIO import DataIO

# Explore the results of the search: reopen the metadata archive written under
# the output folder, named after the recommender.
data_loader = DataIO(folder_path=output_folder_path)
metadata_file_name = f"{recommender_class.RECOMMENDER_NAME}_metadata.zip"
search_metadata = data_loader.load_data(metadata_file_name)

# Show which entries the metadata contains.
search_metadata.keys()

In [None]:
# Pull the "hyperparameters_df" entry out of the loaded search metadata and display it.
hyperparameters_df = search_metadata["hyperparameters_df"]
hyperparameters_df

In [None]:
# Pull the "result_on_validation_df" entry out of the loaded search metadata and display it.
result_on_validation_df = search_metadata["result_on_validation_df"]
result_on_validation_df

In [None]:
# Pull the "hyperparameters_best" entry out of the loaded search metadata and display it.
best_hyperparameters = search_metadata["hyperparameters_best"]
best_hyperparameters

In [None]:
from Recommenders.EASE_R.EASE_R_Recommender import EASE_R_Recommender

# Final model: built on all interactions and fitted with the l2_norm found by
# the search.
# NOTE(review): assumes best_hyperparameters is a mapping containing the
# "l2_norm" key declared in the search space - confirm against the metadata.
recommender = EASE_R_Recommender(URM_all)
recommender.fit(l2_norm=best_hyperparameters["l2_norm"])

In [None]:
# Score `recommender` on the held-out test split and report MAP at cutoff 10.
# NOTE(review): URM_test is split out of URM_all, so if `recommender` was
# fitted on URM_all it has already seen these interactions and the score is
# optimistic - fit on the training split for an unbiased estimate.
print("MAP of the starting model")

result_df, _ = evaluator_test.evaluateRecommender(recommender)
print("EASE_R - MAP: {}".format(result_df.loc[10]["MAP"]))

## Submissions

In [None]:
#test_users = pd.read_csv('Dataset/data_target_users_test.csv')

In [None]:
# Users for which a submission row is required.
test_users = pd.read_csv('Dataset/data_target_users_test.csv')
user_id = test_users['user_id']

# Top-10 recommendations for every target user, in the same order as `user_id`.
# Requires a fitted `recommender`.
recommendations = [recommender.recommend(user, cutoff=10) for user in user_id]

In [None]:
# Serialise each recommendation list as space-separated item ids. Joining the
# ids explicitly avoids the padding and line wrapping that str() of a numpy
# array can introduce.
test_users['item_list'] = [
    " ".join(str(item) for item in item_list)
    for item_list in recommendations
]

# Build the path portably (a literal backslash is only a separator on Windows)
# and make sure the target folder exists before writing.
submission_path = os.path.join("Submissions", "Submission_EASE_R_Recommender.csv")
os.makedirs(os.path.dirname(submission_path), exist_ok=True)
test_users.to_csv(submission_path, index=False)