In [64]:
import pandas as pd
import numpy as np 
import scipy.sparse as sps
import os
import sys
import matplotlib.pyplot as plt
# Add the directory two levels above the working directory to the import path.
# Presumably this is the project root that provides the Base / Data_manager /
# ParameterTuning / GraphBased packages imported in later cells -- TODO confirm.
sys.path.append('../..')


In [65]:
# Training interactions table; the next cell reads its `row`, `col` and `data` columns.
dataset = pd.read_csv(filepath_or_buffer='../data_train.csv')

In [66]:
# Column views of the interaction table: user index, item index, stored value.
users, items, data = dataset.row, dataset.col, dataset.data

# Assemble the user-by-item matrix in COO form, then convert it to CSR so that
# row slicing (one row per user) is fast.
URM_all = sps.coo_matrix((data, (users, items))).tocsr()

In [67]:
# Item content table (its `row`, `col` and `data` columns are used to build ICM_all).
ICM_df = pd.read_csv(filepath_or_buffer='../data_ICM_title_abstract.csv')
# Presumably the users a submission must cover -- verify against the data description.
test_users = pd.read_csv(filepath_or_buffer='../data_target_users_test.csv')

In [68]:
# Column views of the content table: item index, feature index, stored value.
# NOTE: this rebinds `items` and `data`, which previously held the URM columns.
items, features, data = ICM_df.row, ICM_df.col, ICM_df.data

# Assemble the item-by-feature matrix in COO form, then convert it to CSR so
# that row slicing (one row per item) is fast.
ICM_all = sps.coo_matrix((data, (items, features))).tocsr()

In [69]:
from Base.Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

# Hold out part of the interactions for validation (85% requested for training).
URM_train, URM_validation = split_train_in_two_percentage_global_sample(
    URM_all,
    train_percentage=0.85,
)

# Evaluator over the held-out interactions, reporting metrics at cutoff 10.
evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])



In [70]:
# Stack the transposed ICM (feature-by-item) below the training URM, so each
# feature becomes an extra row over the same item columns.
# NOTE(review): vstack needs both blocks to have the same number of columns;
# the matrices above were built without an explicit shape -- confirm they agree.
URM_train_ICM_all = sps.vstack((URM_train, ICM_all.transpose()))

In [72]:
from skopt.space import Real, Integer, Categorical
from ParameterTuning.SearchAbstractClass import SearchInputRecommenderArgs
from ParameterTuning.SearchBayesianSkopt import SearchBayesianSkopt
from GraphBased.RP3betaRecommender import RP3betaRecommender

# Search space handed to the Bayesian search: `topK` is an integer in [5, 900],
# `alpha` and `beta` are reals in [0, 1] with a uniform prior.
hyperparameters_range_dictionary = {
    "topK": Integer(5, 900),
    "alpha": Real(low=0, high=1, prior='uniform'),
    "beta": Real(low=0, high=1, prior='uniform'),
}

# Per-user counts: one row per distinct `row` value (user) found in the dataset.
grouped_users = dataset.groupby(['row']).count()

# Ids of all users present in the dataset, ordered by ascending count of the
# `col` column (i.e. from the fewest to the most interactions).
sorted_users = (
    grouped_users
    .sort_values(by=['col'], ascending=True)
    .index
    .to_numpy()
)
print(len(sorted_users))

# Recommender class whose hyperparameters are being tuned.
recommender_class = RP3betaRecommender

# Re-created here with the same arguments as in the split cell above.
evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])

# The validation evaluator is also passed as the "test" evaluator.
parameterSearch = SearchBayesianSkopt(
    recommender_class,
    evaluator_validation=evaluator_validation,
    evaluator_test=evaluator_validation,
)

# Folder that receives the search artefacts (`os` is imported in the first cell).
output_folder_path = "result_experiments/"

# Create the folder if it is missing. `exist_ok=True` makes this one idempotent
# call, replacing the separate exists()/makedirs() pair, which could fail if
# the directory appeared between the check and the creation.
os.makedirs(output_folder_path, exist_ok=True)
    
# Search budget: total configurations to evaluate, and how many of them are
# drawn at random before the optimiser takes over (30% of the total, truncated).
n_cases = 10
n_random_starts = int(0.3 * n_cases)

# The search phase and the final "last test" fit receive identical arguments:
# the stacked URM+ICM matrix as the only constructor argument, nothing for fit.
# Two separate argument objects are built, one per role.
recommender_input_args, recommender_input_args_last_test = (
    SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[URM_train_ICM_all],
        CONSTRUCTOR_KEYWORD_ARGS={},
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS={},
    )
    for _ in range(2)
)

# Metric the search maximises when comparing configurations.
metric_to_optimize = "MAP"

# Run the Bayesian search; results are written under `output_folder_path`
# using the recommender's name as the file-name root.
parameterSearch.search(
    recommender_input_args,
    parameter_search_space=hyperparameters_range_dictionary,
    recommender_input_args_last_test=recommender_input_args_last_test,
    metric_to_optimize=metric_to_optimize,
    n_cases=n_cases,
    n_random_starts=n_random_starts,
    output_folder_path=output_folder_path,
    output_file_name_root=recommender_class.RECOMMENDER_NAME,
    save_model="last",
)

from Base.DataIO import DataIO

# Read back the metadata archive written by the search and pull out the best
# hyperparameter configuration it recorded.
metadata_file_name = recommender_class.RECOMMENDER_NAME + "_metadata.zip"
data_loader = DataIO(folder_path=output_folder_path)
search_metadata = data_loader.load_data(metadata_file_name)

best_parameters = search_metadata["hyperparameters_best"]
best_parameters

7947
Iteration No: 1 started. Evaluating function at random point.
RP3betaRecommender: URM Detected 52 (0.19 %) cold users.
RP3betaRecommender: URM Detected 2 (0.01 %) cold items.
SearchBayesianSkopt: Testing config: {'topK': 681, 'alpha': 0.6505112610916715, 'beta': 0.2389622765604527}
EvaluatorHoldout: Processed 4981 ( 100.00% ) in 3.56 sec. Users per second: 1399
SearchBayesianSkopt: New best config found. Config 0: {'topK': 681, 'alpha': 0.6505112610916715, 'beta': 0.2389622765604527} - results: ROC_AUC: 0.1794971, PRECISION: 0.0377033, PRECISION_RECALL_MIN_DEN: 0.1496436, RECALL: 0.1466433, MAP: 0.0670771, MRR: 0.1310790, NDCG: 0.1010738, F1: 0.0599841, HIT_RATE: 0.3770327, ARHR: 0.1486151, NOVELTY: 0.0054333, AVERAGE_POPULARITY: 0.1210021, DIVERSITY_MEAN_INTER_LIST: 0.9936295, DIVERSITY_HERFINDAHL: 0.9993430, COVERAGE_ITEM: 0.5526853, COVERAGE_ITEM_CORRECT: 0.0487777, COVERAGE_USER: 0.6267774, COVERAGE_USER_CORRECT: 0.1804455, DIVERSITY_GINI: 0.2463257, SHANNON_ENTROPY: 12.564777

{'topK': 848, 'alpha': 0.44760921022259814, 'beta': 0.15434611134474233}

In [73]:
# Disabled code kept as a bare string literal: the cell only evaluates (and
# echoes) this string. The snippet would convert every entry of
# `recommendations` to a NumPy array and print the list length.
# NOTE(review): `recommendations` is not defined in any visible cell.
'''for index in range(len(recommendations)):
    #print(element)
    recommendations[index]=np.array(recommendations[index])
    #print(type(element))
print(len(recommendations))'''

'for index in range(len(recommendations)):\n    #print(element)\n    recommendations[index]=np.array(recommendations[index])\n    #print(type(element))\nprint(len(recommendations))'

In [74]:
# Disabled code kept as a bare string literal: the cell only evaluates (and
# echoes) this string. The snippet would attach `recommendations` to
# `test_users` as an `item_list` column, strip brackets/quotes from each
# entry's string form, and write the frame to 'submission.csv'.
# NOTE(review): `recommendations` is not defined in any visible cell.
'''test_users['item_list']= recommendations
#test_users['item_list'] =  test_users['item_list'].apply(lambda x: x.replace('[','').replace(']','')) 
test_users['item_list'] = pd.DataFrame([str(line).strip('[').strip(']').replace("'","") for line in test_users['item_list']])

#convert the string columns to int
#test_users['item_list'] = test_users['item_list'].astype(int)
test_users
test_users.to_csv('submission.csv', index=False)'''


'test_users[\'item_list\']= recommendations\n#test_users[\'item_list\'] =  test_users[\'item_list\'].apply(lambda x: x.replace(\'[\',\'\').replace(\']\',\'\')) \ntest_users[\'item_list\'] = pd.DataFrame([str(line).strip(\'[\').strip(\']\').replace("\'","") for line in test_users[\'item_list\']])\n\n#convert the string columns to int\n#test_users[\'item_list\'] = test_users[\'item_list\'].astype(int)\ntest_users\ntest_users.to_csv(\'submission.csv\', index=False)'