In [2]:
import os

from utils import *
from Recommenders.DataIO import DataIO
from Evaluation.Evaluator import EvaluatorHoldout

In [3]:
# Load the interaction data and the users a submission is needed for.
# Both loaders come from the star-import of `utils`.
data = load_data()
users = load_users()
# preprocess_data returns the processed interactions together with the user and
# item counts and a user-id mapping (presumably this is where the mapped_* id
# columns used later by prepare_submission are added -- TODO confirm in utils).
data, num_users, num_items, mapping_user_id = preprocess_data(data)
# Three-way split; the keyword arguments request 10% for validation and 20% for
# testing (exact sampling strategy is defined in utils.split_data).
data_train, data_validation, data_test = split_data(
    data,
    num_users=num_users,
    num_items=num_items,
    validation_percentage=0.1,
    testing_percentage=0.20
)

In [4]:
# One hold-out evaluator per split, both with a single cutoff of 10.
# The generator is consumed left to right, so the validation evaluator is
# still built before the test one.
evaluator_validation, evaluator_test = (
    EvaluatorHoldout(held_out_interactions, cutoff_list=[10])
    for held_out_interactions in (data_validation, data_test)
)

EvaluatorHoldout: Ignoring 4145 (32.8%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 2158 (17.1%) Users that have less than 1 test interactions


In [4]:
from skopt.space import Real, Integer, Categorical

# Search space expressed with skopt dimensions.
# NOTE(review): these names (epochs, num_factors, sgd_mode, ...) look like the
# hyperparameters of a matrix-factorization model, and this dictionary is not
# referenced anywhere else in the visible notebook -- confirm it is still needed.
hyperparameters_range_dictionary = dict(
    epochs=Categorical([500]),
    num_factors=Integer(1, 200),
    sgd_mode=Categorical(["sgd", "adagrad", "adam"]),
    # Powers of two from 1 up to 1024.
    batch_size=Categorical([2 ** exponent for exponent in range(11)]),
    item_reg=Real(low=1e-5, high=1e-2, prior="log-uniform"),
    user_reg=Real(low=1e-5, high=1e-2, prior="log-uniform"),
    learning_rate=Real(low=1e-4, high=1e-1, prior="log-uniform"),
)

In [5]:
# Early-stopping settings handed to the search through
# EARLYSTOPPING_KEYWORD_ARGS: validate every 5 epochs on the validation
# evaluator and track MAP, allowing 5 lower validations.
earlystopping_keywargs = dict(
    validation_every_n=5,
    stop_on_validation=True,
    evaluator_object=evaluator_validation,
    lower_validations_allowed=5,
    validation_metric="MAP",
)

In [None]:
from Recommenders.KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
from HyperparameterTuning.SearchBayesianSkopt import SearchBayesianSkopt

# Model class to tune with the manually configured Bayesian search.
recommender_class = ItemKNNCFRecommender

# The search object receives both evaluators: validation and test.
hyperparameterSearch = SearchBayesianSkopt(
    recommender_class,
    evaluator_validation=evaluator_validation,
    evaluator_test=evaluator_test,
)

In [6]:
from HyperparameterTuning.SearchAbstractClass import SearchInputRecommenderArgs

# Arguments used to build and fit the recommender during the search itself.
recommender_input_args = SearchInputRecommenderArgs(
    # For a CBF model simply put [URM_train, ICM_train]
    CONSTRUCTOR_POSITIONAL_ARGS=[data_train],
    CONSTRUCTOR_KEYWORD_ARGS={},
    FIT_POSITIONAL_ARGS=[],
    FIT_KEYWORD_ARGS={},
    # Additional hyperparameters for the fit function
    EARLYSTOPPING_KEYWORD_ARGS=earlystopping_keywargs,
)

2023-11-22 15:25:58.892341: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-11-22 15:25:58.892386: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


NameError: name 'earlystopping_keywargs' is not defined

In [None]:
# Arguments used for the final model that is evaluated on the test set.
# That model must see train + validation interactions, matching
# `URM_train_last_test = data_train + data_validation` in the collaborative
# search call and the way the final recommender is instantiated further down.
# Passing only `data_validation` would train the last model on the validation
# split alone.
recommender_input_args_last_test = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS = [data_train + data_validation],
    CONSTRUCTOR_KEYWORD_ARGS = {},
    FIT_POSITIONAL_ARGS = [],
    FIT_KEYWORD_ARGS = {},
    EARLYSTOPPING_KEYWORD_ARGS = earlystopping_keywargs,
)

In [10]:
# Folder handed to the hyperparameter search and later read back through DataIO.
output_folder_path = "result_experiments/"

# Create the folder when missing. `exist_ok=True` replaces the separate
# exists-check, so there is no window between checking and creating.
os.makedirs(output_folder_path, exist_ok=True)

# Search budget: 50 configurations, the first 30% of which are random starts.
n_cases = 50
n_random_starts = int(n_cases*0.3)
# Metric and cutoff the search optimizes.
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

In [9]:
from HyperparameterTuning.run_hyperparameter_search import runHyperparameterSearch_Collaborative, runHyperparameterSearch_Content
from Recommenders.NonPersonalizedRecommender import TopPop, Random
from Recommenders.GraphBased.RP3betaRecommender import RP3betaRecommender

# Model tuned by the packaged collaborative search routine.
recommender_class = RP3betaRecommender

# Runs the search and writes its artefacts into `output_folder_path`.
# The final ("last test") model is trained on train + validation.
# `resume_from_saved=True` lets an interrupted run pick up from saved results.
runHyperparameterSearch_Collaborative(
    recommender_class,
    URM_train=data_train,
    URM_train_last_test=data_train + data_validation,
    metric_to_optimize=metric_to_optimize,
    cutoff_to_optimize=cutoff_to_optimize,
    evaluator_validation_earlystopping=evaluator_validation,
    evaluator_validation=evaluator_validation,
    evaluator_test=evaluator_test,
    output_folder_path=output_folder_path,
    parallelizeKNN=True,
    allow_weighting=True,
    resume_from_saved=True,
    save_model="best",
    similarity_type_list=["cosine", "jaccard", "asymmetric", "dice", "tversky"],
    n_cases=n_cases,
    n_random_starts=n_random_starts,
)

SearchBayesianSkopt: Resuming 'RP3betaRecommender' Failed, no such file exists.

Iteration No: 1 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'topK': 796, 'alpha': 1.8250841551967478, 'beta': 1.4798467355186267, 'normalize_similarity': False}
RP3betaRecommender: URM Detected 356 ( 2.8%) users with no interactions.
RP3betaRecommender: URM Detected 201 ( 0.9%) items with no interactions.
RP3betaRecommender: Similarity column 22222 (100.0%), 1943.02 column/sec. Elapsed time 11.44 sec
EvaluatorHoldout: Processed 8473 (100.0%) in 5.84 sec. Users per second: 1451
SearchBayesianSkopt: New best config found. Config 0: {'topK': 796, 'alpha': 1.8250841551967478, 'beta': 1.4798467355186267, 'normalize_similarity': False} - results: PRECISION: 0.0018175, PRECISION_RECALL_MIN_DEN: 0.0046325, RECALL: 0.0044285, MAP: 0.0006008, MAP_MIN_DEN: 0.0015096, MRR: 0.0056881, NDCG: 0.0032203, F1: 0.0025773, HIT_RATE: 0.0172312, ARHR_ALL_HITS: 0.0058480, NOVELTY: 0.007522

In [11]:
from Recommenders.GraphBased.RP3betaRecommender import RP3betaRecommender
# Read back the metadata file written by the search for this recommender.
recommender_class = RP3betaRecommender
data_loader = DataIO(folder_path=output_folder_path)
search_metadata = data_loader.load_data(
    f"{recommender_class.RECOMMENDER_NAME}_metadata.zip"
)

In [12]:
# Pull the "hyperparameters_best" entry out of the loaded search metadata and
# display it as the cell output.
best_hyperparameters = search_metadata["hyperparameters_best"]
best_hyperparameters

{'topK': 96,
 'alpha': 0.2442006775658275,
 'beta': 0.2447989297297578,
 'normalize_similarity': True}

In [13]:
# Display the "result_on_test_best" entry of the search metadata
# (presumably the test-set metrics of the best configuration -- TODO confirm).
search_metadata['result_on_test_best']

Unnamed: 0_level_0,PRECISION,PRECISION_RECALL_MIN_DEN,RECALL,MAP,MAP_MIN_DEN,MRR,NDCG,F1,HIT_RATE,ARHR_ALL_HITS,...,COVERAGE_USER,COVERAGE_USER_HIT,USERS_IN_GT,DIVERSITY_GINI,SHANNON_ENTROPY,RATIO_DIVERSITY_HERFINDAHL,RATIO_DIVERSITY_GINI,RATIO_SHANNON_ENTROPY,RATIO_AVERAGE_POPULARITY,RATIO_NOVELTY
cutoff,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10,0.084112,0.149117,0.12712,0.040705,0.07194,0.242596,0.13322,0.101237,0.496378,0.310886,...,0.830195,0.412091,0.830195,0.070386,10.076245,0.996416,0.202897,0.776438,2.040238,0.319829


In [14]:
# Instantiate the recommender on the train + validation interactions; no fit is
# run here because the saved model is loaded in the next cell.
recommender_object = recommender_class(data_train + data_validation)

RP3betaRecommender: URM Detected 213 ( 1.7%) users with no interactions.
RP3betaRecommender: URM Detected 121 ( 0.5%) items with no interactions.


In [15]:
# Restore the "<RECOMMENDER_NAME>_best_model_last.zip" model saved by the search.
recommender_object.load_model(
    output_folder_path,
    file_name=f"{recommender_object.RECOMMENDER_NAME}_best_model_last.zip",
)

RP3betaRecommender: Loading model from file 'result_experiments/RP3betaRecommender_best_model_last.zip'


RP3betaRecommender: Loading complete


In [16]:
def prepare_submission(ratings: pd.DataFrame, users_to_recommend: np.ndarray, urm_train: sp.csr_matrix, recommender: object):
    """Build the submission mapping ``{user_id: recommended item_ids}``.

    Parameters
    ----------
    ratings:
        Interactions frame; the columns ``user_id``, ``mapped_user_id``,
        ``item_id`` and ``mapped_item_id`` are read.
    users_to_recommend:
        Iterable of original ``user_id`` values that must appear in the result.
    urm_train:
        Not used by this function; kept so existing callers keep working.
    recommender:
        Object exposing ``recommend(mapped_user_id, cutoff=...)`` that returns
        an iterable of mapped item ids.

    Returns
    -------
    dict
        Each requested user with interactions in ``ratings`` maps to a list of
        original item ids from the recommender. Every other requested user maps
        to a NumPy array of randomly drawn, distinct item ids.
    """
    recommendation_length = 10

    # One (user_id, mapped_user_id) pair per requested user present in ratings.
    users_ids_and_mappings = ratings[ratings.user_id.isin(users_to_recommend)][["user_id", "mapped_user_id"]].drop_duplicates()

    mapping_to_item_id = dict(zip(ratings.mapped_item_id, ratings.item_id))
    item_ids = ratings.item_id.unique()

    submission = dict()
    # Iterate the two columns directly: iterrows() builds a Series per row and
    # may upcast the ids when the two columns have different dtypes.
    for user_id, mapped_user_id in zip(users_ids_and_mappings.user_id,
                                       users_ids_and_mappings.mapped_user_id):
        recommendations = recommender.recommend(mapped_user_id, cutoff=recommendation_length)
        submission[user_id] = [mapping_to_item_id[item_id] for item_id in recommendations]

    # Fallback for users without interactions: draw WITHOUT replacement so the
    # same item is never recommended twice, and cap the draw at the catalogue
    # size so tiny catalogues do not raise.
    fallback_length = min(recommendation_length, len(item_ids))
    for user_id in users_to_recommend:
        if user_id not in submission:
            submission[user_id] = np.random.choice(item_ids, fallback_length, replace=False)

    return submission

In [17]:
from utils import write_submission

# Build the per-user recommendations with the loaded model and hand them to
# write_submission (output location and format are defined in utils).
write_submission(prepare_submission(data, users, data_train + data_validation, recommender_object))