In [1]:
import os

from utils import *
from Recommenders.DataIO import DataIO
from Evaluation.Evaluator import EvaluatorHoldout

In [2]:
# Read the raw interactions and the list of users to produce recommendations for.
data = load_data()
users = load_users()

# NOTE: `data` is rebound to its preprocessed form here, so this cell is not
# idempotent -- re-run it from `load_data()` rather than on its own.
data, num_users, num_items, mapping_user_id = preprocess_data(data)

# Hold out 10% of the interactions for validation and 20% for testing.
data_train, data_validation, data_test = split_data(
    data, num_users=num_users, num_items=num_items,
    validation_percentage=0.1, testing_percentage=0.20,
)

In [3]:
# One hold-out evaluator per split, both scoring recommendation lists at cutoff 10.
# The validation evaluator is built first, then the test one.
evaluator_validation, evaluator_test = (
    EvaluatorHoldout(holdout_split, cutoff_list=[10])
    for holdout_split in (data_validation, data_test)
)

EvaluatorHoldout: Ignoring 4170 (33.0%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 2169 (17.2%) Users that have less than 1 test interactions


In [4]:
from skopt.space import Real, Integer, Categorical

# Search space with training-style hyperparameters (epochs, latent factors,
# optimizer, batch size, regularisation, learning rate).
# NOTE(review): no later cell visible in this notebook references this
# dictionary -- confirm whether it is still needed.
hyperparameters_range_dictionary = {
    "epochs": Categorical([500]),
    "num_factors": Integer(1, 200),
    "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
    # Powers of two from 1 up to 1024.
    "batch_size": Categorical([2 ** exponent for exponent in range(11)]),
    "item_reg": Real(1e-5, 1e-2, prior="log-uniform"),
    "user_reg": Real(1e-5, 1e-2, prior="log-uniform"),
    "learning_rate": Real(1e-4, 1e-1, prior="log-uniform"),
}

In [5]:
# Early-stopping settings forwarded through SearchInputRecommenderArgs:
# validate every 5 epochs on MAP using the validation evaluator, and allow
# up to 5 validations that do not improve before stopping.
earlystopping_keywargs = dict(
    validation_every_n=5,
    stop_on_validation=True,
    evaluator_object=evaluator_validation,
    lower_validations_allowed=5,
    validation_metric="MAP",
)

In [6]:
from Recommenders.KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
from HyperparameterTuning.SearchBayesianSkopt import SearchBayesianSkopt

# Model whose hyperparameters are tuned in this notebook.
recommender_class = ItemKNNCFRecommender

# Bayesian search wrapper evaluated on the validation split, with the test
# split used for the final report.
# NOTE(review): this object is not used by any later cell visible here.
hyperparameterSearch = SearchBayesianSkopt(
    recommender_class,
    evaluator_validation=evaluator_validation,
    evaluator_test=evaluator_test,
)

2023-11-30 13:21:28.096035: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-11-30 13:21:28.096052: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [7]:
from HyperparameterTuning.SearchAbstractClass import SearchInputRecommenderArgs

# Arguments used to build and fit the recommender during the search.
# The constructor only receives the training interactions; a content-based
# model would instead take [URM_train, ICM_train].
recommender_input_args = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS=[data_train],
    CONSTRUCTOR_KEYWORD_ARGS={},
    FIT_POSITIONAL_ARGS=[],
    FIT_KEYWORD_ARGS={},
    # Extra settings handed to the fit function for early stopping.
    EARLYSTOPPING_KEYWORD_ARGS=earlystopping_keywargs,
)

In [8]:
# Arguments for the final fit that is evaluated on the test split.
# That model must see both the training and the validation interactions,
# matching `URM_train_last_test = data_train + data_validation` passed to the
# search and the `recommender_class(data_train + data_validation)` call used
# for the submission. Building it from `data_validation` alone would train the
# final model on the validation interactions only.
recommender_input_args_last_test = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS = [data_train + data_validation],
    CONSTRUCTOR_KEYWORD_ARGS = {},
    FIT_POSITIONAL_ARGS = [],
    FIT_KEYWORD_ARGS = {},
    EARLYSTOPPING_KEYWORD_ARGS = earlystopping_keywargs,
)

In [9]:
# Folder where the search writes its metadata and saved models.
output_folder_path = "result_experiments/"

# Create the folder if it is missing. `exist_ok=True` replaces the separate
# existence check, so there is no window between checking and creating.
os.makedirs(output_folder_path, exist_ok=True)

# Search budget: 50 configurations per search, the first 30% of which are
# sampled at random before the Bayesian optimiser takes over.
n_cases = 50
n_random_starts = int(n_cases*0.3)

# Optimisation target: MAP at cutoff 10.
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

In [10]:
from HyperparameterTuning.run_hyperparameter_search import runHyperparameterSearch_Collaborative
from Recommenders.KNN.ItemKNNCFRecommender import ItemKNNCFRecommender

recommender_class = ItemKNNCFRecommender

# Tune the item-based KNN once per similarity type listed below. Going by the
# log output, each similarity gets its own result files, named
# '<RECOMMENDER_NAME>_<similarity>' inside `output_folder_path`.
runHyperparameterSearch_Collaborative(
    recommender_class,
    # Data: fit on train during the search, on train + validation for the last test.
    URM_train=data_train,
    URM_train_last_test=data_train + data_validation,
    # Objective.
    metric_to_optimize=metric_to_optimize,
    cutoff_to_optimize=cutoff_to_optimize,
    # Evaluators.
    evaluator_validation_earlystopping=evaluator_validation,
    evaluator_validation=evaluator_validation,
    evaluator_test=evaluator_test,
    # Persistence: resume an earlier run when its files exist, keep the best model.
    output_folder_path=output_folder_path,
    resume_from_saved=True,
    save_model="best",
    # KNN-specific options.
    parallelizeKNN=True,
    allow_weighting=True,
    similarity_type_list=['cosine', 'jaccard', "asymmetric", "dice", "tversky"],
    # Search budget.
    n_cases=n_cases,
    n_random_starts=n_random_starts,
)

SearchBayesianSkopt: Resuming 'ItemKNNCFRecommender_cosine' Failed, no such file exists.

Iteration No: 1 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'topK': 824, 'shrink': 106, 'similarity': 'cosine', 'normalize': True, 'feature_weighting': 'TF-IDF'}
ItemKNNCFRecommender: URM Detected 319 ( 2.5%) users with no interactions.
ItemKNNCFRecommender: URM Detected 193 ( 0.9%) items with no interactions.
SearchBayesianSkopt: Resuming 'ItemKNNCFRecommender_jaccard' Failed, no such file exists.

Iteration No: 1 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'topK': 622, 'shrink': 747, 'similarity': 'jaccard', 'normalize': False}
ItemKNNCFRecommender: URM Detected 319 ( 2.5%) users with no interactions.
ItemKNNCFRecommender: URM Detected 193 ( 0.9%) items with no interactions.
SearchBayesianSkopt: Resuming 'ItemKNNCFRecommender_asymmetric' Failed, no such file exists.

Iteration No: 1 started. Evaluating function at ra



SearchBayesianSkopt: Testing config: {'topK': 5, 'shrink': 207, 'similarity': 'cosine', 'normalize': True, 'feature_weighting': 'TF-IDF'}
SearchBayesianSkopt: Config 49 was already explored at index 40. Config: {'topK': 5, 'shrink': 207, 'similarity': 'cosine', 'normalize': True, 'feature_weighting': 'TF-IDF'} - results: PRECISION: 0.0400449, PRECISION_RECALL_MIN_DEN: 0.1173449, RECALL: 0.1141223, MAP: 0.0169360, MAP_MIN_DEN: 0.0501030, MRR: 0.1345777, NDCG: 0.0873356, F1: 0.0592865, HIT_RATE: 0.3149504, ARHR_ALL_HITS: 0.1507797, NOVELTY: 0.0055121, AVERAGE_POPULARITY: 0.1502704, DIVERSITY_MEAN_INTER_LIST: 0.9848256, DIVERSITY_HERFINDAHL: 0.9984709, COVERAGE_ITEM: 0.4300693, COVERAGE_ITEM_HIT: 0.0592206, ITEMS_IN_GT: 0.5522005, COVERAGE_USER: 0.6700427, COVERAGE_USER_HIT: 0.2110302, USERS_IN_GT: 0.6700427, DIVERSITY_GINI: 0.1003592, SHANNON_ENTROPY: 10.9717197, RATIO_DIVERSITY_HERFINDAHL: 0.9988302, RATIO_DIVERSITY_GINI: 0.2892056, RATIO_SHANNON_ENTROPY: 0.8453313, RATIO_AVERAGE_POPULA

In [29]:
# Read back the metadata written by the search for the 'tversky' similarity.
# The similarity suffix is hard-coded here; change it to inspect another run.
data_loader = DataIO(folder_path=output_folder_path)
search_metadata = data_loader.load_data(
    f"{recommender_class.RECOMMENDER_NAME}_tversky_metadata.zip"
)

In [30]:
# Best configuration recorded in the loaded search metadata; the bare name on
# the last line displays it as the cell output.
best_hyperparameters = search_metadata["hyperparameters_best"]
best_hyperparameters

{'topK': 33,
 'shrink': 15,
 'similarity': 'tversky',
 'normalize': True,
 'tversky_alpha': 0.04343390030120942,
 'tversky_beta': 1.5891904388185008}

In [31]:
# Display the metrics stored under 'result_on_test_best' in the search metadata.
search_metadata['result_on_test_best']

Unnamed: 0_level_0,PRECISION,PRECISION_RECALL_MIN_DEN,RECALL,MAP,MAP_MIN_DEN,MRR,NDCG,F1,HIT_RATE,ARHR_ALL_HITS,...,COVERAGE_USER,COVERAGE_USER_HIT,USERS_IN_GT,DIVERSITY_GINI,SHANNON_ENTROPY,RATIO_DIVERSITY_HERFINDAHL,RATIO_DIVERSITY_GINI,RATIO_SHANNON_ENTROPY,RATIO_AVERAGE_POPULARITY,RATIO_NOVELTY
cutoff,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10,0.084898,0.145643,0.122497,0.041613,0.072249,0.242253,0.132703,0.100289,0.48763,0.313713,...,0.828375,0.40394,0.828375,0.080939,10.443466,0.997663,0.233242,0.804631,1.741265,0.330345


In [32]:
# Instantiate the recommender on the sum of the training and validation
# interactions, the same data passed to the search as `URM_train_last_test`.
recommender_object = recommender_class(data_train + data_validation)

ItemKNNCFRecommender: URM Detected 212 ( 1.7%) users with no interactions.
ItemKNNCFRecommender: URM Detected 113 ( 0.5%) items with no interactions.


In [33]:
# Restore a previously saved model from the experiments folder instead of refitting.
# NOTE(review): the metadata above is read from a file with a '_tversky'
# suffix, while this file name has no similarity suffix -- confirm that this
# archive really holds the model matching `best_hyperparameters`.
recommender_object.load_model(
    output_folder_path,
    file_name=f"{recommender_object.RECOMMENDER_NAME}_best_model_last.zip",
)

ItemKNNCFRecommender: Loading model from file 'result_experiments/ItemKNNCFRecommender_best_model_last.zip'
ItemKNNCFRecommender: Loading complete


In [16]:
def prepare_submission(
    ratings: pd.DataFrame,
    users_to_recommend: np.array,
    urm_train: sp.csr_matrix,
    recommender: object,
    recommendation_length: int = 10,
):
    """Build the recommendation list of every requested user.

    Parameters
    ----------
    ratings:
        Interactions with the columns ``user_id``, ``mapped_user_id``,
        ``item_id`` and ``mapped_item_id``.
    users_to_recommend:
        Original (unmapped) ids of the users that need a recommendation list.
    urm_train:
        Not used by this function; kept so existing callers keep working.
    recommender:
        Object exposing ``recommend(mapped_user_id, cutoff=...)`` and returning
        an iterable of mapped item ids.
    recommendation_length:
        Number of items requested per user (defaults to 10).

    Returns
    -------
    dict
        Maps each original user id to its recommended original item ids: a
        list for users found in ``ratings``, a NumPy array of randomly drawn
        distinct items for users that have no interactions.

    Raises
    ------
    KeyError
        If the recommender returns a mapped item id absent from ``ratings``.
    """
    known_users = ratings.loc[
        ratings.user_id.isin(users_to_recommend), ["user_id", "mapped_user_id"]
    ].drop_duplicates()

    mapping_to_item_id = dict(zip(ratings.mapped_item_id, ratings.item_id))
    item_ids = ratings.item_id.unique()

    submission = dict()
    # Iterate the two columns directly: `iterrows` builds one Series per row and
    # upcasts mixed dtypes, which could turn the mapped index into a float.
    for user_id, mapped_user_id in zip(known_users.user_id, known_users.mapped_user_id):
        recommendations = recommender.recommend(mapped_user_id, cutoff=recommendation_length)
        submission[user_id] = [mapping_to_item_id[item_id] for item_id in recommendations]

    # Users without any interaction get random items. Sampling is done without
    # replacement so that the same item is never recommended twice, and the
    # sample size is capped by the number of known items.
    fallback_size = min(recommendation_length, len(item_ids))
    for user_id in users_to_recommend:
        if user_id not in submission:
            submission[user_id] = np.random.choice(item_ids, fallback_size, replace=False)

    return submission

In [None]:
from utils import write_submission

# Build the per-user recommendation lists from the preprocessed interactions
# (`data`), the target users (`users`) and the loaded recommender, then hand
# the resulting dictionary to `write_submission`.
write_submission(prepare_submission(data, users, data_train + data_validation, recommender_object))