In [1]:
import os

from utils import *
from Recommenders.DataIO import DataIO
from Evaluation.Evaluator import EvaluatorHoldout

In [2]:
# Load the raw interactions and the users that are later passed to
# prepare_submission as the users to recommend for.
data = load_data()
users = load_users()
# preprocess_data (from utils) returns the processed interactions together with
# the user/item counts and a user-id mapping; its internals are not visible here.
data, num_users, num_items, mapping_user_id = preprocess_data(data)
# Three-way split into train / validation / test.
# NOTE(review): the percentages are presumably fractions of the interactions
# (10% validation, 20% test) -- confirm against utils.split_data.
data_train, data_validation, data_test = split_data(
    data,
    num_users=num_users,
    num_items=num_items,
    validation_percentage=0.1,
    testing_percentage=0.20
)

In [3]:
# Hold-out evaluators over the validation and test splits, each configured with
# a single cutoff of 10 (the cutoff later used as cutoff_to_optimize).
evaluator_validation = EvaluatorHoldout(data_validation, cutoff_list=[10])
evaluator_test = EvaluatorHoldout(data_test, cutoff_list=[10])

EvaluatorHoldout: Ignoring 4139 (32.8%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 2193 (17.4%) Users that have less than 1 test interactions


In [4]:
from skopt.space import Real, Integer, Categorical

# Search space expressed as skopt dimensions, keyed by hyperparameter name.
hyperparameters_range_dictionary = dict(
    epochs=Categorical([500]),
    num_factors=Integer(1, 200),
    sgd_mode=Categorical(["sgd", "adagrad", "adam"]),
    # Powers of two from 1 (2**0) up to 1024 (2**10).
    batch_size=Categorical([2 ** exponent for exponent in range(11)]),
    # Regularisation terms and learning rate are sampled on a log scale.
    item_reg=Real(low=1e-5, high=1e-2, prior="log-uniform"),
    user_reg=Real(low=1e-5, high=1e-2, prior="log-uniform"),
    learning_rate=Real(low=1e-4, high=1e-1, prior="log-uniform"),
)

In [5]:
# Early-stopping settings: validate every 5 epochs on the validation evaluator,
# tracking MAP, with up to 5 lower validations allowed.
earlystopping_keywargs = dict(
    validation_every_n=5,
    stop_on_validation=True,
    evaluator_object=evaluator_validation,
    lower_validations_allowed=5,
    validation_metric="MAP",
)

In [6]:
from Recommenders.KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
from HyperparameterTuning.SearchBayesianSkopt import SearchBayesianSkopt

# Bayesian (skopt) search object for the ItemKNN collaborative recommender,
# wired to the validation and test evaluators built above.
# NOTE(review): recommender_class is reassigned to IALSRecommender in a later
# cell, and no call on hyperparameterSearch is visible in this notebook.
recommender_class = ItemKNNCFRecommender

hyperparameterSearch = SearchBayesianSkopt(recommender_class,
                                         evaluator_validation=evaluator_validation,
                                         evaluator_test=evaluator_test)

2023-11-29 10:24:22.290106: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-11-29 10:24:22.290128: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [7]:
from HyperparameterTuning.SearchAbstractClass import SearchInputRecommenderArgs

# Constructor / fit arguments for models built on the training split.
recommender_input_args = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS = [data_train],     # For a CBF model simply put [URM_train, ICM_train]
    CONSTRUCTOR_KEYWORD_ARGS = {},
    FIT_POSITIONAL_ARGS = [],
    FIT_KEYWORD_ARGS = {},
    EARLYSTOPPING_KEYWORD_ARGS = earlystopping_keywargs,     # Additional hyperparameters for the fit function
)

In [8]:
# Constructor / fit arguments for the "last test" model (presumably the final
# refit that is scored on the test split -- confirm against the search class).
# It is built on train + validation, the same matrix this notebook passes as
# URM_train_last_test and uses when re-instantiating the best model; passing the
# validation split alone would discard every training interaction.
recommender_input_args_last_test = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS = [data_train + data_validation],
    CONSTRUCTOR_KEYWORD_ARGS = {},
    FIT_POSITIONAL_ARGS = [],
    FIT_KEYWORD_ARGS = {},
    EARLYSTOPPING_KEYWORD_ARGS = earlystopping_keywargs,
)

In [9]:
# Folder that receives the search results and metadata.
output_folder_path = "result_experiments/"

# exist_ok=True creates the folder only when it is missing, without a separate
# exists() check that could race with another process creating it.
os.makedirs(output_folder_path, exist_ok=True)

# Search budget: total number of configurations, 30% of which are random starts.
n_cases = 50
n_random_starts = int(n_cases * 0.3)

# Objective of the search: MAP at cutoff 10.
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

In [11]:
from HyperparameterTuning.run_hyperparameter_search import runHyperparameterSearch_Collaborative
from Recommenders.MatrixFactorization.IALSRecommender import IALSRecommender

# Run the framework's collaborative hyperparameter search for IALS, optimising
# metric_to_optimize at cutoff_to_optimize (both set in an earlier cell).
# Results are stored under output_folder_path and read back in the next cells.
# resume_from_saved=True continues from previously saved configurations.
# NOTE(review): parallelizeKNN, allow_weighting and similarity_type_list look
# KNN-specific and are presumably unused for IALSRecommender -- confirm.
recommender_class = IALSRecommender

runHyperparameterSearch_Collaborative(recommender_class,
       URM_train = data_train,
       URM_train_last_test = data_train + data_validation,
       metric_to_optimize = metric_to_optimize,
       cutoff_to_optimize = cutoff_to_optimize,
       evaluator_validation_earlystopping = evaluator_validation,
       evaluator_validation = evaluator_validation,
       evaluator_test = evaluator_test,
       output_folder_path = output_folder_path,
       parallelizeKNN = True,
       allow_weighting = True,
       resume_from_saved = True,
       save_model = "best",
       similarity_type_list = ['cosine', 'jaccard', "asymmetric", "dice", "tversky"],
       n_cases = n_cases,
       n_random_starts = n_random_starts)

SearchBayesianSkopt: Resuming 'IALSRecommender'... Loaded 16 configurations.
Iteration No: 1 started. Evaluating function at random point.
Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.3665
Function value obtained: -0.0136
Current minimum: -0.0136
Iteration No: 2 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'num_factors': 1, 'epochs': 300, 'confidence_scaling': 'linear', 'alpha': 2.287230697678018, 'epsilon': 0.05911363226491026, 'reg': 1e-05}
IALSRecommender: URM Detected 312 ( 2.5%) users with no interactions.
IALSRecommender: URM Detected 198 ( 0.9%) items with no interactions.
IALSRecommender: Epoch 1 of 300. Elapsed time 0.63 sec
IALSRecommender: Epoch 2 of 300. Elapsed time 1.23 sec
IALSRecommender: Epoch 3 of 300. Elapsed time 1.83 sec
IALSRecommender: Epoch 4 of 300. Elapsed time 2.42 sec
IALSRecommender: Validation begins...
EvaluatorHoldout: Processed 8499 (100.0%) in 4.55 sec. Users per second: 1869
IALSRecommend

Traceback (most recent call last):
  File "/home/kinami/code/rcs/HyperparameterTuning/SearchAbstractClass.py", line 468, in _objective_function
    result_df, recommender_instance = self._evaluate_on_validation(current_fit_hyperparameters_dict, was_already_evaluated_flag, was_already_evaluated_index)
  File "/home/kinami/code/rcs/HyperparameterTuning/SearchAbstractClass.py", line 326, in _evaluate_on_validation
    recommender_instance, train_time = self._fit_model(current_fit_hyperparameters)
  File "/home/kinami/code/rcs/HyperparameterTuning/SearchAbstractClass.py", line 297, in _fit_model
    recommender_instance.fit(*self.recommender_input_args.FIT_POSITIONAL_ARGS,
  File "/home/kinami/code/rcs/Recommenders/MatrixFactorization/IALSRecommender.py", line 88, in fit
    self._train_with_early_stopping(epochs,
  File "/home/kinami/code/rcs/Recommenders/Incremental_Training_Early_Stopping.py", line 199, in _train_with_early_stopping
    self._run_epoch(epochs_current)
  File "/home/kina

Iteration No: 17 ended. Search finished for the next optimal point.
Time taken: 14.0579
Function value obtained: 65504.0000
Current minimum: -0.0136
Iteration No: 18 started. Searching for the next optimal point.
SearchBayesianSkopt: Testing config: {'num_factors': 150, 'epochs': 300, 'confidence_scaling': 'linear', 'alpha': 0.001, 'epsilon': 0.018519762534421967, 'reg': 1e-05}
IALSRecommender: URM Detected 312 ( 2.5%) users with no interactions.
IALSRecommender: URM Detected 198 ( 0.9%) items with no interactions.
IALSRecommender: Epoch 1 of 300. Elapsed time 17.63 sec
IALSRecommender: Epoch 2 of 300. Elapsed time 34.93 sec
IALSRecommender: Epoch 3 of 300. Elapsed time 52.38 sec
IALSRecommender: Epoch 4 of 300. Elapsed time 1.16 min
IALSRecommender: Validation begins...
EvaluatorHoldout: Processed 8499 (100.0%) in 5.03 sec. Users per second: 1689
IALSRecommender: CUTOFF: 10 - PRECISION: 0.0264266, PRECISION_RECALL_MIN_DEN: 0.0784093, RECALL: 0.0765028, MAP: 0.0114247, MAP_MIN_DEN: 0.0

In [12]:
# Read back the metadata archive saved for the current recommender class.
data_loader = DataIO(folder_path=output_folder_path)
search_metadata = data_loader.load_data(
    f"{recommender_class.RECOMMENDER_NAME}_metadata.zip"
)

In [13]:
# Best hyperparameter set recorded in the search metadata; the bare name on the
# last line displays it as the cell output.
best_hyperparameters = search_metadata["hyperparameters_best"]
best_hyperparameters

{'num_factors': 197,
 'epochs': 55,
 'confidence_scaling': 'log',
 'alpha': 3.316346590859259,
 'epsilon': 1.262226778640135,
 'reg': 0.01}

In [14]:
# Display the test-split result stored in the metadata for the best configuration.
search_metadata['result_on_test_best']

Unnamed: 0_level_0,PRECISION,PRECISION_RECALL_MIN_DEN,RECALL,MAP,MAP_MIN_DEN,MRR,NDCG,F1,HIT_RATE,ARHR_ALL_HITS,...,COVERAGE_USER,COVERAGE_USER_HIT,USERS_IN_GT,DIVERSITY_GINI,SHANNON_ENTROPY,RATIO_DIVERSITY_HERFINDAHL,RATIO_DIVERSITY_GINI,RATIO_SHANNON_ENTROPY,RATIO_AVERAGE_POPULARITY,RATIO_NOVELTY
cutoff,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10,0.064605,0.116092,0.100004,0.030891,0.056247,0.201536,0.105286,0.078498,0.416467,0.248,...,0.826476,0.3442,0.826476,0.023701,9.471476,0.998339,0.068454,0.729997,1.566796,0.299572


In [15]:
# Instantiate the searched recommender class on train + validation; the saved
# best model is loaded into this object in the next cell.
recommender_object = recommender_class(data_train + data_validation)

IALSRecommender: URM Detected 212 ( 1.7%) users with no interactions.
IALSRecommender: URM Detected 105 ( 0.5%) items with no interactions.


In [16]:
# Restore the saved "best model, last" archive from the results folder.
recommender_object.load_model(
    output_folder_path,
    file_name=f"{recommender_object.RECOMMENDER_NAME}_best_model_last.zip",
)

IALSRecommender: Loading model from file 'result_experiments/IALSRecommender_best_model_last.zip'
IALSRecommender: Loading complete


In [17]:
def prepare_submission(ratings: pd.DataFrame, users_to_recommend: np.ndarray, urm_train: sp.csr_matrix, recommender: object, recommendation_length: int = 10) -> dict:
    """Build the ``{user_id: [item_id, ...]}`` mapping used for the submission.

    Users that appear in ``ratings`` get the recommender's output translated
    back from mapped item ids to original item ids. Users without any row in
    ``ratings`` fall back to a random sample of distinct catalogue items.

    Args:
        ratings: Interactions with the columns ``user_id``, ``mapped_user_id``,
            ``item_id`` and ``mapped_item_id``.
        users_to_recommend: Original user ids that need a recommendation list.
        urm_train: Not used by this function; kept so existing callers that
            pass it positionally keep working.
        recommender: Object exposing ``recommend(mapped_user_id, cutoff=...)``
            that returns an iterable of mapped item ids.
        recommendation_length: Number of items requested per user
            (defaults to 10, the value that was previously hard-coded).

    Returns:
        Dict keyed by original user id whose values are lists of original
        item ids.

    Raises:
        KeyError: If the recommender returns a mapped item id that does not
            occur in ``ratings.mapped_item_id``.
    """
    known_users = ratings.loc[
        ratings.user_id.isin(users_to_recommend), ["user_id", "mapped_user_id"]
    ].drop_duplicates()

    mapping_to_item_id = dict(zip(ratings.mapped_item_id, ratings.item_id))
    item_ids = ratings.item_id.unique()

    submission = dict()
    # Iterate the two columns directly: unlike iterrows() this never upcasts the
    # ids when the columns have different dtypes.
    for user_id, mapped_user_id in zip(known_users.user_id, known_users.mapped_user_id):
        recommendations = recommender.recommend(mapped_user_id, cutoff=recommendation_length)
        submission[user_id] = [mapping_to_item_id[item_id] for item_id in recommendations]

    # Sampling without replacement cannot draw more items than the catalogue holds.
    fallback_size = min(recommendation_length, len(item_ids))
    for user_id in users_to_recommend:
        if user_id not in submission:
            # replace=False keeps the fallback list free of duplicate items;
            # tolist() makes it a plain list like the recommender-based entries.
            submission[user_id] = np.random.choice(item_ids, fallback_size, replace=False).tolist()

    return submission

In [None]:
from utils import write_submission

# Build the per-user recommendations from the loaded model, then write them out.
urm_train_validation = data_train + data_validation
submission = prepare_submission(data, users, urm_train_validation, recommender_object)
write_submission(submission)