# Set up the connection with GitHub

In [1]:
# ! cd /kaggle/working && rm -rf RECsys_Challenge2024

In [2]:
# Fetch the GitHub access token stored under the Kaggle secret named "Token"
# and use it to clone the project repository into the current directory.
from kaggle_secrets import UserSecretsClient

token = UserSecretsClient().get_secret("Token")

# NOTE(review): the token is interpolated into the clone URL; git normally keeps
# the remote URL (credentials included) in the clone's .git/config — confirm the
# working directory is never published with that file in it.
! git clone https://{token}@github.com/madratak/RECsys_Challenge2024.git

Cloning into 'RECsys_Challenge2024'...
remote: Enumerating objects: 5079, done.[K
remote: Counting objects: 100% (151/151), done.[K
remote: Compressing objects: 100% (94/94), done.[K
remote: Total 5079 (delta 74), reused 82 (delta 32), pack-reused 4928 (from 3)[K
Receiving objects: 100% (5079/5079), 379.22 MiB | 39.16 MiB/s, done.
Resolving deltas: 100% (2923/2923), done.
Updating files: 100% (423/423), done.


In [3]:
! pip install PyGithub requests

Collecting PyGithub
  Downloading PyGithub-2.5.0-py3-none-any.whl.metadata (3.9 kB)
Collecting pynacl>=1.4.0 (from PyGithub)
  Downloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl.metadata (8.6 kB)
Downloading PyGithub-2.5.0-py3-none-any.whl (375 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m375.9/375.9 kB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (856 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m856.7/856.7 kB[0m [31m31.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pynacl, PyGithub
Successfully installed PyGithub-2.5.0 pynacl-1.5.0


In [4]:
# Make the cloned repository the working directory (so its packages can be
# imported by the cells below), then run the repository's Cython build script.
%cd /kaggle/working/RECsys_Challenge2024
! python run_compile_all_cython.py

/kaggle/working/RECsys_Challenge2024
run_compile_all_cython: Found 11 Cython files in 5 folders...
run_compile_all_cython: All files will be compiled using your current python environment: '/usr/bin/python3'
Compiling [1/11]: MatrixFactorization_Cython_Epoch.pyx... 
In file included from [01m[K/usr/local/lib/python3.10/dist-packages/numpy/core/include/numpy/ndarraytypes.h:1929[m[K,
                 from [01m[K/usr/local/lib/python3.10/dist-packages/numpy/core/include/numpy/ndarrayobject.h:12[m[K,
                 from [01m[K/usr/local/lib/python3.10/dist-packages/numpy/core/include/numpy/arrayobject.h:5[m[K,
                 from [01m[KMatrixFactorization_Cython_Epoch.c:1252[m[K:
      |  [01;35m[K^~~~~~~[m[K
[01m[KMatrixFactorization_Cython_Epoch.c:[m[K In function ‘[01m[K__pyx_pf_32MatrixFactorization_Cython_Epoch_32MatrixFactorization_Cython_Epoch_10epochIteration_Cython_ASY_SVD_SGD[m[K’:
26256 |         for (__pyx_t_22 = __pyx_v_start_pos_seen_i

In [5]:
from Utils.notebookFunctions import *
import numpy as np
import pandas as pd
import scipy.sparse as sps
import time
import shutil
import optuna
import json
import os
from Utils.seconds_to_biggest_unit import seconds_to_biggest_unit


# Seed NumPy's global random generator once, before anything draws from it.
np.random.seed(42)

# Location of the cloned repository on the Kaggle machine.
K_PATH = "/kaggle/working/RECsys_Challenge2024"
# Folder, relative to the repository root, holding the trained-model artefacts.
GH_PATH = "TrainedModels/WithoutKFCV"
# Sub-folder of GH_PATH used for this notebook's hybrid model.
D_PATH = "Hybrid/LinearCombination"

## Import the repository

In [6]:
# Obtain a handle on the GitHub repository using the token read from the Kaggle
# secrets. `get_repo_from_github` is presumably provided by the star import from
# Utils.notebookFunctions — TODO confirm.
repo = get_repo_from_github(token)

Repository 'RECsys_Challenge2024' found.


In [7]:
# Run configuration read by the cells below.
config = dict(
    # Names used to build the study name and the result file/folder paths.
    model='SecondLinearCombination_Recall',
    metric='Recall',
    # When True the Optuna search is run; otherwise the stored study is loaded.
    tune_parameters=True,
    # SQLite file backing the Optuna study.
    database_path='/kaggle/working/history_SecondLinearCombination_Recall.db',
    # Not read by any cell visible in this notebook section.
    copy_prev_best_params=False,
    # When True the final model is retrained and a submission file is written.
    tune_best_params=True,
    # Not read by any cell visible in this notebook section.
    save_github=True,
)

Import the database where previous tuning trials have been saved.

In [8]:
# Tuning history previously stored in the cloned repository for this model/metric.
# NOTE(review): this file name is built as history_<model>_<metric>.db, while
# config['database_path'] ends in history_<model>.db — confirm the two names are
# meant to differ.
previous_history_db = f'{K_PATH}/{GH_PATH}/{D_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/history_{config["model"]}_{config["metric"]}.db'

try:
    shutil.copyfile(previous_history_db, config['database_path'])
except FileNotFoundError:
    # No stored history: Optuna will create the database file itself.
    pass

# Construction of URM and ICM matrices

In [9]:
# Load the user-item interactions (columns used below: user_id, item_id, data).
URM_all_dataframe = pd.read_csv("/kaggle/input/recommender-system-2024-challenge-polimi/data_train.csv")

# Size the matrix from the largest id rather than from the number of distinct
# ids: ids are used directly as row/column indices, so counting distinct values
# only works when they are exactly 0..n-1 with no gaps. For gap-free ids both
# approaches give the same shape.
n_users = int(URM_all_dataframe["user_id"].max()) + 1
n_items = int(URM_all_dataframe["item_id"].max()) + 1

# User Rating Matrix: one row per user id, one column per item id.
URM_all = sps.csr_matrix(
    (
        URM_all_dataframe["data"].values,
        (URM_all_dataframe["user_id"].values, URM_all_dataframe["item_id"].values),
    ),
    shape=(n_users, n_items),
)

URM_all

<35736x38121 sparse matrix of type '<class 'numpy.float64'>'
	with 1764607 stored elements in Compressed Sparse Row format>

In [10]:
# Load the item metadata (columns used below: item_id, feature_id, data).
ICM_dataframe = pd.read_csv("/kaggle/input/recommender-system-2024-challenge-polimi/data_ICM_metadata.csv")

# Size the matrix from the largest id rather than from the number of distinct
# ids: ids are used directly as row/column indices, so counting distinct values
# only works when they are exactly 0..n-1 with no gaps. For gap-free ids both
# approaches give the same shape.
n_items = int(ICM_dataframe["item_id"].max()) + 1
n_features = int(ICM_dataframe["feature_id"].max()) + 1

# Item Content Matrix: one row per item id, one column per feature id.
ICM_all = sps.csr_matrix(
    (
        ICM_dataframe["data"].values,
        (ICM_dataframe["item_id"].values, ICM_dataframe["feature_id"].values),
    ),
    shape=(n_items, n_features),
)

ICM_all

<38121x94331 sparse matrix of type '<class 'numpy.float64'>'
	with 2940040 stored elements in Compressed Sparse Row format>

# Training

In [11]:
from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

# Keep 80% of the interactions for training; the rest becomes the validation set.
URM_train, URM_validation = split_train_in_two_percentage_global_sample(
    URM_all,
    train_percentage=0.80,
)

# Validation evaluator configured with a single cutoff of 50.
evaluator_validation = EvaluatorHoldout(
    URM_validation,
    cutoff_list=[50],
)

EvaluatorHoldout: Ignoring 141 ( 0.4%) Users that have less than 1 test interactions


## Train the base recommenders with their previously tuned best parameters

In [12]:
from Recommenders.KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
from Recommenders.SLIM.SLIMElasticNetRecommender import SLIMElasticNetRecommender
from Recommenders.GraphBased.RP3betaRecommender import RP3betaRecommender

# Base recommender classes to combine, keyed by the short name that also
# appears in the best-parameter file names.
recommenders = {
    "ItemKNNCF": ItemKNNCFRecommender,
    "SLIMElasticNet": SLIMElasticNetRecommender,
    "RP3beta": RP3betaRecommender
}

# Sub-folder (under GH_PATH) where each recommender's best parameters are stored.
paths_to_best_params = {
    "ItemKNNCF": "KNN",
    "SLIMElasticNet": "SLIM",
    "RP3beta": "GraphBased"
}


# Fitted base recommenders, inserted in the same order as `recommenders`.
loaded_recommenders = {}

for recommender_name, recommender_class in recommenders.items():

    start_time = time.time()

    print(f"{recommender_name} Model - TRAINING with its best parameters.")
    try:
        recommender = recommender_class(URM_train)
    except TypeError:
        # The constructor needs more than the URM: retry passing the ICM too.
        # Only TypeError is caught so that unrelated failures are not hidden.
        recommender = recommender_class(URM_train, ICM_all)

    # Best hyperparameters of this base model for the configured metric.
    # The path is anchored on K_PATH so it does not depend on the current
    # working directory.
    best_params_path = (
        f'{K_PATH}/{GH_PATH}/{paths_to_best_params[recommender_name]}/{recommender_name}Recommender/'
        f'Optimizing{config["metric"]}/best_params_{recommender_name}_{config["metric"]}.json'
    )
    with open(best_params_path, 'r') as best_params_json:
        best_params = json.load(best_params_json)

    recommender.fit(**best_params)

    loaded_recommenders[recommender_name] = recommender

    new_time_value, new_time_unit = seconds_to_biggest_unit(time.time() - start_time)
    # '{:.2f}' = two decimals (the previous '{:2f}' only set a minimum width).
    print("Training done in {:.2f}{}.\n".format(new_time_value, new_time_unit))

ItemKNNCF Model - TRAINING with its best parameters.
Similarity column 38121 (100.0%), 2765.91 column/sec. Elapsed time 13.78 sec
Training done in 14.175634sec.

SLIMElasticNet Model - TRAINING with its best parameters.
SLIMElasticNetRecommender: Processed 3815 (10.0%) in 5.00 min. Items per second: 12.71
SLIMElasticNetRecommender: Processed 7788 (20.4%) in 10.00 min. Items per second: 12.98
SLIMElasticNetRecommender: Processed 11715 (30.7%) in 15.00 min. Items per second: 13.01
SLIMElasticNetRecommender: Processed 15659 (41.1%) in 20.00 min. Items per second: 13.05
SLIMElasticNetRecommender: Processed 19621 (51.5%) in 25.00 min. Items per second: 13.08
SLIMElasticNetRecommender: Processed 23674 (62.1%) in 30.00 min. Items per second: 13.15
SLIMElasticNetRecommender: Processed 27611 (72.4%) in 35.01 min. Items per second: 13.15
SLIMElasticNetRecommender: Processed 31017 (81.4%) in 40.01 min. Items per second: 12.92
SLIMElasticNetRecommender: Processed 34609 (90.8%) in 45.01 min. Items 

In [13]:
from Recommenders.Hybrid.LinearCombinationRecommender import LinearCombinationRecommender 

def objective_function_(optuna_trial):
    """Optuna objective: validation RECALL@50 of the linear-combination hybrid.

    Samples a `norm` and one weight in [0, 1] per base recommender, rescales the
    weights so they sum to 1, fits a LinearCombinationRecommender on URM_train
    with them and evaluates it with `evaluator_validation`.

    Parameters
    ----------
    optuna_trial : optuna trial object
        Used to sample 'norm', 'itemknn_weight', 'slim_weight' and
        'rp3beta_weight'.

    Returns
    -------
    The "RECALL" value found in row 50 of the evaluator's result table.

    Raises
    ------
    optuna.TrialPruned
        If all three sampled weights are zero, since they cannot be normalised.
    """
    # Explicit order so that each model lines up with its weight in "weights"
    # below, instead of relying on the insertion order of `loaded_recommenders`.
    base_recommenders = [
        loaded_recommenders["ItemKNNCF"],
        loaded_recommenders["SLIMElasticNet"],
        loaded_recommenders["RP3beta"],
    ]
    recommender_instance = LinearCombinationRecommender(URM_train, base_recommenders)

    norm = optuna_trial.suggest_categorical('norm', [1, 2, np.inf, -np.inf])
    itemknn_weight = optuna_trial.suggest_float('itemknn_weight', 0.0, 1.0)
    slim_weight = optuna_trial.suggest_float('slim_weight', 0.0, 1.0)
    rp3beta_weight = optuna_trial.suggest_float('rp3beta_weight', 0.0, 1.0)

    # Ensure the weights sum to 1 (Normalization)
    total_weight = itemknn_weight + slim_weight + rp3beta_weight
    if total_weight == 0:
        # The sampling range includes 0.0 for every weight; skip this trial
        # rather than failing the whole study with a ZeroDivisionError.
        raise optuna.TrialPruned()
    itemknn_weight /= total_weight
    slim_weight /= total_weight
    rp3beta_weight /= total_weight

    full_hyperp = {
        "weights": [itemknn_weight, slim_weight, rp3beta_weight],
        "norm": norm
    }

    recommender_instance.fit(**full_hyperp)

    result_df, _ = evaluator_validation.evaluateRecommender(recommender_instance)

    # Row 50 corresponds to the cutoff the validation evaluator was built with.
    return result_df.loc[50]["RECALL"]

In [14]:
if config['tune_parameters']:

    # Create the study in the SQLite database, or reopen it when a study with
    # the same name is already stored there, so that trials accumulate.
    optuna_study = optuna.create_study(
        study_name=f'hyperparameters_tuning_{config["model"]}_{config["metric"]}',
        storage=f'sqlite:///{config["database_path"]}',
        direction='maximize',
        load_if_exists=True,
    )

    # Run 50 trials of the objective defined above.
    optuna_study.optimize(objective_function_, n_trials=50)

[I 2024-12-30 19:56:46,527] A new study created in RDB with name: hyperparameters_tuning_SecondLinearCombination_Recall_Recall


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.79 min. Users per second: 332


[I 2024-12-30 19:58:33,936] Trial 0 finished with value: 0.2786669523180989 and parameters: {'norm': inf, 'itemknn_weight': 0.8017727686768834, 'slim_weight': 0.06805974943560078, 'rp3beta_weight': 0.9307542841476057}. Best is trial 0 with value: 0.2786669523180989.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.79 min. Users per second: 332


[I 2024-12-30 20:00:21,366] Trial 1 finished with value: 0.2798327233738146 and parameters: {'norm': 2, 'itemknn_weight': 0.8083539714554404, 'slim_weight': 0.3302456909514888, 'rp3beta_weight': 0.7235596186873365}. Best is trial 1 with value: 0.2798327233738146.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.78 min. Users per second: 333


[I 2024-12-30 20:02:08,320] Trial 2 finished with value: 0.28132168929217505 and parameters: {'norm': inf, 'itemknn_weight': 0.5000509896454106, 'slim_weight': 0.16666809466611943, 'rp3beta_weight': 0.8645180341763296}. Best is trial 2 with value: 0.28132168929217505.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.77 min. Users per second: 334


[I 2024-12-30 20:03:54,971] Trial 3 finished with value: 0.2829492536978141 and parameters: {'norm': 1, 'itemknn_weight': 0.313637694261203, 'slim_weight': 0.4216987111189996, 'rp3beta_weight': 0.9956052800407162}. Best is trial 3 with value: 0.2829492536978141.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.71 min. Users per second: 347


[I 2024-12-30 20:05:37,723] Trial 4 finished with value: 0.28677552896728864 and parameters: {'norm': -inf, 'itemknn_weight': 0.00895418505395984, 'slim_weight': 0.4294937855765637, 'rp3beta_weight': 0.6261053069090143}. Best is trial 4 with value: 0.28677552896728864.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.70 min. Users per second: 349


[I 2024-12-30 20:07:19,933] Trial 5 finished with value: 0.2738988440590412 and parameters: {'norm': -inf, 'itemknn_weight': 0.3755849491922407, 'slim_weight': 0.3841634177993525, 'rp3beta_weight': 0.46366497264153117}. Best is trial 4 with value: 0.28677552896728864.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.77 min. Users per second: 335


[I 2024-12-30 20:09:06,503] Trial 6 finished with value: 0.28114332308373646 and parameters: {'norm': 2, 'itemknn_weight': 0.24178957084040154, 'slim_weight': 0.06676115292015639, 'rp3beta_weight': 0.7507218881854908}. Best is trial 4 with value: 0.28677552896728864.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.76 min. Users per second: 338


[I 2024-12-30 20:10:52,039] Trial 7 finished with value: 0.28227774854922827 and parameters: {'norm': 2, 'itemknn_weight': 0.47482964311656894, 'slim_weight': 0.4849586782571773, 'rp3beta_weight': 0.34661459257857363}. Best is trial 4 with value: 0.28677552896728864.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.73 min. Users per second: 342


[I 2024-12-30 20:12:36,139] Trial 8 finished with value: 0.2786982407112404 and parameters: {'norm': inf, 'itemknn_weight': 0.5984604664568852, 'slim_weight': 0.5292810716399091, 'rp3beta_weight': 0.11713013018899499}. Best is trial 4 with value: 0.28677552896728864.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.79 min. Users per second: 332


[I 2024-12-30 20:14:23,599] Trial 9 finished with value: 0.2764816330129587 and parameters: {'norm': 2, 'itemknn_weight': 0.7699889989607855, 'slim_weight': 0.5983854447861612, 'rp3beta_weight': 0.05273149037333291}. Best is trial 4 with value: 0.28677552896728864.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.78 min. Users per second: 334


[I 2024-12-30 20:16:10,307] Trial 10 finished with value: 0.2862756916829647 and parameters: {'norm': -inf, 'itemknn_weight': 0.07935019420489842, 'slim_weight': 0.894142137563047, 'rp3beta_weight': 0.6239086083091625}. Best is trial 4 with value: 0.28677552896728864.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.76 min. Users per second: 337


[I 2024-12-30 20:17:56,197] Trial 11 finished with value: 0.2869598351585682 and parameters: {'norm': -inf, 'itemknn_weight': 0.003535787779213444, 'slim_weight': 0.9371999408391409, 'rp3beta_weight': 0.5925674588925747}. Best is trial 11 with value: 0.2869598351585682.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.73 min. Users per second: 343


[I 2024-12-30 20:19:40,036] Trial 12 finished with value: 0.28685592276919547 and parameters: {'norm': -inf, 'itemknn_weight': 0.02527560794102638, 'slim_weight': 0.9645125061805467, 'rp3beta_weight': 0.4991298785732343}. Best is trial 11 with value: 0.2869598351585682.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.73 min. Users per second: 343


[I 2024-12-30 20:21:23,964] Trial 13 finished with value: 0.2834423818096713 and parameters: {'norm': -inf, 'itemknn_weight': 0.1516693392000785, 'slim_weight': 0.9679090709788495, 'rp3beta_weight': 0.33609388077812885}. Best is trial 11 with value: 0.2869598351585682.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.72 min. Users per second: 344


[I 2024-12-30 20:23:07,490] Trial 14 finished with value: 0.2703595806158202 and parameters: {'norm': -inf, 'itemknn_weight': 0.9932920152356056, 'slim_weight': 0.8016582139584779, 'rp3beta_weight': 0.3628971458805211}. Best is trial 11 with value: 0.2869598351585682.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.74 min. Users per second: 341


[I 2024-12-30 20:24:52,052] Trial 15 finished with value: 0.2864468983531747 and parameters: {'norm': 1, 'itemknn_weight': 0.002446702646846412, 'slim_weight': 0.7286051936416741, 'rp3beta_weight': 0.5183899477543434}. Best is trial 11 with value: 0.2869598351585682.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.68 min. Users per second: 353


[I 2024-12-30 20:26:32,962] Trial 16 finished with value: 0.27940470733595274 and parameters: {'norm': -inf, 'itemknn_weight': 0.20621954691008249, 'slim_weight': 0.7026661282352165, 'rp3beta_weight': 0.18997752649398414}. Best is trial 11 with value: 0.2869598351585682.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.71 min. Users per second: 347


[I 2024-12-30 20:28:15,712] Trial 17 finished with value: 0.28491856908186763 and parameters: {'norm': -inf, 'itemknn_weight': 0.12220489287585018, 'slim_weight': 0.9988458870270168, 'rp3beta_weight': 0.5032871191463861}. Best is trial 11 with value: 0.2869598351585682.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.69 min. Users per second: 351


[I 2024-12-30 20:29:57,182] Trial 18 finished with value: 0.27974474155151474 and parameters: {'norm': -inf, 'itemknn_weight': 0.32351837012544027, 'slim_weight': 0.8477079730103438, 'rp3beta_weight': 0.6421317489542888}. Best is trial 11 with value: 0.2869598351585682.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.70 min. Users per second: 348


[I 2024-12-30 20:31:39,618] Trial 19 finished with value: 0.28325573669297066 and parameters: {'norm': 1, 'itemknn_weight': 0.22112881667661513, 'slim_weight': 0.6486114121721113, 'rp3beta_weight': 0.21374605308955374}. Best is trial 11 with value: 0.2869598351585682.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.71 min. Users per second: 346


[I 2024-12-30 20:33:22,532] Trial 20 finished with value: 0.28646312767266296 and parameters: {'norm': -inf, 'itemknn_weight': 0.07767379592199819, 'slim_weight': 0.8161662761280853, 'rp3beta_weight': 0.7974990310891047}. Best is trial 11 with value: 0.2869598351585682.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.70 min. Users per second: 349


[I 2024-12-30 20:35:04,756] Trial 21 finished with value: 0.286988783056549 and parameters: {'norm': -inf, 'itemknn_weight': 0.02594160485205798, 'slim_weight': 0.8994715831658237, 'rp3beta_weight': 0.6203116989754509}. Best is trial 21 with value: 0.286988783056549.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.71 min. Users per second: 348


[I 2024-12-30 20:36:47,239] Trial 22 finished with value: 0.28696190578100983 and parameters: {'norm': -inf, 'itemknn_weight': 0.003603094061744796, 'slim_weight': 0.9178852084574608, 'rp3beta_weight': 0.5817284920608365}. Best is trial 21 with value: 0.286988783056549.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.69 min. Users per second: 351


[I 2024-12-30 20:38:28,957] Trial 23 finished with value: 0.28390745728241257 and parameters: {'norm': -inf, 'itemknn_weight': 0.15846810103690265, 'slim_weight': 0.8971336308623911, 'rp3beta_weight': 0.5798793603204341}. Best is trial 21 with value: 0.286988783056549.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.73 min. Users per second: 342


[I 2024-12-30 20:40:13,128] Trial 24 finished with value: 0.2861413849656841 and parameters: {'norm': -inf, 'itemknn_weight': 0.08305876709446895, 'slim_weight': 0.7534539821205087, 'rp3beta_weight': 0.7289291095298283}. Best is trial 21 with value: 0.286988783056549.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.75 min. Users per second: 339


[I 2024-12-30 20:41:58,444] Trial 25 finished with value: 0.281093708722353 and parameters: {'norm': -inf, 'itemknn_weight': 0.27146536997224785, 'slim_weight': 0.9043662831971244, 'rp3beta_weight': 0.6724943628804272}. Best is trial 21 with value: 0.286988783056549.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.74 min. Users per second: 340


[I 2024-12-30 20:43:43,311] Trial 26 finished with value: 0.27559020533174644 and parameters: {'norm': -inf, 'itemknn_weight': 0.39539012714742916, 'slim_weight': 0.6534357463205853, 'rp3beta_weight': 0.4181329226551922}. Best is trial 21 with value: 0.286988783056549.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.71 min. Users per second: 347


[I 2024-12-30 20:45:25,983] Trial 27 finished with value: 0.2867427893483177 and parameters: {'norm': inf, 'itemknn_weight': 0.1751653124904663, 'slim_weight': 0.8009254640147296, 'rp3beta_weight': 0.553902255205988}. Best is trial 21 with value: 0.286988783056549.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.70 min. Users per second: 348


[I 2024-12-30 20:47:08,369] Trial 28 finished with value: 0.2862567768227967 and parameters: {'norm': 1, 'itemknn_weight': 0.07764749620136542, 'slim_weight': 0.9165263906444441, 'rp3beta_weight': 0.8018503912688544}. Best is trial 21 with value: 0.286988783056549.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.70 min. Users per second: 350


[I 2024-12-30 20:48:50,263] Trial 29 finished with value: 0.2846254462665733 and parameters: {'norm': inf, 'itemknn_weight': 0.004904397911291561, 'slim_weight': 0.25350045708929736, 'rp3beta_weight': 0.43835422799405177}. Best is trial 21 with value: 0.286988783056549.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.68 min. Users per second: 354


[I 2024-12-30 20:50:31,022] Trial 30 finished with value: 0.2709003214487181 and parameters: {'norm': -inf, 'itemknn_weight': 0.6392670047019483, 'slim_weight': 0.5706806502813644, 'rp3beta_weight': 0.26334632514422146}. Best is trial 21 with value: 0.286988783056549.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.72 min. Users per second: 346


[I 2024-12-30 20:52:14,186] Trial 31 finished with value: 0.2869742311391852 and parameters: {'norm': -inf, 'itemknn_weight': 0.0003998246058797908, 'slim_weight': 0.95648527952069, 'rp3beta_weight': 0.558337351577856}. Best is trial 21 with value: 0.286988783056549.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.73 min. Users per second: 343


[I 2024-12-30 20:53:58,176] Trial 32 finished with value: 0.2853356885385396 and parameters: {'norm': -inf, 'itemknn_weight': 0.1077219439749195, 'slim_weight': 0.8580775343820929, 'rp3beta_weight': 0.5696739994705858}. Best is trial 21 with value: 0.286988783056549.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.75 min. Users per second: 339


[I 2024-12-30 20:55:43,377] Trial 33 finished with value: 0.2870006038649443 and parameters: {'norm': -inf, 'itemknn_weight': 0.05093102684040978, 'slim_weight': 0.9530568928490322, 'rp3beta_weight': 0.6987604626586648}. Best is trial 33 with value: 0.2870006038649443.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.70 min. Users per second: 349


[I 2024-12-30 20:57:25,480] Trial 34 finished with value: 0.2863818330146268 and parameters: {'norm': -inf, 'itemknn_weight': 0.06958794741045823, 'slim_weight': 0.7845100621196431, 'rp3beta_weight': 0.9041713650704739}. Best is trial 33 with value: 0.2870006038649443.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.70 min. Users per second: 348


[I 2024-12-30 20:59:07,802] Trial 35 finished with value: 0.28721344892443607 and parameters: {'norm': 2, 'itemknn_weight': 0.14764146476671322, 'slim_weight': 0.9888439113534109, 'rp3beta_weight': 0.6723675826025292}. Best is trial 35 with value: 0.28721344892443607.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.72 min. Users per second: 346


[I 2024-12-30 21:00:50,951] Trial 36 finished with value: 0.28653135132982227 and parameters: {'norm': 2, 'itemknn_weight': 0.2873911473694052, 'slim_weight': 0.9767221361646804, 'rp3beta_weight': 0.6941602558780239}. Best is trial 35 with value: 0.28721344892443607.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.74 min. Users per second: 341


[I 2024-12-30 21:02:35,369] Trial 37 finished with value: 0.2866816188946735 and parameters: {'norm': 2, 'itemknn_weight': 0.19219698936158064, 'slim_weight': 0.8542565856481682, 'rp3beta_weight': 0.8450028267401128}. Best is trial 35 with value: 0.28721344892443607.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.79 min. Users per second: 331


[I 2024-12-30 21:04:22,935] Trial 38 finished with value: 0.28718195784440764 and parameters: {'norm': 2, 'itemknn_weight': 0.1340557797664313, 'slim_weight': 0.9910446054377802, 'rp3beta_weight': 0.7730254610910653}. Best is trial 35 with value: 0.28721344892443607.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.77 min. Users per second: 335


[I 2024-12-30 21:06:09,245] Trial 39 finished with value: 0.28327945028338253 and parameters: {'norm': 2, 'itemknn_weight': 0.38246213750965186, 'slim_weight': 0.2753781932181986, 'rp3beta_weight': 0.771647683937844}. Best is trial 35 with value: 0.28721344892443607.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.77 min. Users per second: 335


[I 2024-12-30 21:07:55,668] Trial 40 finished with value: 0.27933926552418975 and parameters: {'norm': 2, 'itemknn_weight': 0.49866867065484466, 'slim_weight': 0.0045101760109323985, 'rp3beta_weight': 0.9524357681516783}. Best is trial 35 with value: 0.28721344892443607.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.75 min. Users per second: 340


[I 2024-12-30 21:09:40,651] Trial 41 finished with value: 0.2871009786277772 and parameters: {'norm': 2, 'itemknn_weight': 0.1280965456553857, 'slim_weight': 0.9707476332796675, 'rp3beta_weight': 0.7019042256316014}. Best is trial 35 with value: 0.28721344892443607.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.73 min. Users per second: 343


[I 2024-12-30 21:11:24,537] Trial 42 finished with value: 0.2871992819654387 and parameters: {'norm': 2, 'itemknn_weight': 0.13160315316009874, 'slim_weight': 0.8584233384022477, 'rp3beta_weight': 0.7071643062389888}. Best is trial 35 with value: 0.28721344892443607.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.74 min. Users per second: 341


[I 2024-12-30 21:13:09,169] Trial 43 finished with value: 0.28683021897891503 and parameters: {'norm': 2, 'itemknn_weight': 0.24604773937667246, 'slim_weight': 0.9977308960442981, 'rp3beta_weight': 0.8510122691809919}. Best is trial 35 with value: 0.28721344892443607.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.77 min. Users per second: 336


[I 2024-12-30 21:14:55,421] Trial 44 finished with value: 0.2871362421450292 and parameters: {'norm': 2, 'itemknn_weight': 0.13658000933500927, 'slim_weight': 0.8617270072466775, 'rp3beta_weight': 0.6816476998107995}. Best is trial 35 with value: 0.28721344892443607.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.77 min. Users per second: 334


[I 2024-12-30 21:16:42,062] Trial 45 finished with value: 0.2869472482944987 and parameters: {'norm': 2, 'itemknn_weight': 0.14184789794653097, 'slim_weight': 0.8516859249796722, 'rp3beta_weight': 0.7662919676472666}. Best is trial 35 with value: 0.28721344892443607.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.75 min. Users per second: 338


[I 2024-12-30 21:18:27,515] Trial 46 finished with value: 0.2857288477477511 and parameters: {'norm': 2, 'itemknn_weight': 0.3221539474966364, 'slim_weight': 0.7571746675815916, 'rp3beta_weight': 0.6670327985451188}. Best is trial 35 with value: 0.28721344892443607.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.76 min. Users per second: 337


[I 2024-12-30 21:20:13,184] Trial 47 finished with value: 0.2857649385222292 and parameters: {'norm': 2, 'itemknn_weight': 0.12438425460031531, 'slim_weight': 0.48023001805246496, 'rp3beta_weight': 0.7301332682139754}. Best is trial 35 with value: 0.28721344892443607.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.77 min. Users per second: 335


[I 2024-12-30 21:21:59,671] Trial 48 finished with value: 0.2848871590727443 and parameters: {'norm': 2, 'itemknn_weight': 0.4480307272556454, 'slim_weight': 0.7028281045298339, 'rp3beta_weight': 0.9081073750481905}. Best is trial 35 with value: 0.28721344892443607.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.75 min. Users per second: 338


[I 2024-12-30 21:23:45,038] Trial 49 finished with value: 0.28488609606180826 and parameters: {'norm': 2, 'itemknn_weight': 0.5835796962184489, 'slim_weight': 0.9439109357216166, 'rp3beta_weight': 0.8232349277498803}. Best is trial 35 with value: 0.28721344892443607.


## Some optuna visualizations on recommender parameters

In [15]:
# When tuning was skipped, `optuna_study` has not been created by the tuning
# cell, so load it from the database.
if not config['tune_parameters']:
    optuna_study = optuna.load_study(
        study_name=f'hyperparameters_tuning_{config["model"]}_{config["metric"]}',
        storage=f'sqlite:///{config["database_path"]}',
    )

# Slice plot of the study's trials.
fig = optuna.visualization.plot_slice(optuna_study)
fig.show()

In [16]:
# When tuning was skipped, `optuna_study` has not been created by the tuning
# cell, so load it from the database.
if not config['tune_parameters']:
    optuna_study = optuna.load_study(
        study_name=f'hyperparameters_tuning_{config["model"]}_{config["metric"]}',
        storage=f'sqlite:///{config["database_path"]}',
    )

# Hyperparameter-importance plot of the study.
fig = optuna.visualization.plot_param_importances(optuna_study)
fig.show()

## Let's train the recommender with best parameter values

In [17]:
if config['tune_best_params']:

    # Retrain every base recommender on training + validation interactions.
    # The summed matrix is built once instead of once per constructor call.
    URM_train_validation = URM_train + URM_validation

    loaded_recommenders = {}

    for recommender_name, recommender_class in recommenders.items():

        start_time = time.time()

        print(f"{recommender_name} Model - TRAINING with its best parameters.")
        try:
            recommender = recommender_class(URM_train_validation)
        except TypeError:
            # The constructor needs more than the URM: retry passing the ICM too.
            # Only TypeError is caught so that unrelated failures are not hidden.
            recommender = recommender_class(URM_train_validation, ICM_all)

        # Best hyperparameters of this base model for the configured metric.
        # The path is anchored on K_PATH so it does not depend on the current
        # working directory.
        best_params_path = (
            f'{K_PATH}/{GH_PATH}/{paths_to_best_params[recommender_name]}/{recommender_name}Recommender/'
            f'Optimizing{config["metric"]}/best_params_{recommender_name}_{config["metric"]}.json'
        )
        with open(best_params_path, 'r') as best_params_json:
            best_params = json.load(best_params_json)

        recommender.fit(**best_params)

        loaded_recommenders[recommender_name] = recommender

        new_time_value, new_time_unit = seconds_to_biggest_unit(time.time() - start_time)
        # '{:.2f}' = two decimals (the previous '{:2f}' only set a minimum width).
        print("Training done in {:.2f}{}.\n".format(new_time_value, new_time_unit))

ItemKNNCF Model - TRAINING with its best parameters.
Similarity column 38121 (100.0%), 2524.89 column/sec. Elapsed time 15.10 sec
Training done in 15.574344sec.

SLIMElasticNet Model - TRAINING with its best parameters.
SLIMElasticNetRecommender: Processed 3117 ( 8.2%) in 5.00 min. Items per second: 10.39
SLIMElasticNetRecommender: Processed 6222 (16.3%) in 10.00 min. Items per second: 10.37
SLIMElasticNetRecommender: Processed 9227 (24.2%) in 15.00 min. Items per second: 10.25
SLIMElasticNetRecommender: Processed 12255 (32.1%) in 20.00 min. Items per second: 10.21
SLIMElasticNetRecommender: Processed 15463 (40.6%) in 25.00 min. Items per second: 10.31
SLIMElasticNetRecommender: Processed 18571 (48.7%) in 30.00 min. Items per second: 10.32
SLIMElasticNetRecommender: Processed 22044 (57.8%) in 35.01 min. Items per second: 10.49
SLIMElasticNetRecommender: Processed 24977 (65.5%) in 40.01 min. Items per second: 10.40
SLIMElasticNetRecommender: Processed 27760 (72.8%) in 45.01 min. Items p

In [18]:
# Build the hybrid from the fitted base recommenders and fit it with the best
# weights/norm, taken from the fresh study or from the stored JSON file.
if config['tune_best_params']:

    if config['tune_parameters']:
        best_params = optuna_study.best_trial.params
    else:
        with open(f'{K_PATH}/{GH_PATH}/{D_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/best_params_{config["model"]}_{config["metric"]}.json', 'r') as best_params_json:
            best_params = json.load(best_params_json)

    fit_params = {
        # Every entry except "norm" is a weight. Select by key rather than by
        # position so the result does not depend on "norm" being the first key.
        # NOTE(review): the remaining weights are presumably expected in the
        # same order as `loaded_recommenders` - confirm against the tuning cell.
        "weights": [value for name, value in best_params.items() if name != "norm"],
        "norm": best_params["norm"],
    }

    recommender_instance = LinearCombinationRecommender(URM_train + URM_validation, loaded_recommenders.values())
    recommender_instance.fit(**fit_params)

LinearCombinationRecommender: Fit completed in 0.00 seconds.


# Testing

Create the recommendations for the submission. 

In [19]:
# Produce the submission CSV for the challenge's target users with the fitted hybrid.
if config['tune_best_params']:

    data_target_users_test = pd.read_csv(
        '/kaggle/input/recommender-system-2024-challenge-polimi/data_target_users_test.csv'
    )
    create_submission(
        data_target_users_test,
        recommender_instance,
        f'/kaggle/working/submission_{config["model"]}_{config["metric"]}.csv',
    )

Submission file saved as /kaggle/working/submission_SecondLinearCombination_Recall_Recall.csv


# Save Version on GitHub 

Write the best hyperparameters to a JSON file, or copy the previously saved file from the cloned repository.

In [20]:
# After a tuning run: dump the study's best parameters to the working directory
# and, if requested, upload that file to the repository.
# Otherwise (optionally): copy the previously stored best parameters from the
# cloned repository into the working directory.
if config['tune_parameters']:
    with open(f'/kaggle/working/best_params_{config["model"]}_{config["metric"]}.json', 'w') as params_file:
        json.dump(optuna_study.best_params, params_file)

    if config['save_github']:
        upload_file(
            f'/kaggle/working/best_params_{config["model"]}_{config["metric"]}.json',
            f'{GH_PATH}/{D_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/best_params_{config["model"]}_{config["metric"]}.json',
            f'{config["model"]}_{config["metric"]} tuning results (from kaggle notebook)',
            repo
        )
elif config['copy_prev_best_params']:
    # The source must include the Optimizing{metric}/ folder: that is where the
    # upload above stores the file and where the fitting cell reads it from.
    shutil.copyfile(
        f'{K_PATH}/{GH_PATH}/{D_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/'\
        f'best_params_{config["model"]}_{config["metric"]}.json',
        f'/kaggle/working/best_params_{config["model"]}_{config["metric"]}.json'
    )

File 'TrainedModels/WithoutKFCV/Hybrid/LinearCombination/SecondLinearCombination_RecallRecommender/OptimizingRecall/best_params_SecondLinearCombination_Recall_Recall.json' created successfully.


Save the history of the tuned model.

In [21]:
# Upload the study's database file to the repository, only when GitHub saving
# is enabled and a tuning run took place in this session.
if config['save_github'] and config['tune_parameters']:
    history_upload_args = (
        config['database_path'],
        f'{GH_PATH}/{D_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/history_{config["model"]}_{config["metric"]}.db',
        f'Tuning {config["model"]}_{config["metric"]} db updated results (from kaggle notebook)',
        repo,
    )
    upload_file(*history_upload_args)

File 'TrainedModels/WithoutKFCV/Hybrid/LinearCombination/SecondLinearCombination_RecallRecommender/OptimizingRecall/history_SecondLinearCombination_Recall_Recall.db' created successfully.


Save the best trained model and its submission.

In [22]:
# Upload the generated submission CSV to the repository, only when GitHub
# saving is enabled and the final model was trained in this session.
if config['save_github'] and config['tune_best_params']:
    submission_upload_args = (
        f'/kaggle/working/submission_{config["model"]}_{config["metric"]}.csv',
        f'{GH_PATH}/{D_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/Submission/submission_{config["model"]}_{config["metric"]}.csv',
        f'New {config["model"]}_{config["metric"]} submission (from kaggle notebook)',
        repo,
    )
    upload_file(*submission_upload_args)

File 'TrainedModels/WithoutKFCV/Hybrid/LinearCombination/SecondLinearCombination_RecallRecommender/OptimizingRecall/Submission/submission_SecondLinearCombination_Recall_Recall.csv' created successfully.
