# Set connection with GitHub

In [1]:
# Uncomment to delete a previous clone of the repository before cloning it again
# (git refuses to clone into an existing, non-empty directory).
# ! cd /kaggle/working && rm -rf RECsys_Challenge2024

In [2]:
from kaggle_secrets import UserSecretsClient

# Read the GitHub access token stored as the Kaggle secret named "Token".
token = UserSecretsClient().get_secret("Token")

# Clone the project repository; IPython expands {token} into the HTTPS URL to authenticate.
# NOTE(review): the token becomes part of the remote URL, so it is presumably persisted in
# the clone's .git/config — confirm, and avoid sharing the working directory as-is.
! git clone https://{token}@github.com/madratak/RECsys_Challenge2024.git

Cloning into 'RECsys_Challenge2024'...
remote: Enumerating objects: 3897, done.[K
remote: Counting objects: 100% (1370/1370), done.[K
remote: Compressing objects: 100% (573/573), done.[K
remote: Total 3897 (delta 755), reused 1116 (delta 606), pack-reused 2527 (from 1)[K
Receiving objects: 100% (3897/3897), 166.01 MiB | 31.05 MiB/s, done.
Resolving deltas: 100% (2274/2274), done.
Updating files: 100% (371/371), done.


In [3]:
! pip install PyGithub requests

Collecting PyGithub
  Downloading PyGithub-2.5.0-py3-none-any.whl.metadata (3.9 kB)
Collecting pynacl>=1.4.0 (from PyGithub)
  Downloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl.metadata (8.6 kB)
Downloading PyGithub-2.5.0-py3-none-any.whl (375 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m375.9/375.9 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (856 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m856.7/856.7 kB[0m [31m25.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pynacl, PyGithub
Successfully installed PyGithub-2.5.0 pynacl-1.5.0


In [4]:
# Work from the repository root: the project imports (Utils, Recommenders, ...) and the
# relative GH_PATH file paths opened later are resolved against the current directory.
%cd /kaggle/working/RECsys_Challenge2024
# Compile the repository's Cython sources with the current Python environment.
! python run_compile_all_cython.py

/kaggle/working/RECsys_Challenge2024
run_compile_all_cython: Found 11 Cython files in 5 folders...
run_compile_all_cython: All files will be compiled using your current python environment: '/opt/conda/bin/python'
Compiling [1/11]: MatrixFactorizationImpressions_Cython_Epoch.pyx... 
In file included from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/ndarraytypes.h:1929[m[K,
                 from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/ndarrayobject.h:12[m[K,
                 from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/arrayobject.h:5[m[K,
                 from [01m[KMatrixFactorizationImpressions_Cython_Epoch.c:1252[m[K:
      |  [01;35m[K^~~~~~~[m[K
[01m[KMatrixFactorizationImpressions_Cython_Epoch.c:[m[K In function '[01m[K__pyx_f_43MatrixFactorizationImpressions_Cython_Epoch_32MatrixFactorization_Cython_Epoch_sampleBPR_Cython[m[K':
30351 |       [01;35m[K__py

In [5]:
from Utils.notebookFunctions import *
import numpy as np
import pandas as pd
import scipy.sparse as sps
import time
import shutil
import optuna
import json
import os
from Utils.seconds_to_biggest_unit import seconds_to_biggest_unit


# Absolute path of the cloned repository on Kaggle.
K_PATH = '/kaggle/working/RECsys_Challenge2024'
# Repository-relative folder holding the trained models' artifacts
# (best-parameter JSON files and Optuna history databases are read from it below).
GH_PATH = 'TrainedModels/WithoutKFCV'
# Sub-folder of GH_PATH used for this hybrid model's artifacts.
D_PATH = 'Hybrid/LinearCombination'

# Seed NumPy's global random generator.
# NOTE(review): presumably the random hold-out split below draws from it — confirm.
np.random.seed(42)

## Import the repository

In [6]:
# Get a handle to the GitHub repository using the token read from the Kaggle secrets.
# get_repo_from_github is not defined in this notebook; it comes from the star import of
# Utils.notebookFunctions.
# NOTE(review): presumably used later to upload results when config['save_github'] is set — confirm.
repo = get_repo_from_github(token)

Repository 'RECsys_Challenge2024' found.


In [7]:
# Name of the hybrid being tuned; it is embedded in file names, repository paths and
# the Optuna study name, so it is defined once and reused.
MODEL_NAME = 'SecondLinearCombination'

# Run-time switches for this notebook.
config = {
    'model': MODEL_NAME,
    # Run a new round of Optuna trials (otherwise the stored study is only loaded).
    'tune_parameters': True,
    # Local SQLite file backing the Optuna study; derived from the model name so the
    # two can never drift apart.
    'database_path': f'/kaggle/working/history_{MODEL_NAME}.db',
    # NOTE(review): not read by any cell visible here — confirm where it is used.
    'copy_prev_best_params': False,
    # Retrain on train + validation with the best parameters and build the submission.
    'tune_best_params': True,
    # NOTE(review): presumably gates uploading the results to GitHub — confirm.
    'save_github': True
}

Import the database where previous tuning trials have been saved.

In [8]:
# Seed the local Optuna database with the tuning history stored in the repository clone,
# so that new trials are appended to the previous study instead of starting from scratch.
previous_history_path = f'{K_PATH}/{GH_PATH}/{D_PATH}/{config["model"]}Recommender/history_{config["model"]}.db'

try:
    shutil.copyfile(previous_history_path, config['database_path'])
except FileNotFoundError:
    # Best effort: without a stored history Optuna creates a fresh database on first use.
    print(f"No previous tuning history found at '{previous_history_path}': a new database will be created.")
else:
    print(f"Previous tuning history copied to '{config['database_path']}'.")

# Construction of URM and ICM matrices

In [9]:
URM_all_dataframe = pd.read_csv("/kaggle/input/recommender-system-2024-challenge-polimi/data_train.csv")

# The ids are used directly as row/column indices, so the matrix must be sized from the
# largest id rather than from the number of distinct ids: with a gap in the id range the
# distinct count is too small and csr_matrix raises because an index exceeds the shape.
n_users = int(URM_all_dataframe["user_id"].max()) + 1
n_items = int(URM_all_dataframe["item_id"].max()) + 1

# User Rating Matrix: rows are user ids, columns are item ids, values come from "data".
URM_all = sps.csr_matrix(
    (
        URM_all_dataframe["data"].values,
        (URM_all_dataframe["user_id"].values, URM_all_dataframe["item_id"].values),
    ),
    shape=(n_users, n_items),
)

URM_all

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 1764607 stored elements and shape (35736, 38121)>

In [10]:
ICM_dataframe = pd.read_csv("/kaggle/input/recommender-system-2024-challenge-polimi/data_ICM_metadata.csv")

# The ids are used directly as row/column indices, so the matrix must be sized from the
# largest id rather than from the number of distinct ids (a gap in the id range would
# otherwise push an index outside the declared shape and csr_matrix would raise).
# Note: n_items is re-derived from the metadata here, replacing the value computed from
# the interactions.
n_items = int(ICM_dataframe["item_id"].max()) + 1
n_features = int(ICM_dataframe["feature_id"].max()) + 1

# Item Content Matrix: rows are item ids, columns are feature ids, values come from "data".
ICM_all = sps.csr_matrix(
    (
        ICM_dataframe["data"].values,
        (ICM_dataframe["item_id"].values, ICM_dataframe["feature_id"].values),
    ),
    shape=(n_items, n_features),
)

ICM_all

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 2940040 stored elements and shape (38121, 94331)>

# Training

In [11]:
from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

# Split the interactions into a training and a validation matrix.
# train_percentage = 0.80 -> presumably a random 80/20 split of the interactions — confirm
# against the split function.
URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.80)

# Evaluator over the validation interactions at cutoff 10 (the tuning objective reads MAP at this cutoff).
evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])

EvaluatorHoldout: Ignoring 141 ( 0.4%) Users that have less than 1 test interactions


## Import the best recommenders previously trained.

In [12]:
from Recommenders.KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
from Recommenders.SLIM.SLIMElasticNetRecommender import SLIMElasticNetRecommender
from Recommenders.GraphBased.RP3betaRecommender import RP3betaRecommender

# Base models combined by the hybrid, keyed by the name used in their artifact file names.
recommenders = {
    "ItemKNNCF": ItemKNNCFRecommender,
    "SLIMElasticNet": SLIMElasticNetRecommender,
    "RP3beta": RP3betaRecommender
}

# Sub-folder (under GH_PATH) where the best parameters of each base model are stored.
paths_to_best_params = {
    "ItemKNNCF": "KNN",
    "SLIMElasticNet": "SLIM",
    "RP3beta": "GraphBased"
}


# Fitted base models, inserted in the same order as `recommenders`.
loaded_recommenders = {}

for recommender_name, recommender_class in recommenders.items():

    start_time = time.time()

    print(f"{recommender_name} Model - TRAINING with its best parameters.")
    try:
        recommender = recommender_class(URM_train)
    except TypeError:
        # The constructor requires more than the URM: retry passing the item features too.
        # Only the signature mismatch is caught, so genuine construction errors surface.
        recommender = recommender_class(URM_train, ICM_all)

    # Extract best parameter values of the relative recommender model.
    # The path is anchored on K_PATH so it does not depend on the current working directory.
    best_params_path = (
        f'{K_PATH}/{GH_PATH}/{paths_to_best_params[recommender_name]}/'
        f'{recommender_name}Recommender/best_params_{recommender_name}.json'
    )
    with open(best_params_path, 'r') as best_params_json:
        best_params = json.load(best_params_json)

    recommender.fit(**best_params)

    loaded_recommenders[recommender_name] = recommender

    # Report the elapsed time with two decimals ("{:2f}" only set a minimum width of 2).
    new_time_value, new_time_unit = seconds_to_biggest_unit(time.time() - start_time)
    print("Training done in {:.2f}{}.\n".format(new_time_value, new_time_unit))

ItemKNNCF Model - TRAINING with its best parameters.
Similarity column 38121 (100.0%), 2793.30 column/sec. Elapsed time 13.65 sec
Training done in 13.874400sec.

SLIMElasticNet Model - TRAINING with its best parameters.
SLIMElasticNetRecommender: Processed 2510 ( 6.6%) in 5.00 min. Items per second: 8.36
SLIMElasticNetRecommender: Processed 5042 (13.2%) in 10.00 min. Items per second: 8.40
SLIMElasticNetRecommender: Processed 7590 (19.9%) in 15.00 min. Items per second: 8.43
SLIMElasticNetRecommender: Processed 10140 (26.6%) in 20.00 min. Items per second: 8.45
SLIMElasticNetRecommender: Processed 12643 (33.2%) in 25.00 min. Items per second: 8.43
SLIMElasticNetRecommender: Processed 15082 (39.6%) in 30.01 min. Items per second: 8.38
SLIMElasticNetRecommender: Processed 17478 (45.8%) in 35.01 min. Items per second: 8.32
SLIMElasticNetRecommender: Processed 19966 (52.4%) in 40.01 min. Items per second: 8.32
SLIMElasticNetRecommender: Processed 22515 (59.1%) in 45.01 min. Items per secon

In [13]:
from Recommenders.Hybrid.LinearCombinationRecommender import LinearCombinationRecommender 

def objective_function_(optuna_trial):
    """Optuna objective: validation MAP at cutoff 10 of the linear combination hybrid.

    Samples the norm and one raw weight in [0, 1] per base model, rescales the weights
    so that they sum to 1, fits a LinearCombinationRecommender built on URM_train and
    the already fitted `loaded_recommenders`, and evaluates it with
    `evaluator_validation`.

    The study records the raw sampled values, not the rescaled weights.

    Args:
        optuna_trial: the Optuna trial used to sample the hyperparameters.

    Returns:
        The "MAP" entry at cutoff 10 of the evaluator's result dataframe.

    Raises:
        optuna.TrialPruned: if every sampled weight is 0, since such weights cannot be
            rescaled to sum to 1 (the original code divided by zero in that case).
    """
    norm = optuna_trial.suggest_categorical('norm', [1, 2, np.inf, -np.inf])
    itemknn_weight = optuna_trial.suggest_float('itemknn_weight', 0.0, 1.0)
    slim_weight = optuna_trial.suggest_float('slim_weight', 0.0, 1.0)
    rp3beta_weight = optuna_trial.suggest_float('rp3beta_weight', 0.0, 1.0)

    # Ensure the weights sum to 1 (Normalization)
    total_weight = itemknn_weight + slim_weight + rp3beta_weight
    if total_weight <= 0.0:
        # The sampling range includes 0.0 for every weight: skip the degenerate combination
        # instead of failing with a ZeroDivisionError.
        raise optuna.TrialPruned("All sampled weights are zero: they cannot be normalized.")

    full_hyperp = {
        "weights": [
            itemknn_weight / total_weight,
            slim_weight / total_weight,
            rp3beta_weight / total_weight,
        ],
        "norm": norm
    }

    recommender_instance = LinearCombinationRecommender(URM_train, loaded_recommenders.values())
    recommender_instance.fit(**full_hyperp)

    result_df, _ = evaluator_validation.evaluateRecommender(recommender_instance)

    return result_df.loc[10]["MAP"]

In [14]:
if config['tune_parameters']:

    # Open the study backed by the SQLite history file; load_if_exists=True makes Optuna
    # reuse a study with the same name instead of failing, so new trials are appended.
    optuna_study = optuna.create_study(
        study_name=f'hyperparameters_tuning_{config["model"]}',
        storage=f'sqlite:///{config["database_path"]}',
        direction='maximize',
        load_if_exists=True,
    )

    # Run 50 more trials of the objective defined above.
    optuna_study.optimize(objective_function_, n_trials=50)

[I 2024-12-03 18:17:23,132] Using an existing study with name 'hyperparameters_tuning_SecondLinearCombination' instead of creating a new one.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.65 min. Users per second: 360


[I 2024-12-03 18:19:02,585] Trial 200 finished with value: 0.05933510034626593 and parameters: {'norm': 2, 'itemknn_weight': 0.46828733754698926, 'slim_weight': 0.7995396230439465, 'rp3beta_weight': 0.0862444659385102}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.64 min. Users per second: 362


[I 2024-12-03 18:20:41,230] Trial 201 finished with value: 0.06068103688541793 and parameters: {'norm': 2, 'itemknn_weight': 0.05701674310823511, 'slim_weight': 0.723033143800891, 'rp3beta_weight': 0.10879960059304565}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.64 min. Users per second: 361


[I 2024-12-03 18:22:20,051] Trial 202 finished with value: 0.06069077608099776 and parameters: {'norm': 2, 'itemknn_weight': 0.05530054003330073, 'slim_weight': 0.8230030697721338, 'rp3beta_weight': 0.13681355079051025}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.64 min. Users per second: 361


[I 2024-12-03 18:23:58,924] Trial 203 finished with value: 0.060731111913790124 and parameters: {'norm': 2, 'itemknn_weight': 0.032734630560229454, 'slim_weight': 0.7848231754707613, 'rp3beta_weight': 0.10494403148311067}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.65 min. Users per second: 360


[I 2024-12-03 18:25:37,956] Trial 204 finished with value: 0.06063109229270035 and parameters: {'norm': 2, 'itemknn_weight': 0.017081494084999864, 'slim_weight': 0.8018340431877594, 'rp3beta_weight': 0.061675785368598204}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.64 min. Users per second: 361


[I 2024-12-03 18:27:16,703] Trial 205 finished with value: 0.060635862670205584 and parameters: {'norm': 2, 'itemknn_weight': 0.0009161309187878733, 'slim_weight': 0.7811431713245741, 'rp3beta_weight': 0.09427652234441733}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.64 min. Users per second: 361


[I 2024-12-03 18:28:55,604] Trial 206 finished with value: 0.060709504188431906 and parameters: {'norm': 2, 'itemknn_weight': 0.030661594229444564, 'slim_weight': 0.7704559544632352, 'rp3beta_weight': 0.10113632593509411}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.65 min. Users per second: 360


[I 2024-12-03 18:30:34,643] Trial 207 finished with value: 0.06067484286405321 and parameters: {'norm': 2, 'itemknn_weight': 0.07323009384217016, 'slim_weight': 0.8198581873659557, 'rp3beta_weight': 0.0804797163796856}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.65 min. Users per second: 361


[I 2024-12-03 18:32:13,561] Trial 208 finished with value: 0.060732342691253764 and parameters: {'norm': 2, 'itemknn_weight': 0.036321040781375206, 'slim_weight': 0.7369802550560581, 'rp3beta_weight': 0.1289699105831966}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.65 min. Users per second: 360


[I 2024-12-03 18:33:52,579] Trial 209 finished with value: 0.06071725340414586 and parameters: {'norm': 2, 'itemknn_weight': 0.04048422412364222, 'slim_weight': 0.7348592366083294, 'rp3beta_weight': 0.12453547988786634}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.64 min. Users per second: 361


[I 2024-12-03 18:35:31,450] Trial 210 finished with value: 0.060664526184120014 and parameters: {'norm': 2, 'itemknn_weight': 0.06501110324691449, 'slim_weight': 0.760471646993534, 'rp3beta_weight': 0.052433953752056714}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.64 min. Users per second: 361


[I 2024-12-03 18:37:10,296] Trial 211 finished with value: 0.06072558790805557 and parameters: {'norm': 2, 'itemknn_weight': 0.029074403721673008, 'slim_weight': 0.8392855139490878, 'rp3beta_weight': 0.14859389312768284}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.65 min. Users per second: 360


[I 2024-12-03 18:38:49,448] Trial 212 finished with value: 0.06069087641611684 and parameters: {'norm': 2, 'itemknn_weight': 0.016515000411699416, 'slim_weight': 0.7978886686524035, 'rp3beta_weight': 0.10620979483174132}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.65 min. Users per second: 360


[I 2024-12-03 18:40:28,459] Trial 213 finished with value: 0.060730813138101726 and parameters: {'norm': 2, 'itemknn_weight': 0.044972470034536326, 'slim_weight': 0.8301710499613454, 'rp3beta_weight': 0.13194075899114455}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.65 min. Users per second: 360


[I 2024-12-03 18:42:07,478] Trial 214 finished with value: 0.06072341509530548 and parameters: {'norm': 2, 'itemknn_weight': 0.047128977238212716, 'slim_weight': 0.8248253990040855, 'rp3beta_weight': 0.1296616408218472}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.65 min. Users per second: 360


[I 2024-12-03 18:43:46,545] Trial 215 finished with value: 0.06068816402339199 and parameters: {'norm': 2, 'itemknn_weight': 0.08590216423925999, 'slim_weight': 0.7819343880971231, 'rp3beta_weight': 0.11518065829742562}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.65 min. Users per second: 359


[I 2024-12-03 18:45:26,025] Trial 216 finished with value: 0.0606717949060957 and parameters: {'norm': 2, 'itemknn_weight': 0.058938841970938335, 'slim_weight': 0.8115010763775676, 'rp3beta_weight': 0.15041669307554198}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.65 min. Users per second: 361


[I 2024-12-03 18:47:04,968] Trial 217 finished with value: 0.060709567734007625 and parameters: {'norm': 2, 'itemknn_weight': 0.04138448811586837, 'slim_weight': 0.7523213368923427, 'rp3beta_weight': 0.0752334933017639}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.65 min. Users per second: 360


[I 2024-12-03 18:48:43,969] Trial 218 finished with value: 0.06068871698138262 and parameters: {'norm': 2, 'itemknn_weight': 0.01630568773916855, 'slim_weight': 0.8442011066318275, 'rp3beta_weight': 0.13214312043135656}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.65 min. Users per second: 360


[I 2024-12-03 18:50:23,065] Trial 219 finished with value: 0.06065414595861115 and parameters: {'norm': 2, 'itemknn_weight': 0.07472851727562824, 'slim_weight': 0.8273679710655197, 'rp3beta_weight': 0.1650883867629002}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.63 min. Users per second: 363


[I 2024-12-03 18:52:01,333] Trial 220 finished with value: 0.060628976336517724 and parameters: {'norm': 1, 'itemknn_weight': 0.0004471155667187185, 'slim_weight': 0.7958597465469957, 'rp3beta_weight': 0.09634105659885686}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.65 min. Users per second: 361


[I 2024-12-03 18:53:40,257] Trial 221 finished with value: 0.060738307056678695 and parameters: {'norm': 2, 'itemknn_weight': 0.035755526381815096, 'slim_weight': 0.8417671208330729, 'rp3beta_weight': 0.1476280799295499}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.65 min. Users per second: 360


[I 2024-12-03 18:55:19,436] Trial 222 finished with value: 0.06074828259720627 and parameters: {'norm': 2, 'itemknn_weight': 0.03780936299544535, 'slim_weight': 0.8495376679222613, 'rp3beta_weight': 0.13915750887525194}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.65 min. Users per second: 360


[I 2024-12-03 18:56:58,399] Trial 223 finished with value: 0.06066776923814249 and parameters: {'norm': 2, 'itemknn_weight': 0.03410856319400864, 'slim_weight': 0.7122570988788305, 'rp3beta_weight': 0.1573556555240736}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.65 min. Users per second: 361


[I 2024-12-03 18:58:37,354] Trial 224 finished with value: 0.06072248532320009 and parameters: {'norm': 2, 'itemknn_weight': 0.05562638866285649, 'slim_weight': 0.8571057503291284, 'rp3beta_weight': 0.11746585101898538}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.65 min. Users per second: 359


[I 2024-12-03 19:00:16,850] Trial 225 finished with value: 0.06066178034635508 and parameters: {'norm': 2, 'itemknn_weight': 0.02784776571505647, 'slim_weight': 0.8060768287989, 'rp3beta_weight': 0.17564594948457304}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.65 min. Users per second: 359


[I 2024-12-03 19:01:56,147] Trial 226 finished with value: 0.060692879773999085 and parameters: {'norm': 2, 'itemknn_weight': 0.06720813429406601, 'slim_weight': 0.8511231442497881, 'rp3beta_weight': 0.1338969517349276}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.64 min. Users per second: 362


[I 2024-12-03 19:03:34,647] Trial 227 finished with value: 0.060222871055991695 and parameters: {'norm': -inf, 'itemknn_weight': 0.020506433384333528, 'slim_weight': 0.781120737070108, 'rp3beta_weight': 0.15319727723945573}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.65 min. Users per second: 360


[I 2024-12-03 19:05:13,738] Trial 228 finished with value: 0.060747004996687066 and parameters: {'norm': 2, 'itemknn_weight': 0.042643209557101436, 'slim_weight': 0.8388016379145855, 'rp3beta_weight': 0.10864545868524246}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.65 min. Users per second: 360


[I 2024-12-03 19:06:52,902] Trial 229 finished with value: 0.060674179537431115 and parameters: {'norm': 2, 'itemknn_weight': 0.0956288252870495, 'slim_weight': 0.8451301656527894, 'rp3beta_weight': 0.1483145455482133}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.65 min. Users per second: 360


[I 2024-12-03 19:08:32,101] Trial 230 finished with value: 0.06066598438785366 and parameters: {'norm': 2, 'itemknn_weight': 0.055090582062599244, 'slim_weight': 0.8578897954630034, 'rp3beta_weight': 0.18245827542192772}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.65 min. Users per second: 359


[I 2024-12-03 19:10:11,469] Trial 231 finished with value: 0.0607312746796504 and parameters: {'norm': 2, 'itemknn_weight': 0.0394005860183903, 'slim_weight': 0.8384839126280402, 'rp3beta_weight': 0.1106156497469671}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.64 min. Users per second: 361


[I 2024-12-03 19:11:50,335] Trial 232 finished with value: 0.06070228117467738 and parameters: {'norm': 2, 'itemknn_weight': 0.03923314955859241, 'slim_weight': 0.6563405518083566, 'rp3beta_weight': 0.11702957178004593}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.64 min. Users per second: 361


[I 2024-12-03 19:13:29,235] Trial 233 finished with value: 0.06010620583860988 and parameters: {'norm': 2, 'itemknn_weight': 0.019440660351445187, 'slim_weight': 0.8164207398096014, 'rp3beta_weight': 0.4749252004162464}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.65 min. Users per second: 359


[I 2024-12-03 19:15:08,572] Trial 234 finished with value: 0.06069510721364754 and parameters: {'norm': 2, 'itemknn_weight': 0.05363041335652803, 'slim_weight': 0.8276528706845109, 'rp3beta_weight': 0.13353884094446142}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.65 min. Users per second: 360


[I 2024-12-03 19:16:47,684] Trial 235 finished with value: 0.05950675255352672 and parameters: {'norm': 2, 'itemknn_weight': 0.06366584736464247, 'slim_weight': 0.8378687160139598, 'rp3beta_weight': 0.730823834928476}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.65 min. Users per second: 360


[I 2024-12-03 19:18:26,872] Trial 236 finished with value: 0.06070895234527599 and parameters: {'norm': 2, 'itemknn_weight': 0.039201677413493244, 'slim_weight': 0.864432993823031, 'rp3beta_weight': 0.10148822979484065}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.65 min. Users per second: 360


[I 2024-12-03 19:20:05,956] Trial 237 finished with value: 0.060624985228438996 and parameters: {'norm': 2, 'itemknn_weight': 0.013336569828238654, 'slim_weight': 0.8345471198496118, 'rp3beta_weight': 0.0808501301783991}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.65 min. Users per second: 360


[I 2024-12-03 19:21:45,154] Trial 238 finished with value: 0.06047310015451386 and parameters: {'norm': 2, 'itemknn_weight': 0.0783329567353497, 'slim_weight': 0.5136411931915602, 'rp3beta_weight': 0.15906090840228124}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.65 min. Users per second: 360


[I 2024-12-03 19:23:24,151] Trial 239 finished with value: 0.06071990559579939 and parameters: {'norm': 2, 'itemknn_weight': 0.046098005445040696, 'slim_weight': 0.8167111414057553, 'rp3beta_weight': 0.13414056518452436}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.66 min. Users per second: 357


[I 2024-12-03 19:25:04,026] Trial 240 finished with value: 0.06069911950358455 and parameters: {'norm': 2, 'itemknn_weight': 0.017563409577734015, 'slim_weight': 0.8725715653986317, 'rp3beta_weight': 0.11518055315168496}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.67 min. Users per second: 355


[I 2024-12-03 19:26:44,542] Trial 241 finished with value: 0.06072128018693363 and parameters: {'norm': 2, 'itemknn_weight': 0.033164377312520775, 'slim_weight': 0.7865740541673547, 'rp3beta_weight': 0.10065271932438319}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.66 min. Users per second: 358


[I 2024-12-03 19:28:24,257] Trial 242 finished with value: 0.06074793588362724 and parameters: {'norm': 2, 'itemknn_weight': 0.029372892178473744, 'slim_weight': 0.7943401653031386, 'rp3beta_weight': 0.11833698679459478}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.65 min. Users per second: 360


[I 2024-12-03 19:30:03,451] Trial 243 finished with value: 0.060673209631277765 and parameters: {'norm': 2, 'itemknn_weight': 0.00011139360708311771, 'slim_weight': 0.8251284005290112, 'rp3beta_weight': 0.13906075931905662}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.65 min. Users per second: 360


[I 2024-12-03 19:31:42,513] Trial 244 finished with value: 0.060664001096995504 and parameters: {'norm': 2, 'itemknn_weight': 0.05012532150667425, 'slim_weight': 0.8032269881887713, 'rp3beta_weight': 0.16414417417055008}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.65 min. Users per second: 360


[I 2024-12-03 19:33:21,526] Trial 245 finished with value: 0.06074936175715591 and parameters: {'norm': 2, 'itemknn_weight': 0.029753701505484186, 'slim_weight': 0.8038869693583334, 'rp3beta_weight': 0.11931374360241905}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.65 min. Users per second: 360


[I 2024-12-03 19:35:00,653] Trial 246 finished with value: 0.06074148990962958 and parameters: {'norm': 2, 'itemknn_weight': 0.020658859882543366, 'slim_weight': 0.8056771131667451, 'rp3beta_weight': 0.13966535505987143}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.64 min. Users per second: 362


[I 2024-12-03 19:36:39,298] Trial 247 finished with value: 0.059175137180402515 and parameters: {'norm': inf, 'itemknn_weight': 0.015998402859782927, 'slim_weight': 0.8020160840019942, 'rp3beta_weight': 0.9500281321489791}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.66 min. Users per second: 358


[I 2024-12-03 19:38:18,882] Trial 248 finished with value: 0.06066249384053687 and parameters: {'norm': 2, 'itemknn_weight': 0.07636302743264253, 'slim_weight': 0.8134023359324513, 'rp3beta_weight': 0.1550899964094792}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.65 min. Users per second: 359


[I 2024-12-03 19:39:58,233] Trial 249 finished with value: 0.06069701023641004 and parameters: {'norm': 2, 'itemknn_weight': 0.0011766805576060475, 'slim_weight': 0.7676642169708324, 'rp3beta_weight': 0.14320360065904808}. Best is trial 124 with value: 0.06076377879896466.


## Some optuna visualizations on recommender parameters

In [15]:
# When tuning was skipped in this run, load the stored study so there is something to plot.
if not config['tune_parameters']:
    optuna_study = optuna.load_study(study_name=f'hyperparameters_tuning_{config["model"]}', storage=f'sqlite:///{config["database_path"]}')
    
# Slice plot of the study's trials.
fig = optuna.visualization.plot_slice(optuna_study)
fig.show()

In [16]:
# When tuning was skipped in this run, load the stored study so there is something to plot.
if not config['tune_parameters']:
    optuna_study = optuna.load_study(study_name=f'hyperparameters_tuning_{config["model"]}', storage=f'sqlite:///{config["database_path"]}')
    
# Plot of the hyperparameter importances computed by Optuna for the study.
fig = optuna.visualization.plot_param_importances(optuna_study)
fig.show()

## Let's train the recommender with best parameter values

In [17]:
if config['tune_best_params']:

    # The final base models are refit on every available interaction (train + validation);
    # the sum is computed once instead of once per constructor call.
    URM_train_validation = URM_train + URM_validation

    loaded_recommenders = {}

    for recommender_name, recommender_class in recommenders.items():

        start_time = time.time()

        print(f"{recommender_name} Model - TRAINING with its best parameters.")
        try:
            recommender = recommender_class(URM_train_validation)
        except TypeError:
            # The constructor requires more than the URM: retry passing the item features too.
            # Only the signature mismatch is caught, so genuine construction errors surface.
            recommender = recommender_class(URM_train_validation, ICM_all)

        # Extract best parameter values of the relative recommender model.
        # The path is anchored on K_PATH so it does not depend on the current working directory.
        best_params_path = (
            f'{K_PATH}/{GH_PATH}/{paths_to_best_params[recommender_name]}/'
            f'{recommender_name}Recommender/best_params_{recommender_name}.json'
        )
        with open(best_params_path, 'r') as best_params_json:
            best_params = json.load(best_params_json)

        recommender.fit(**best_params)

        loaded_recommenders[recommender_name] = recommender

        # Report the elapsed time with two decimals ("{:2f}" only set a minimum width of 2).
        new_time_value, new_time_unit = seconds_to_biggest_unit(time.time() - start_time)
        print("Training done in {:.2f}{}.\n".format(new_time_value, new_time_unit))

ItemKNNCF Model - TRAINING with its best parameters.
Similarity column 38121 (100.0%), 2509.40 column/sec. Elapsed time 15.19 sec
Training done in 15.506526sec.

SLIMElasticNet Model - TRAINING with its best parameters.
SLIMElasticNetRecommender: Processed 1978 ( 5.2%) in 5.00 min. Items per second: 6.59
SLIMElasticNetRecommender: Processed 3951 (10.4%) in 10.00 min. Items per second: 6.58
SLIMElasticNetRecommender: Processed 5939 (15.6%) in 15.00 min. Items per second: 6.60
SLIMElasticNetRecommender: Processed 7926 (20.8%) in 20.00 min. Items per second: 6.60
SLIMElasticNetRecommender: Processed 9941 (26.1%) in 25.01 min. Items per second: 6.63
SLIMElasticNetRecommender: Processed 11938 (31.3%) in 30.01 min. Items per second: 6.63
SLIMElasticNetRecommender: Processed 13967 (36.6%) in 35.01 min. Items per second: 6.65
SLIMElasticNetRecommender: Processed 16003 (42.0%) in 40.01 min. Items per second: 6.67
SLIMElasticNetRecommender: Processed 17982 (47.2%) in 45.01 min. Items per second:

In [18]:
if config['tune_best_params']:

    # Take the hybrid's best parameters from the study that was just tuned,
    # or from the JSON file saved by a previous run.
    if config['tune_parameters']:
        best_params = optuna_study.best_trial.params
    else:
        with open(f'{K_PATH}/{GH_PATH}/{D_PATH}/{config["model"]}Recommender/best_params_{config["model"]}.json', 'r') as best_params_json:
            best_params = json.load(best_params_json)

    # Every entry except "norm" is a weight. Selecting by key (rather than
    # slicing off the first value) keeps this correct even if "norm" is not
    # the first key in the stored parameters.
    # NOTE(review): the weights are assumed to be in the same order as
    # `loaded_recommenders` -- confirm against the tuning cell.
    fit_params = {
        "weights": [value for name, value in best_params.items() if name != "norm"],
        "norm": best_params["norm"],
    }

    recommender_instance = LinearCombinationRecommender(URM_train + URM_validation, loaded_recommenders.values())
    recommender_instance.fit(**fit_params)

LinearCombinationRecommender: Fit completed in 0.00 seconds.


# Testing

Create the recommendations for the submission. 

In [19]:
if config['tune_best_params']:

    # Users for which the challenge expects recommendations.
    target_users_csv = '/kaggle/input/recommender-system-2024-challenge-polimi/data_target_users_test.csv'
    data_target_users_test = pd.read_csv(target_users_csv)

    # Write the submission file for the fitted hybrid model.
    submission_csv = f'/kaggle/working/submission_{config["model"]}.csv'
    create_submission(data_target_users_test, recommender_instance, submission_csv)

Submission file saved as /kaggle/working/submission_SecondLinearCombination.csv


# Save Version on GitHub 

Write or import a JSON file in which the best hyperparameters are saved.

In [20]:
if config['tune_parameters']:
    # Dump the best parameters found by the study to the working directory.
    local_params_path = f'/kaggle/working/best_params_{config["model"]}.json'
    with open(local_params_path, 'w') as params_file:
        json.dump(optuna_study.best_params, params_file)

    if config['save_github']:
        # Push the same file to the repository path of this model.
        remote_params_path = f'{GH_PATH}/{D_PATH}/{config["model"]}Recommender/best_params_{config["model"]}.json'
        upload_file(
            local_params_path,
            remote_params_path,
            f'{config["model"]} tuning results (from kaggle notebook)',
            repo,
        )
elif config['copy_prev_best_params']:
    # No tuning in this run: copy the previously stored parameters instead.
    previous_params_path = (
        f'{K_PATH}/{GH_PATH}/{D_PATH}/{config["model"]}Recommender/'
        f'best_params_{config["model"]}.json'
    )
    shutil.copyfile(previous_params_path, f'/kaggle/working/best_params_{config["model"]}.json')

File 'TrainedModels/WithoutKFCV/Hybrid/LinearCombination/SecondLinearCombinationRecommender/best_params_SecondLinearCombination.json' updated successfully.


Save the history of the tuned model.

In [21]:
if config['save_github'] and config['tune_parameters']:
    # Upload the study's database file to the model's repository folder.
    history_local_path = config['database_path']
    history_remote_path = f'{GH_PATH}/{D_PATH}/{config["model"]}Recommender/history_{config["model"]}.db'
    # Third argument is presumably the commit message -- verify against upload_file.
    history_message = f'Tuning {config["model"]} db updated results (from kaggle notebook)'
    upload_file(history_local_path, history_remote_path, history_message, repo)

File 'TrainedModels/WithoutKFCV/Hybrid/LinearCombination/SecondLinearCombinationRecommender/history_SecondLinearCombination.db' updated successfully.


Save the best trained model and its submission.

In [22]:
if config['save_github'] and config['tune_best_params']:
    # Upload the generated submission CSV to the model's Submission folder.
    submission_local_path = f'/kaggle/working/submission_{config["model"]}.csv'
    submission_remote_path = f'{GH_PATH}/{D_PATH}/{config["model"]}Recommender/Submission/submission_{config["model"]}.csv'
    # Third argument is presumably the commit message -- verify against upload_file.
    submission_message = f'New {config["model"]} submission (from kaggle notebook)'
    upload_file(submission_local_path, submission_remote_path, submission_message, repo)

File 'TrainedModels/WithoutKFCV/Hybrid/LinearCombination/SecondLinearCombinationRecommender/Submission/submission_SecondLinearCombination.csv' updated successfully.
