# Set connection with GitHub

In [1]:
# ! cd /kaggle/working && rm -rf RECsys_Challenge2024

In [2]:
from kaggle_secrets import UserSecretsClient

# GitHub token read from the Kaggle secret labelled "Token".
token = UserSecretsClient().get_secret("Token")

# NOTE(review): the token is interpolated into the clone URL, so it presumably ends up in the
# remote URL stored in the clone's .git/config — confirm the working directory is never shared as-is.
! git clone https://{token}@github.com/madratak/RECsys_Challenge2024.git

Cloning into 'RECsys_Challenge2024'...
remote: Enumerating objects: 3871, done.[K
remote: Counting objects: 100% (1344/1344), done.[K
remote: Compressing objects: 100% (550/550), done.[K
remote: Total 3871 (delta 737), reused 1116 (delta 606), pack-reused 2527 (from 1)[K
Receiving objects: 100% (3871/3871), 165.12 MiB | 21.58 MiB/s, done.
Resolving deltas: 100% (2256/2256), done.
Updating files: 100% (370/370), done.


In [3]:
! pip install PyGithub requests

Collecting PyGithub
  Downloading PyGithub-2.5.0-py3-none-any.whl.metadata (3.9 kB)
Collecting pynacl>=1.4.0 (from PyGithub)
  Downloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl.metadata (8.6 kB)
Downloading PyGithub-2.5.0-py3-none-any.whl (375 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m375.9/375.9 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (856 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m856.7/856.7 kB[0m [31m23.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pynacl, PyGithub
Successfully installed PyGithub-2.5.0 pynacl-1.5.0


In [4]:
# Make the cloned repository the working directory for the rest of the notebook.
%cd /kaggle/working/RECsys_Challenge2024
# Compile the repository's Cython files with the current Python environment.
! python run_compile_all_cython.py

/kaggle/working/RECsys_Challenge2024
run_compile_all_cython: Found 11 Cython files in 5 folders...
run_compile_all_cython: All files will be compiled using your current python environment: '/opt/conda/bin/python'
Compiling [1/11]: MatrixFactorization_Cython_Epoch.pyx... 
In file included from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/ndarraytypes.h:1929[m[K,
                 from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/ndarrayobject.h:12[m[K,
                 from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/arrayobject.h:5[m[K,
                 from [01m[KMatrixFactorization_Cython_Epoch.c:1252[m[K:
      |  [01;35m[K^~~~~~~[m[K
[01m[KMatrixFactorization_Cython_Epoch.c:[m[K In function '[01m[K__pyx_pf_32MatrixFactorization_Cython_Epoch_32MatrixFactorization_Cython_Epoch_10epochIteration_Cython_ASY_SVD_SGD[m[K':
26255 |         [01;35m[Kfor[m[K (__pyx_t_21 = __

In [5]:
from Utils.notebookFunctions import *
import numpy as np
import pandas as pd
import scipy.sparse as sps
import time
import shutil
import optuna
import json
import os
from Utils.seconds_to_biggest_unit import seconds_to_biggest_unit


# Root of the cloned repository inside the Kaggle working directory.
K_PATH = '/kaggle/working/RECsys_Challenge2024'
# Folder, relative to the repository root, holding the tuned parameters and tuning histories.
GH_PATH = 'TrainedModels/WithoutKFCV'
# Sub-folder of GH_PATH used for this notebook's hybrid model.
D_PATH = 'Hybrid/LinearCombination'

# Seed NumPy's global random generator.
np.random.seed(42)

## Import the repository

In [6]:
# Handle to the GitHub repository, obtained with the token read in the connection cell.
# `get_repo_from_github` is presumably provided by the star import from Utils.notebookFunctions — TODO confirm.
repo = get_repo_from_github(token)

Repository 'RECsys_Challenge2024' found.


In [7]:
# Run settings read by the cells below.
config = dict(
    # Model name; used to build the study name and the history/parameter/submission file names.
    model='SecondLinearCombination',
    # When True the Optuna study is (re)opened and new trials are run.
    tune_parameters=True,
    # Local SQLite file used as Optuna storage.
    database_path='/kaggle/working/history_SecondLinearCombination.db',
    # Not read by the cells visible here — presumably consumed by the saving section; TODO confirm.
    copy_prev_best_params=False,
    # When True the models are retrained on train + validation and a submission is written.
    tune_best_params=True,
    # Not read by the cells visible here — presumably gates the upload to GitHub; TODO confirm.
    save_github=True,
)

Import the database where previous tuning trials have been saved.

In [8]:
# Seed the local Optuna storage with the tuning history versioned in the repository,
# so that tuning resumes from the earlier trials instead of starting from scratch.
previous_history_path = f'{K_PATH}/{GH_PATH}/{D_PATH}/{config["model"]}Recommender/history_{config["model"]}.db'

try:
    shutil.copyfile(previous_history_path, config['database_path'])
    print(f"Previous tuning history copied to {config['database_path']}.")
except FileNotFoundError:
    # Best effort: when no history exists yet, Optuna creates the database itself.
    print(f"No previous tuning history found at {previous_history_path}: a new database will be created.")

# Construction of URM and ICM matrices

In [9]:
URM_all_dataframe = pd.read_csv("/kaggle/input/recommender-system-2024-challenge-polimi/data_train.csv")

# The ids are used directly as row/column indices, so the matrix must span up to the
# largest id. Counting the distinct ids only gives the same size when the ids are dense
# (0..n-1); with a gap in the id range csr_matrix would reject the out-of-range index.
n_users = int(URM_all_dataframe["user_id"].max()) + 1
n_items = int(URM_all_dataframe["item_id"].max()) + 1

# User Rating Matrix: one row per user, one column per item.
URM_all = sps.csr_matrix((URM_all_dataframe["data"].values,
                          (URM_all_dataframe["user_id"].values, URM_all_dataframe["item_id"].values)),
                        shape = (n_users, n_items))

URM_all

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 1764607 stored elements and shape (35736, 38121)>

In [10]:
ICM_dataframe = pd.read_csv("/kaggle/input/recommender-system-2024-challenge-polimi/data_ICM_metadata.csv")

# The ids are used directly as row/column indices, so the matrix must span up to the
# largest id. Counting the distinct ids only gives the same size when the ids are dense
# (0..n-1); with a gap in the id range csr_matrix would reject the out-of-range index.
# NOTE: this rebinds `n_items`, previously derived from the interaction data.
n_items = int(ICM_dataframe["item_id"].max()) + 1
n_features = int(ICM_dataframe["feature_id"].max()) + 1

# Item Content Matrix: one row per item, one column per feature.
ICM_all = sps.csr_matrix((ICM_dataframe["data"].values,
                          (ICM_dataframe["item_id"].values, ICM_dataframe["feature_id"].values)),
                        shape = (n_items, n_features))

ICM_all

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 2940040 stored elements and shape (38121, 94331)>

# Training

In [11]:
from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

# Split the interactions into a training part (train_percentage = 0.80) and a validation part.
# NOTE(review): the split presumably draws from NumPy's global generator seeded above — confirm.
URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.80)

# Evaluator over the validation interactions; metrics are computed at cutoff 10 only.
evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])

EvaluatorHoldout: Ignoring 141 ( 0.4%) Users that have less than 1 test interactions


## Import the best recommenders previously trained.

In [12]:
from Recommenders.KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
from Recommenders.SLIM.SLIMElasticNetRecommender import SLIMElasticNetRecommender
from Recommenders.GraphBased.RP3betaRecommender import RP3betaRecommender

# Base models combined by the hybrid, keyed by the name used in file names and log messages.
# NOTE: keep this order — the hybrid weights are built as [itemknn, slim, rp3beta] and are
# presumably matched to these models by position.
recommenders = {
    "ItemKNNCF": ItemKNNCFRecommender,
    "SLIMElasticNet": SLIMElasticNetRecommender,
    "RP3beta": RP3betaRecommender
}

# Sub-folder of GH_PATH in which each model's tuned hyperparameters are stored.
paths_to_best_params = {
    "ItemKNNCF": "KNN",
    "SLIMElasticNet": "SLIM",
    "RP3beta": "GraphBased"
}


# Models fitted on URM_train only, so that the hybrid can be tuned on URM_validation.
loaded_recommenders = {}

for recommender_name, recommender_class in recommenders.items():

    start_time = time.time()

    print(f"{recommender_name} Model - TRAINING with its best parameters.")
    try:
        recommender = recommender_class(URM_train)
    except TypeError:
        # Only a signature mismatch means the model also needs the ICM; any other
        # constructor failure is a real error and must not be hidden by a retry.
        recommender = recommender_class(URM_train, ICM_all)

    # Load the tuned hyperparameters of this model. The path is anchored to the repository
    # root so that it does not depend on the current working directory.
    best_params_path = f'{K_PATH}/{GH_PATH}/{paths_to_best_params[recommender_name]}/{recommender_name}Recommender/best_params_{recommender_name}.json'
    with open(best_params_path, 'r') as best_params_json:
        best_params = json.load(best_params_json)

    recommender.fit(**best_params)

    loaded_recommenders[recommender_name] = recommender

    new_time_value, new_time_unit = seconds_to_biggest_unit(time.time() - start_time)
    # '.2f' sets the precision; the previous '2f' only set a minimum width and printed six decimals.
    print("Training done in {:.2f}{}.\n".format(new_time_value, new_time_unit))

ItemKNNCF Model - TRAINING with its best parameters.
Similarity column 38121 (100.0%), 2740.83 column/sec. Elapsed time 13.91 sec
Training done in 14.119500sec.

SLIMElasticNet Model - TRAINING with its best parameters.
SLIMElasticNetRecommender: Processed 3181 ( 8.3%) in 5.00 min. Items per second: 10.60
SLIMElasticNetRecommender: Processed 6320 (16.6%) in 10.00 min. Items per second: 10.53
SLIMElasticNetRecommender: Processed 9527 (25.0%) in 15.00 min. Items per second: 10.58
SLIMElasticNetRecommender: Processed 12601 (33.1%) in 20.00 min. Items per second: 10.50
SLIMElasticNetRecommender: Processed 15727 (41.3%) in 25.00 min. Items per second: 10.48
SLIMElasticNetRecommender: Processed 18737 (49.2%) in 30.01 min. Items per second: 10.41
SLIMElasticNetRecommender: Processed 21829 (57.3%) in 35.01 min. Items per second: 10.39
SLIMElasticNetRecommender: Processed 24900 (65.3%) in 40.01 min. Items per second: 10.37
SLIMElasticNetRecommender: Processed 28037 (73.5%) in 45.01 min. Items p

In [13]:
from Recommenders.Hybrid.LinearCombinationRecommender import LinearCombinationRecommender 

def objective_function_(optuna_trial):
    """Optuna objective: validation MAP at cutoff 10 of the linear combination hybrid.

    Samples the score norm and one raw weight per base model, rescales the weights so
    that they sum to 1, fits the hybrid on the already trained base models and evaluates
    it with `evaluator_validation`.

    Parameters
    ----------
    optuna_trial : optuna.trial.Trial
        Trial used to sample 'norm', 'itemknn_weight', 'slim_weight' and 'rp3beta_weight'.

    Returns
    -------
    float
        The "MAP" value reported by the evaluator at cutoff 10.

    Raises
    ------
    optuna.TrialPruned
        If all sampled weights are zero, since no combination can be built from them.
    """

    recommender_instance = LinearCombinationRecommender(URM_train, loaded_recommenders.values())

    norm = optuna_trial.suggest_categorical('norm', [1, 2, np.inf, -np.inf])
    itemknn_weight = optuna_trial.suggest_float('itemknn_weight', 0.0, 1.0)
    slim_weight = optuna_trial.suggest_float('slim_weight', 0.0, 1.0)
    rp3beta_weight = optuna_trial.suggest_float('rp3beta_weight', 0.0, 1.0)

    total_weight = itemknn_weight + slim_weight + rp3beta_weight
    if total_weight == 0.0:
        # The search ranges include 0.0: an all-zero draw would divide by zero below and
        # abort the whole study, so the trial is discarded instead.
        raise optuna.TrialPruned()

    # Ensure the weights sum to 1 (Normalization)
    itemknn_weight /= total_weight
    slim_weight /= total_weight
    rp3beta_weight /= total_weight

    full_hyperp = {
                    "weights": [itemknn_weight, slim_weight, rp3beta_weight],
                    "norm": norm
                  }

    recommender_instance.fit(**full_hyperp)

    result_df, _ = evaluator_validation.evaluateRecommender(recommender_instance)

    # Single .loc lookup (row = cutoff, column = metric) instead of chained indexing.
    return result_df.loc[10, "MAP"]

In [14]:
if config['tune_parameters']:

    # Name and storage of the study; with load_if_exists=True an existing study stored
    # in the SQLite history is resumed instead of a new one being created.
    study_name = f'hyperparameters_tuning_{config["model"]}'
    storage_url = f'sqlite:///{config["database_path"]}'

    optuna_study = optuna.create_study(
        study_name=study_name,
        storage=storage_url,
        direction='maximize',
        load_if_exists=True,
    )

    # Run 50 more trials of the hybrid objective.
    optuna_study.optimize(objective_function_, n_trials=50)

[I 2024-12-03 10:47:33,744] Using an existing study with name 'hyperparameters_tuning_SecondLinearCombination' instead of creating a new one.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 391


[I 2024-12-03 10:49:05,065] Trial 150 finished with value: 0.06002877388254337 and parameters: {'norm': 1, 'itemknn_weight': 0.09904412832781548, 'slim_weight': 0.9453934401162589, 'rp3beta_weight': 0.1938288686637028}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 391


[I 2024-12-03 10:50:36,394] Trial 151 finished with value: 0.060733739579081195 and parameters: {'norm': 2, 'itemknn_weight': 0.034038118978174624, 'slim_weight': 0.91063167856302, 'rp3beta_weight': 0.1714334855006142}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 391


[I 2024-12-03 10:52:07,708] Trial 152 finished with value: 0.060713378238871636 and parameters: {'norm': 2, 'itemknn_weight': 0.037180101041017335, 'slim_weight': 0.8925297069496255, 'rp3beta_weight': 0.16703557573395736}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 391


[I 2024-12-03 10:53:38,935] Trial 153 finished with value: 0.06059329159392184 and parameters: {'norm': 2, 'itemknn_weight': 0.0008018207483391701, 'slim_weight': 0.9597466076233702, 'rp3beta_weight': 0.1052209539804514}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 391


[I 2024-12-03 10:55:10,156] Trial 154 finished with value: 0.060690981210575035 and parameters: {'norm': 2, 'itemknn_weight': 0.07519584268482016, 'slim_weight': 0.998813055260068, 'rp3beta_weight': 0.13313008652595615}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.51 min. Users per second: 392


[I 2024-12-03 10:56:41,215] Trial 155 finished with value: 0.06066203675832656 and parameters: {'norm': 2, 'itemknn_weight': 0.04950286373101348, 'slim_weight': 0.9238827370247127, 'rp3beta_weight': 0.20148733131295338}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 391


[I 2024-12-03 10:58:12,379] Trial 156 finished with value: 0.06071495238540983 and parameters: {'norm': 2, 'itemknn_weight': 0.028626844498447702, 'slim_weight': 0.8639340361996861, 'rp3beta_weight': 0.1154216888858007}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 392


[I 2024-12-03 10:59:43,479] Trial 157 finished with value: 0.06071046071656918 and parameters: {'norm': 2, 'itemknn_weight': 0.020712011673699314, 'slim_weight': 0.8781262180065683, 'rp3beta_weight': 0.16297900120660214}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 391


[I 2024-12-03 11:01:14,729] Trial 158 finished with value: 0.05845857608857843 and parameters: {'norm': 2, 'itemknn_weight': 0.8433512960306492, 'slim_weight': 0.9112476084226385, 'rp3beta_weight': 0.14004416263858357}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 390


[I 2024-12-03 11:02:46,145] Trial 159 finished with value: 0.06069958996381083 and parameters: {'norm': 2, 'itemknn_weight': 0.04290199685932877, 'slim_weight': 0.9534731621227122, 'rp3beta_weight': 0.08875692569935428}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 390


[I 2024-12-03 11:04:17,521] Trial 160 finished with value: 0.06071584425313692 and parameters: {'norm': 2, 'itemknn_weight': 0.020004744309073213, 'slim_weight': 0.9271730015506231, 'rp3beta_weight': 0.17818843538703455}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 390


[I 2024-12-03 11:05:48,899] Trial 161 finished with value: 0.060704723777414704 and parameters: {'norm': 2, 'itemknn_weight': 0.023723485920320916, 'slim_weight': 0.9314202307622352, 'rp3beta_weight': 0.18161817288244153}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 391


[I 2024-12-03 11:07:20,138] Trial 162 finished with value: 0.06065415599212298 and parameters: {'norm': 2, 'itemknn_weight': 0.00021836146220234956, 'slim_weight': 0.977963819827531, 'rp3beta_weight': 0.13056901163300738}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 391


[I 2024-12-03 11:08:51,329] Trial 163 finished with value: 0.06070372823006456 and parameters: {'norm': 2, 'itemknn_weight': 0.05516575728831773, 'slim_weight': 0.8980586062497782, 'rp3beta_weight': 0.15264080706286626}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.51 min. Users per second: 392


[I 2024-12-03 11:10:22,399] Trial 164 finished with value: 0.060618182507350696 and parameters: {'norm': 2, 'itemknn_weight': 0.03320053651413336, 'slim_weight': 0.9275145234210069, 'rp3beta_weight': 0.24500477218283195}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.51 min. Users per second: 392


[I 2024-12-03 11:11:53,460] Trial 165 finished with value: 0.06065391853234071 and parameters: {'norm': 2, 'itemknn_weight': 0.06205406034985612, 'slim_weight': 0.9621506026586543, 'rp3beta_weight': 0.21409226678930887}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.51 min. Users per second: 392


[I 2024-12-03 11:13:24,507] Trial 166 finished with value: 0.06069537477396539 and parameters: {'norm': 2, 'itemknn_weight': 0.02073966548186717, 'slim_weight': 0.8832837469504673, 'rp3beta_weight': 0.11539296381824615}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 391


[I 2024-12-03 11:14:55,803] Trial 167 finished with value: 0.06067962104540111 and parameters: {'norm': 2, 'itemknn_weight': 0.0934254641907548, 'slim_weight': 0.9456462433878113, 'rp3beta_weight': 0.17148586624406204}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 390


[I 2024-12-03 11:16:27,143] Trial 168 finished with value: 0.06070704709284399 and parameters: {'norm': 2, 'itemknn_weight': 0.04354458320833413, 'slim_weight': 0.9993269707465692, 'rp3beta_weight': 0.1915436367422653}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.51 min. Users per second: 392


[I 2024-12-03 11:17:58,209] Trial 169 finished with value: 0.060684201901014374 and parameters: {'norm': 2, 'itemknn_weight': 0.0751546632950806, 'slim_weight': 0.905296349142541, 'rp3beta_weight': 0.1531593563680425}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 390


[I 2024-12-03 11:19:29,737] Trial 170 finished with value: 0.06066301223865303 and parameters: {'norm': 2, 'itemknn_weight': 0.0163842131347135, 'slim_weight': 0.9748426682594357, 'rp3beta_weight': 0.11913153881107064}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 391


[I 2024-12-03 11:21:00,947] Trial 171 finished with value: 0.060706733824304956 and parameters: {'norm': 2, 'itemknn_weight': 0.0707923030476025, 'slim_weight': 0.8508665356288635, 'rp3beta_weight': 0.09775481784235854}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 391


[I 2024-12-03 11:22:32,204] Trial 172 finished with value: 0.06074187006824844 and parameters: {'norm': 2, 'itemknn_weight': 0.03823869388987145, 'slim_weight': 0.8591726350204462, 'rp3beta_weight': 0.1394334082886131}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 389


[I 2024-12-03 11:24:03,776] Trial 173 finished with value: 0.06073729590164311 and parameters: {'norm': 2, 'itemknn_weight': 0.039253440005119496, 'slim_weight': 0.82203451643559, 'rp3beta_weight': 0.13829234265897344}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 390


[I 2024-12-03 11:25:35,129] Trial 174 finished with value: 0.05994991828261743 and parameters: {'norm': 2, 'itemknn_weight': 0.29347013299214736, 'slim_weight': 0.8049814488553836, 'rp3beta_weight': 0.16823825953631077}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 389


[I 2024-12-03 11:27:06,762] Trial 175 finished with value: 0.06070338040165102 and parameters: {'norm': 2, 'itemknn_weight': 0.0501326849011288, 'slim_weight': 0.832802602189152, 'rp3beta_weight': 0.14155091759768584}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 390


[I 2024-12-03 11:28:38,179] Trial 176 finished with value: 0.06070055206612138 and parameters: {'norm': 2, 'itemknn_weight': 0.04150264899211193, 'slim_weight': 0.9205582013313676, 'rp3beta_weight': 0.18020605943483403}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 391


[I 2024-12-03 11:30:09,447] Trial 177 finished with value: 0.06066589520108096 and parameters: {'norm': 2, 'itemknn_weight': 0.002432175991736011, 'slim_weight': 0.9502835740768899, 'rp3beta_weight': 0.13525008489465012}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 390


[I 2024-12-03 11:31:40,956] Trial 178 finished with value: 0.060696241000495504 and parameters: {'norm': 2, 'itemknn_weight': 0.11096166755002669, 'slim_weight': 0.929831949075035, 'rp3beta_weight': 0.15576241527014226}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 391


[I 2024-12-03 11:33:12,106] Trial 179 finished with value: 0.05888221660345359 and parameters: {'norm': 1, 'itemknn_weight': 0.05894937297356602, 'slim_weight': 0.885447791931899, 'rp3beta_weight': 0.5308856507371855}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 391


[I 2024-12-03 11:34:43,399] Trial 180 finished with value: 0.06063433980606162 and parameters: {'norm': 2, 'itemknn_weight': 0.08873867733752855, 'slim_weight': 0.9771786702050135, 'rp3beta_weight': 0.20295297527473416}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 391


[I 2024-12-03 11:36:14,596] Trial 181 finished with value: 0.06072505724675788 and parameters: {'norm': 2, 'itemknn_weight': 0.02941979409573006, 'slim_weight': 0.8670744452615328, 'rp3beta_weight': 0.1193828038229389}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.51 min. Users per second: 392


[I 2024-12-03 11:37:45,638] Trial 182 finished with value: 0.06075403737371521 and parameters: {'norm': 2, 'itemknn_weight': 0.026726449197668624, 'slim_weight': 0.8238699522946727, 'rp3beta_weight': 0.12930590843395112}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.51 min. Users per second: 393


[I 2024-12-03 11:39:16,354] Trial 183 finished with value: 0.06070913517816007 and parameters: {'norm': 2, 'itemknn_weight': 0.040440246513550916, 'slim_weight': 0.7532677922082669, 'rp3beta_weight': 0.07383058327335315}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 391


[I 2024-12-03 11:40:47,504] Trial 184 finished with value: 0.0606874048209893 and parameters: {'norm': 2, 'itemknn_weight': 0.06834915573839145, 'slim_weight': 0.8141235954288237, 'rp3beta_weight': 0.12941566535173826}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 390


[I 2024-12-03 11:42:18,950] Trial 185 finished with value: 0.06070455209287734 and parameters: {'norm': 2, 'itemknn_weight': 0.03377360600424599, 'slim_weight': 0.8393238923193973, 'rp3beta_weight': 0.09755248379208276}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.51 min. Users per second: 393


[I 2024-12-03 11:43:49,803] Trial 186 finished with value: 0.05914123505842653 and parameters: {'norm': -inf, 'itemknn_weight': 0.054749981902143294, 'slim_weight': 0.8703051209931134, 'rp3beta_weight': 0.14621213663605892}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 390


[I 2024-12-03 11:45:21,177] Trial 187 finished with value: 0.060181840681206356 and parameters: {'norm': 2, 'itemknn_weight': 0.018679874135649872, 'slim_weight': 0.214521738386788, 'rp3beta_weight': 0.11610289148835516}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 390


[I 2024-12-03 11:46:52,614] Trial 188 finished with value: 0.060678191827368444 and parameters: {'norm': 2, 'itemknn_weight': 0.08113151882907609, 'slim_weight': 0.8949684015409102, 'rp3beta_weight': 0.15960421811912331}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.51 min. Users per second: 392


[I 2024-12-03 11:48:23,500] Trial 189 finished with value: 0.06075623917216665 and parameters: {'norm': 2, 'itemknn_weight': 0.03519992144240509, 'slim_weight': 0.7936370168391682, 'rp3beta_weight': 0.13280740045597011}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 391


[I 2024-12-03 11:49:54,732] Trial 190 finished with value: 0.06065635667573954 and parameters: {'norm': 2, 'itemknn_weight': 0.014569881470348475, 'slim_weight': 0.7881916218490436, 'rp3beta_weight': 0.09385916266215895}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 391


[I 2024-12-03 11:51:25,932] Trial 191 finished with value: 0.06074609752127484 and parameters: {'norm': 2, 'itemknn_weight': 0.03991562855000571, 'slim_weight': 0.8184363673009852, 'rp3beta_weight': 0.12782018235112602}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 391


[I 2024-12-03 11:52:57,231] Trial 192 finished with value: 0.06076224144196995 and parameters: {'norm': 2, 'itemknn_weight': 0.03502534953395742, 'slim_weight': 0.8135110293261227, 'rp3beta_weight': 0.1289437994452132}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 390


[I 2024-12-03 11:54:28,751] Trial 193 finished with value: 0.06075623359799328 and parameters: {'norm': 2, 'itemknn_weight': 0.03460405165974822, 'slim_weight': 0.8255176451254922, 'rp3beta_weight': 0.13391993154550424}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 390


[I 2024-12-03 11:56:00,145] Trial 194 finished with value: 0.06074572516649879 and parameters: {'norm': 2, 'itemknn_weight': 0.03653161828786334, 'slim_weight': 0.8168745542660533, 'rp3beta_weight': 0.13040765408675398}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 390


[I 2024-12-03 11:57:31,587] Trial 195 finished with value: 0.06064089391902109 and parameters: {'norm': 2, 'itemknn_weight': 0.0005898592480198112, 'slim_weight': 0.8197451413122276, 'rp3beta_weight': 0.12518430490358184}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 391


[I 2024-12-03 11:59:02,731] Trial 196 finished with value: 0.06075086120977226 and parameters: {'norm': 2, 'itemknn_weight': 0.03642544948078807, 'slim_weight': 0.7241661952681222, 'rp3beta_weight': 0.0843409379070065}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 390


[I 2024-12-03 12:00:34,167] Trial 197 finished with value: 0.06068943270523388 and parameters: {'norm': 2, 'itemknn_weight': 0.04173099887865642, 'slim_weight': 0.792052170833838, 'rp3beta_weight': 0.0683830708318529}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 391


[I 2024-12-03 12:02:05,304] Trial 198 finished with value: 0.06074403061781734 and parameters: {'norm': 2, 'itemknn_weight': 0.05707202250065098, 'slim_weight': 0.8209709622164788, 'rp3beta_weight': 0.09923699895719275}. Best is trial 124 with value: 0.06076377879896466.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.52 min. Users per second: 391


[I 2024-12-03 12:03:36,497] Trial 199 finished with value: 0.06063713581138604 and parameters: {'norm': 2, 'itemknn_weight': 0.03476067908623501, 'slim_weight': 0.7653418749690175, 'rp3beta_weight': 0.038939999578766066}. Best is trial 124 with value: 0.06076377879896466.


## Some optuna visualizations on recommender parameters

In [15]:
# When tuning was skipped in this session the study is read back from the SQLite history.
if not config['tune_parameters']:
    optuna_study = optuna.load_study(
        study_name=f'hyperparameters_tuning_{config["model"]}',
        storage=f'sqlite:///{config["database_path"]}',
    )

# Slice plot of the study's trials.
fig = optuna.visualization.plot_slice(optuna_study)
fig.show()

In [16]:
# When tuning was skipped in this session the study is read back from the SQLite history.
if not config['tune_parameters']:
    optuna_study = optuna.load_study(
        study_name=f'hyperparameters_tuning_{config["model"]}',
        storage=f'sqlite:///{config["database_path"]}',
    )

# Importance of each tuned hyperparameter according to Optuna.
fig = optuna.visualization.plot_param_importances(optuna_study)
fig.show()

## Let's train the recommender with best parameter values

In [17]:
if config['tune_best_params']:

    # All known interactions: the final models are trained on train + validation.
    # Built once here instead of being summed again for every model.
    URM_train_validation = URM_train + URM_validation

    # NOTE: this replaces the models fitted on URM_train only. Re-running the tuning cell
    # after this one would evaluate models that have already seen the validation data.
    loaded_recommenders = {}

    for recommender_name, recommender_class in recommenders.items():

        start_time = time.time()

        print(f"{recommender_name} Model - TRAINING with its best parameters.")
        try:
            recommender = recommender_class(URM_train_validation)
        except TypeError:
            # Only a signature mismatch means the model also needs the ICM; any other
            # constructor failure is a real error and must not be hidden by a retry.
            recommender = recommender_class(URM_train_validation, ICM_all)

        # Load the tuned hyperparameters of this model. The path is anchored to the repository
        # root so that it does not depend on the current working directory.
        best_params_path = f'{K_PATH}/{GH_PATH}/{paths_to_best_params[recommender_name]}/{recommender_name}Recommender/best_params_{recommender_name}.json'
        with open(best_params_path, 'r') as best_params_json:
            best_params = json.load(best_params_json)

        recommender.fit(**best_params)

        loaded_recommenders[recommender_name] = recommender

        new_time_value, new_time_unit = seconds_to_biggest_unit(time.time() - start_time)
        # '.2f' sets the precision; the previous '2f' only set a minimum width and printed six decimals.
        print("Training done in {:.2f}{}.\n".format(new_time_value, new_time_unit))

ItemKNNCF Model - TRAINING with its best parameters.
Similarity column 38121 (100.0%), 2546.14 column/sec. Elapsed time 14.97 sec
Training done in 15.232244sec.

SLIMElasticNet Model - TRAINING with its best parameters.
SLIMElasticNetRecommender: Processed 2512 ( 6.6%) in 5.00 min. Items per second: 8.37
SLIMElasticNetRecommender: Processed 5028 (13.2%) in 10.00 min. Items per second: 8.38
SLIMElasticNetRecommender: Processed 7489 (19.6%) in 15.00 min. Items per second: 8.32
SLIMElasticNetRecommender: Processed 9900 (26.0%) in 20.00 min. Items per second: 8.25
SLIMElasticNetRecommender: Processed 12276 (32.2%) in 25.00 min. Items per second: 8.18
SLIMElasticNetRecommender: Processed 14759 (38.7%) in 30.01 min. Items per second: 8.20
SLIMElasticNetRecommender: Processed 17249 (45.2%) in 35.01 min. Items per second: 8.21
SLIMElasticNetRecommender: Processed 19736 (51.8%) in 40.01 min. Items per second: 8.22
SLIMElasticNetRecommender: Processed 22207 (58.3%) in 45.01 min. Items per second

In [18]:
if config['tune_best_params']:

    # Hybrid hyperparameters: taken from this run's study when tuning was
    # performed, otherwise read back from the JSON stored in the repository.
    if config['tune_parameters']:
        best_params = optuna_study.best_trial.params
    else:
        with open(f'{K_PATH}/{GH_PATH}/{D_PATH}/{config["model"]}Recommender/best_params_{config["model"]}.json', 'r') as best_params_json:
            best_params = json.load(best_params_json)

    # Every entry except "norm" is treated as a per-model weight. Selecting by
    # key (instead of slicing off the first value) keeps "norm" out of the
    # weights even if the dictionary is stored or returned in a different order.
    # NOTE(review): the weights keep the dict's own order, which is assumed to
    # line up with the order of `loaded_recommenders` - confirm.
    fit_params = {
        "weights": [value for name, value in best_params.items() if name != "norm"],
        "norm": best_params["norm"],
    }

    recommender_instance = LinearCombinationRecommender(URM_train + URM_validation, loaded_recommenders.values())
    recommender_instance.fit(**fit_params)

LinearCombinationRecommender: Fit completed in 0.00 seconds.


# Testing

Create the recommendations for the submission. 

In [19]:
if config['tune_best_params']:

    # Target users are read from the competition input folder; the submission
    # CSV is written to the working directory and named after the model.
    data_target_users_test = pd.read_csv(
        '/kaggle/input/recommender-system-2024-challenge-polimi/data_target_users_test.csv'
    )
    create_submission(
        data_target_users_test,
        recommender_instance,
        f'/kaggle/working/submission_{config["model"]}.csv',
    )

Submission file saved as /kaggle/working/submission_SecondLinearCombination.csv


# Save Version on GitHub 

Write or import a JSON file where the best hyperparameters are saved.

In [20]:
if config['tune_parameters']:
    # Dump the study's best parameters to the working directory first ...
    local_best_params_path = f'/kaggle/working/best_params_{config["model"]}.json'
    with open(local_best_params_path, 'w') as params_file:
        json.dump(optuna_study.best_params, params_file)

    # ... then, if requested, push that same file to the repository.
    if config['save_github']:
        upload_file(
            local_best_params_path,
            f'{GH_PATH}/{D_PATH}/{config["model"]}Recommender/best_params_{config["model"]}.json',
            f'{config["model"]} tuning results (from kaggle notebook)',
            repo,
        )
elif config['copy_prev_best_params']:
    # No tuning in this run: copy the JSON already stored in the cloned
    # repository into the working directory instead.
    shutil.copyfile(
        src=f'{K_PATH}/{GH_PATH}/{D_PATH}/{config["model"]}Recommender/best_params_{config["model"]}.json',
        dst=f'/kaggle/working/best_params_{config["model"]}.json',
    )

File 'TrainedModels/WithoutKFCV/Hybrid/LinearCombination/SecondLinearCombinationRecommender/best_params_SecondLinearCombination.json' updated successfully.


Save the history of the tuned model.

In [21]:
# The tuning database is uploaded only when GitHub saving is enabled and a
# tuning run was requested.
if config['save_github'] and config['tune_parameters']:
    local_db_path = config['database_path']
    remote_db_path = f'{GH_PATH}/{D_PATH}/{config["model"]}Recommender/history_{config["model"]}.db'
    db_commit_message = f'Tuning {config["model"]} db updated results (from kaggle notebook)'

    upload_file(local_db_path, remote_db_path, db_commit_message, repo)

File 'TrainedModels/WithoutKFCV/Hybrid/LinearCombination/SecondLinearCombinationRecommender/history_SecondLinearCombination.db' updated successfully.


Save the submission produced by the best trained model.

In [22]:
# The submission CSV is uploaded only when GitHub saving is enabled and the
# final model was trained in this run.
if config['save_github'] and config['tune_best_params']:
    local_submission_path = f'/kaggle/working/submission_{config["model"]}.csv'
    remote_submission_path = f'{GH_PATH}/{D_PATH}/{config["model"]}Recommender/Submission/submission_{config["model"]}.csv'
    submission_commit_message = f'New {config["model"]} submission (from kaggle notebook)'

    upload_file(local_submission_path, remote_submission_path, submission_commit_message, repo)

File 'TrainedModels/WithoutKFCV/Hybrid/LinearCombination/SecondLinearCombinationRecommender/Submission/submission_SecondLinearCombination.csv' created successfully.
