# Set connection with GitHub

In [1]:
# ! cd /kaggle/working && rm -rf RECsys_Challenge2024

In [2]:
from kaggle_secrets import UserSecretsClient

token = UserSecretsClient().get_secret("Token")

! git clone https://{token}@github.com/madratak/RECsys_Challenge2024.git

Cloning into 'RECsys_Challenge2024'...
remote: Enumerating objects: 6882, done.[K
remote: Counting objects: 100% (60/60), done.[K
remote: Compressing objects: 100% (50/50), done.[K
remote: Total 6882 (delta 18), reused 0 (delta 0), pack-reused 6822 (from 3)[K
Receiving objects: 100% (6882/6882), 412.47 MiB | 21.06 MiB/s, done.
Resolving deltas: 100% (3749/3749), done.
Updating files: 100% (506/506), done.


In [3]:
! pip install PyGithub requests

Collecting PyGithub
  Downloading PyGithub-2.5.0-py3-none-any.whl.metadata (3.9 kB)
Collecting pynacl>=1.4.0 (from PyGithub)
  Downloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl.metadata (8.6 kB)
Downloading PyGithub-2.5.0-py3-none-any.whl (375 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m375.9/375.9 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (856 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m856.7/856.7 kB[0m [31m23.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pynacl, PyGithub
Successfully installed PyGithub-2.5.0 pynacl-1.5.0


In [4]:
%cd /kaggle/working/RECsys_Challenge2024 
! python run_compile_all_cython.py

/kaggle/working/RECsys_Challenge2024
run_compile_all_cython: Found 11 Cython files in 5 folders...
run_compile_all_cython: All files will be compiled using your current python environment: '/usr/bin/python3'
Compiling [1/11]: MatrixFactorizationImpressions_Cython_Epoch.pyx... 
In file included from [01m[K/usr/local/lib/python3.10/dist-packages/numpy/core/include/numpy/ndarraytypes.h:1929[m[K,
                 from [01m[K/usr/local/lib/python3.10/dist-packages/numpy/core/include/numpy/ndarrayobject.h:12[m[K,
                 from [01m[K/usr/local/lib/python3.10/dist-packages/numpy/core/include/numpy/arrayobject.h:5[m[K,
                 from [01m[KMatrixFactorizationImpressions_Cython_Epoch.c:1252[m[K:
      |  [01;35m[K^~~~~~~[m[K
[01m[KMatrixFactorizationImpressions_Cython_Epoch.c:[m[K In function ‘[01m[K__pyx_f_43MatrixFactorizationImpressions_Cython_Epoch_32MatrixFactorization_Cython_Epoch_sampleBPR_Cython[m[K’:
30353 |       [01;35m[K__pyx_t_4

In [5]:
from Utils.notebookFunctions import *
import numpy as np
import pandas as pd
import scipy.sparse as sps
import time
import shutil
import optuna
import json
import os
from Utils.seconds_to_biggest_unit import seconds_to_biggest_unit


K_PATH = '/kaggle/working/RECsys_Challenge2024'
GH_PATH = 'TrainedModels/WithKFCV/SLIM'

np.random.seed(42)

## Import the repository **RECsys_Challenge2024**

In [6]:
repo = get_repo_from_github(token)

Repository 'RECsys_Challenge2024' found.


In [7]:
config = {
    'model': 'SLIM_BPR',
    'n_folds': 5,
    'metric': 'MAP',
    'tune_parameters': True,
    'database_path': '/kaggle/working/history_SLIM_BPR_MAP.db',
    'copy_prev_best_params': False,
    'tune_best_params': True,
    'save_github': True
}

Import the database where previous tuning trials have been saved.

In [8]:
try:
    shutil.copyfile(
        f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/history_{config["model"]}_{config["metric"]}.db', 
        config['database_path']
    )
except FileNotFoundError:
    pass # if not present optuna will create it

# Construction of URM and ICM matrices

In [9]:
URM_all_dataframe = pd.read_csv("/kaggle/working/RECsys_Challenge2024/Dataset/data_train.csv")

n_users = len(URM_all_dataframe["user_id"].unique())
n_items = len(URM_all_dataframe["item_id"].unique())

URM_all = sps.csr_matrix((URM_all_dataframe["data"].values, 
                          (URM_all_dataframe["user_id"].values, URM_all_dataframe["item_id"].values)),
                        shape = (n_users, n_items))

URM_all

<35736x38121 sparse matrix of type '<class 'numpy.float64'>'
	with 1764607 stored elements in Compressed Sparse Row format>

In [10]:
ICM_dataframe = pd.read_csv("/kaggle/working/RECsys_Challenge2024/Dataset/data_ICM_metadata.csv")

n_items = len(ICM_dataframe["item_id"].unique())
n_features = len(ICM_dataframe["feature_id"].unique())

ICM_all = sps.csr_matrix((ICM_dataframe["data"].values, 
                          (ICM_dataframe["item_id"].values, ICM_dataframe["feature_id"].values)),
                        shape = (n_items, n_features))

ICM_all

<38121x94331 sparse matrix of type '<class 'numpy.float64'>'
	with 2940040 stored elements in Compressed Sparse Row format>

# Training

In [11]:
from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Data_manager.split_functions.split_train_k_folds import split_train_k_folds

URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.80)

folds = split_train_k_folds(URM_all, k=config['n_folds'])



In [12]:
from Recommenders.SLIM.SLIM_BPR_Python import SLIM_BPR_Python

def objective_function_SLIM_BPR(optuna_trial):

    full_hyperp = {
                    "topK": optuna_trial.suggest_int("topK", 0, 500),
                    "epochs": optuna_trial.suggest_int("epochs", 1, 50),
                    "lambda_i": optuna_trial.suggest_float("lambda_i", 1e-5, 1e-1, log=True),
                    "lambda_j": optuna_trial.suggest_float("lambda_j", 1e-5, 1e-1, log=True),
                    "learning_rate": optuna_trial.suggest_float("learning_rate", 1e-4, 1e-1, log=True)
                }
    
    validation_results = []
    
    for fold_idx, (URM_train_fold, URM_validation_fold) in enumerate(folds):
    
        recommender_instance = SLIM_BPR_Python(URM_train_fold)
        recommender_instance.fit(**full_hyperp)
        
        evaluator = EvaluatorHoldout(URM_validation_fold, cutoff_list=[10])
        result_df, _ = evaluator.evaluateRecommender(recommender_instance)
        
        validation_results.append(result_df.loc[10]["MAP"])
    
    return np.mean(validation_results)

In [13]:
if config['tune_parameters']:
    
    optuna_study = optuna.create_study(direction='maximize', study_name=f'hyperparameters_tuning_{config["model"]}_{config["metric"]}',
                                storage=f'sqlite:///{config["database_path"]}', load_if_exists=True)

    optuna_study.optimize(objective_function_SLIM_BPR, n_trials=20, timeout=11*3600)

[I 2025-01-06 13:23:59,468] Using an existing study with name 'hyperparameters_tuning_SLIM_BPR_MAP' instead of creating a new one.


Epoch 1, Iteration 35736 in 11.04 seconds. Samples per second 3238.35
Epoch 2, Iteration 35736 in 7.95 seconds. Samples per second 4496.03
Epoch 3, Iteration 35736 in 6.37 seconds. Samples per second 5614.08
Epoch 4, Iteration 35736 in 5.40 seconds. Samples per second 6621.45
Epoch 5, Iteration 35736 in 4.65 seconds. Samples per second 7688.05
Epoch 6, Iteration 35736 in 4.42 seconds. Samples per second 8089.36
Epoch 7, Iteration 35736 in 4.01 seconds. Samples per second 8911.59
Epoch 8, Iteration 35736 in 3.74 seconds. Samples per second 9553.23
Epoch 9, Iteration 35736 in 3.62 seconds. Samples per second 9870.90
Epoch 10, Iteration 35736 in 3.47 seconds. Samples per second 10304.69
Epoch 11, Iteration 35736 in 3.40 seconds. Samples per second 10500.32
Epoch 12, Iteration 35736 in 3.43 seconds. Samples per second 10415.60
Epoch 13, Iteration 35736 in 3.31 seconds. Samples per second 10796.55
Epoch 14, Iteration 35736 in 3.26 seconds. Samples per second 10962.05
Epoch 15, Iteration 357

[I 2025-01-06 13:51:40,034] Trial 41 finished with value: 0.040179893231660754 and parameters: {'topK': 22, 'epochs': 39, 'lambda_i': 0.012002995832800612, 'lambda_j': 3.897810787222435e-05, 'learning_rate': 0.09581897940476075}. Best is trial 25 with value: 0.04090163381949502.


Epoch 1, Iteration 35736 in 10.40 seconds. Samples per second 3437.30
Epoch 2, Iteration 35736 in 7.60 seconds. Samples per second 4700.95
Epoch 3, Iteration 35736 in 5.96 seconds. Samples per second 5999.93
Epoch 4, Iteration 35736 in 5.08 seconds. Samples per second 7029.93
Epoch 5, Iteration 35736 in 4.52 seconds. Samples per second 7901.32
Epoch 6, Iteration 35736 in 4.19 seconds. Samples per second 8522.55
Epoch 7, Iteration 35736 in 3.94 seconds. Samples per second 9072.03
Epoch 8, Iteration 35736 in 3.84 seconds. Samples per second 9310.50
Epoch 9, Iteration 35736 in 3.83 seconds. Samples per second 9329.64
Epoch 10, Iteration 35736 in 3.53 seconds. Samples per second 10132.79
Epoch 11, Iteration 35736 in 3.44 seconds. Samples per second 10386.62
Epoch 12, Iteration 35736 in 3.40 seconds. Samples per second 10508.71
Epoch 13, Iteration 35736 in 3.32 seconds. Samples per second 10761.42
Epoch 14, Iteration 35736 in 3.31 seconds. Samples per second 10800.47
Epoch 15, Iteration 357

[I 2025-01-06 14:18:13,245] Trial 42 finished with value: 0.0002664826418719655 and parameters: {'topK': 0, 'epochs': 38, 'lambda_i': 0.010760526931868639, 'lambda_j': 2.647533217883142e-05, 'learning_rate': 0.04365119617520986}. Best is trial 25 with value: 0.04090163381949502.


Epoch 1, Iteration 35736 in 10.30 seconds. Samples per second 3469.58
Train completed in 0.17 minutes
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 41.52 sec. Users per second: 856
Epoch 1, Iteration 35736 in 11.12 seconds. Samples per second 3213.56
Train completed in 0.19 minutes
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 35.79 sec. Users per second: 994
Epoch 1, Iteration 35736 in 10.30 seconds. Samples per second 3469.23
Train completed in 0.17 minutes
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 35.84 sec. Users per second: 993
Epoch 1, Iteration 35736 in 10.66 seconds. Samples per second 3351.96
Train completed in 0.18 minutes
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldou

[I 2025-01-06 14:30:30,530] Trial 43 finished with value: 0.016253357734050154 and parameters: {'topK': 26, 'epochs': 1, 'lambda_i': 0.0034345671092510586, 'lambda_j': 1.1266776643467882e-05, 'learning_rate': 0.0712914319314129}. Best is trial 25 with value: 0.04090163381949502.


Epoch 1, Iteration 35736 in 10.28 seconds. Samples per second 3475.36
Epoch 2, Iteration 35736 in 9.22 seconds. Samples per second 3877.52
Epoch 3, Iteration 35736 in 7.28 seconds. Samples per second 4907.10
Epoch 4, Iteration 35736 in 6.26 seconds. Samples per second 5707.56
Epoch 5, Iteration 35736 in 5.45 seconds. Samples per second 6551.59
Epoch 6, Iteration 35736 in 4.89 seconds. Samples per second 7303.92
Epoch 7, Iteration 35736 in 4.42 seconds. Samples per second 8087.64
Epoch 8, Iteration 35736 in 4.26 seconds. Samples per second 8389.66
Epoch 9, Iteration 35736 in 3.92 seconds. Samples per second 9111.69
Epoch 10, Iteration 35736 in 3.76 seconds. Samples per second 9494.99
Epoch 11, Iteration 35736 in 3.61 seconds. Samples per second 9904.73
Epoch 12, Iteration 35736 in 3.54 seconds. Samples per second 10096.98
Epoch 13, Iteration 35736 in 3.44 seconds. Samples per second 10380.92
Epoch 14, Iteration 35736 in 3.37 seconds. Samples per second 10605.78
Epoch 15, Iteration 35736

[I 2025-01-06 14:57:15,857] Trial 44 finished with value: 0.03648660827661393 and parameters: {'topK': 70, 'epochs': 30, 'lambda_i': 0.013373459507030457, 'lambda_j': 6.370322579706744e-05, 'learning_rate': 0.09432496810652842}. Best is trial 25 with value: 0.04090163381949502.


Epoch 1, Iteration 35736 in 10.30 seconds. Samples per second 3469.54
Epoch 2, Iteration 35736 in 7.50 seconds. Samples per second 4763.26
Epoch 3, Iteration 35736 in 6.17 seconds. Samples per second 5791.41
Epoch 4, Iteration 35736 in 5.14 seconds. Samples per second 6950.20
Epoch 5, Iteration 35736 in 4.65 seconds. Samples per second 7693.43
Epoch 6, Iteration 35736 in 4.24 seconds. Samples per second 8418.78
Epoch 7, Iteration 35736 in 3.98 seconds. Samples per second 8974.32
Epoch 8, Iteration 35736 in 3.95 seconds. Samples per second 9051.92
Epoch 9, Iteration 35736 in 3.70 seconds. Samples per second 9645.62
Epoch 10, Iteration 35736 in 3.64 seconds. Samples per second 9826.84
Epoch 11, Iteration 35736 in 3.56 seconds. Samples per second 10041.39
Epoch 12, Iteration 35736 in 3.43 seconds. Samples per second 10404.03
Epoch 13, Iteration 35736 in 3.50 seconds. Samples per second 10215.30
Epoch 14, Iteration 35736 in 3.35 seconds. Samples per second 10663.73
Epoch 15, Iteration 3573

[I 2025-01-06 15:29:43,739] Trial 45 finished with value: 0.03487998262628563 and parameters: {'topK': 97, 'epochs': 45, 'lambda_i': 0.006704451316104146, 'lambda_j': 0.00016144669044154718, 'learning_rate': 0.030019065690620565}. Best is trial 25 with value: 0.04090163381949502.


Epoch 1, Iteration 35736 in 10.39 seconds. Samples per second 3438.64
Epoch 2, Iteration 35736 in 7.42 seconds. Samples per second 4814.22
Epoch 3, Iteration 35736 in 6.06 seconds. Samples per second 5892.49
Epoch 4, Iteration 35736 in 5.22 seconds. Samples per second 6849.59
Epoch 5, Iteration 35736 in 4.67 seconds. Samples per second 7644.81
Epoch 6, Iteration 35736 in 4.32 seconds. Samples per second 8275.46
Epoch 7, Iteration 35736 in 3.95 seconds. Samples per second 9043.28
Epoch 8, Iteration 35736 in 3.83 seconds. Samples per second 9324.23
Epoch 9, Iteration 35736 in 3.64 seconds. Samples per second 9820.34
Epoch 10, Iteration 35736 in 3.49 seconds. Samples per second 10230.29
Epoch 11, Iteration 35736 in 3.52 seconds. Samples per second 10144.80
Epoch 12, Iteration 35736 in 3.38 seconds. Samples per second 10579.95
Epoch 13, Iteration 35736 in 3.45 seconds. Samples per second 10362.39
Epoch 14, Iteration 35736 in 3.48 seconds. Samples per second 10264.62
Epoch 15, Iteration 357

[I 2025-01-06 15:59:20,252] Trial 46 finished with value: 0.034718622868503954 and parameters: {'topK': 138, 'epochs': 35, 'lambda_i': 0.03137035422836485, 'lambda_j': 1.6079507208544722e-05, 'learning_rate': 0.06893409000859302}. Best is trial 25 with value: 0.04090163381949502.


Epoch 1, Iteration 35736 in 10.20 seconds. Samples per second 3504.69
Epoch 2, Iteration 35736 in 7.43 seconds. Samples per second 4807.32
Epoch 3, Iteration 35736 in 6.01 seconds. Samples per second 5950.73
Epoch 4, Iteration 35736 in 5.19 seconds. Samples per second 6880.66
Epoch 5, Iteration 35736 in 4.56 seconds. Samples per second 7842.77
Epoch 6, Iteration 35736 in 4.24 seconds. Samples per second 8436.60
Epoch 7, Iteration 35736 in 4.01 seconds. Samples per second 8919.95
Epoch 8, Iteration 35736 in 3.78 seconds. Samples per second 9441.61
Epoch 9, Iteration 35736 in 3.77 seconds. Samples per second 9476.34
Epoch 10, Iteration 35736 in 3.51 seconds. Samples per second 10187.18
Epoch 11, Iteration 35736 in 3.49 seconds. Samples per second 10230.72
Epoch 12, Iteration 35736 in 3.45 seconds. Samples per second 10343.92
Epoch 13, Iteration 35736 in 3.41 seconds. Samples per second 10491.87
Epoch 14, Iteration 35736 in 3.32 seconds. Samples per second 10751.54
Epoch 15, Iteration 357

[I 2025-01-06 16:33:33,228] Trial 47 finished with value: 0.02998722130436879 and parameters: {'topK': 426, 'epochs': 40, 'lambda_i': 0.0013350417754040722, 'lambda_j': 0.0019974596569872116, 'learning_rate': 0.014451640548174991}. Best is trial 25 with value: 0.04090163381949502.


Epoch 1, Iteration 35736 in 10.29 seconds. Samples per second 3473.09
Epoch 2, Iteration 35736 in 7.41 seconds. Samples per second 4822.10
Epoch 3, Iteration 35736 in 6.08 seconds. Samples per second 5880.53
Epoch 4, Iteration 35736 in 5.23 seconds. Samples per second 6826.40
Epoch 5, Iteration 35736 in 4.59 seconds. Samples per second 7779.70
Epoch 6, Iteration 35736 in 4.23 seconds. Samples per second 8449.95
Epoch 7, Iteration 35736 in 3.95 seconds. Samples per second 9052.87
Epoch 8, Iteration 35736 in 3.74 seconds. Samples per second 9561.74
Epoch 9, Iteration 35736 in 3.69 seconds. Samples per second 9673.65
Epoch 10, Iteration 35736 in 3.52 seconds. Samples per second 10162.21
Epoch 11, Iteration 35736 in 3.60 seconds. Samples per second 9931.40
Epoch 12, Iteration 35736 in 3.38 seconds. Samples per second 10587.75
Epoch 13, Iteration 35736 in 3.31 seconds. Samples per second 10780.29
Epoch 14, Iteration 35736 in 3.34 seconds. Samples per second 10685.89
Epoch 15, Iteration 3573

[I 2025-01-06 17:03:33,395] Trial 48 finished with value: 0.03299077395815001 and parameters: {'topK': 190, 'epochs': 33, 'lambda_i': 0.059039012694117494, 'lambda_j': 4.2045019725951654e-05, 'learning_rate': 0.04217958962221993}. Best is trial 25 with value: 0.04090163381949502.


Epoch 1, Iteration 35736 in 10.17 seconds. Samples per second 3513.60
Epoch 2, Iteration 35736 in 7.43 seconds. Samples per second 4810.39
Epoch 3, Iteration 35736 in 6.03 seconds. Samples per second 5923.07
Epoch 4, Iteration 35736 in 5.21 seconds. Samples per second 6852.60
Epoch 5, Iteration 35736 in 4.65 seconds. Samples per second 7680.67
Epoch 6, Iteration 35736 in 4.26 seconds. Samples per second 8398.09
Epoch 7, Iteration 35736 in 3.91 seconds. Samples per second 9133.29
Epoch 8, Iteration 35736 in 3.74 seconds. Samples per second 9563.96
Epoch 9, Iteration 35736 in 3.57 seconds. Samples per second 10002.11
Epoch 10, Iteration 35736 in 3.43 seconds. Samples per second 10414.12
Epoch 11, Iteration 35736 in 3.51 seconds. Samples per second 10181.33
Epoch 12, Iteration 35736 in 3.34 seconds. Samples per second 10708.91
Epoch 13, Iteration 35736 in 3.29 seconds. Samples per second 10862.21
Epoch 14, Iteration 35736 in 3.29 seconds. Samples per second 10847.25
Epoch 15, Iteration 35

[I 2025-01-06 17:31:51,594] Trial 49 finished with value: 0.030816241138674316 and parameters: {'topK': 109, 'epochs': 36, 'lambda_i': 0.004244706136766889, 'lambda_j': 9.301058788101684e-05, 'learning_rate': 0.001603230774457764}. Best is trial 25 with value: 0.04090163381949502.


Epoch 1, Iteration 35736 in 10.34 seconds. Samples per second 3454.88
Epoch 2, Iteration 35736 in 7.43 seconds. Samples per second 4811.30
Epoch 3, Iteration 35736 in 5.98 seconds. Samples per second 5975.65
Epoch 4, Iteration 35736 in 5.12 seconds. Samples per second 6977.09
Epoch 5, Iteration 35736 in 4.61 seconds. Samples per second 7758.79
Epoch 6, Iteration 35736 in 4.36 seconds. Samples per second 8192.20
Epoch 7, Iteration 35736 in 3.91 seconds. Samples per second 9146.39
Epoch 8, Iteration 35736 in 3.70 seconds. Samples per second 9663.88
Epoch 9, Iteration 35736 in 3.75 seconds. Samples per second 9523.18
Epoch 10, Iteration 35736 in 3.45 seconds. Samples per second 10366.40
Epoch 11, Iteration 35736 in 3.40 seconds. Samples per second 10518.87
Epoch 12, Iteration 35736 in 3.39 seconds. Samples per second 10550.34
Epoch 13, Iteration 35736 in 3.33 seconds. Samples per second 10746.05
Epoch 14, Iteration 35736 in 3.31 seconds. Samples per second 10789.59
Epoch 15, Iteration 357

[I 2025-01-06 18:00:29,638] Trial 50 finished with value: 0.03271290566204792 and parameters: {'topK': 17, 'epochs': 42, 'lambda_i': 0.09903444822915823, 'lambda_j': 0.008076148013589614, 'learning_rate': 0.0003666296627385344}. Best is trial 25 with value: 0.04090163381949502.


Epoch 1, Iteration 35736 in 10.19 seconds. Samples per second 3506.23
Epoch 2, Iteration 35736 in 7.37 seconds. Samples per second 4846.48
Epoch 3, Iteration 35736 in 6.14 seconds. Samples per second 5818.29
Epoch 4, Iteration 35736 in 5.18 seconds. Samples per second 6895.18
Epoch 5, Iteration 35736 in 4.75 seconds. Samples per second 7520.96
Epoch 6, Iteration 35736 in 4.25 seconds. Samples per second 8402.81
Epoch 7, Iteration 35736 in 4.16 seconds. Samples per second 8584.64
Epoch 8, Iteration 35736 in 3.79 seconds. Samples per second 9438.60
Epoch 9, Iteration 35736 in 3.66 seconds. Samples per second 9762.11
Epoch 10, Iteration 35736 in 3.54 seconds. Samples per second 10094.63
Epoch 11, Iteration 35736 in 3.41 seconds. Samples per second 10488.49
Epoch 12, Iteration 35736 in 3.47 seconds. Samples per second 10300.39
Epoch 13, Iteration 35736 in 3.43 seconds. Samples per second 10406.20
Epoch 14, Iteration 35736 in 3.27 seconds. Samples per second 10916.51
Epoch 15, Iteration 357

[I 2025-01-06 18:27:45,415] Trial 51 finished with value: 0.0002664826418719655 and parameters: {'topK': 0, 'epochs': 39, 'lambda_i': 0.017390839701788702, 'lambda_j': 3.4633988078466044e-05, 'learning_rate': 0.08101283588587957}. Best is trial 25 with value: 0.04090163381949502.


Epoch 1, Iteration 35736 in 10.26 seconds. Samples per second 3482.26
Epoch 2, Iteration 35736 in 7.45 seconds. Samples per second 4796.26
Epoch 3, Iteration 35736 in 6.15 seconds. Samples per second 5810.68
Epoch 4, Iteration 35736 in 5.10 seconds. Samples per second 7001.39
Epoch 5, Iteration 35736 in 4.66 seconds. Samples per second 7662.71
Epoch 6, Iteration 35736 in 4.18 seconds. Samples per second 8551.61
Epoch 7, Iteration 35736 in 3.89 seconds. Samples per second 9190.56
Epoch 8, Iteration 35736 in 3.77 seconds. Samples per second 9488.27
Epoch 9, Iteration 35736 in 3.56 seconds. Samples per second 10029.58
Epoch 10, Iteration 35736 in 3.51 seconds. Samples per second 10185.28
Epoch 11, Iteration 35736 in 3.51 seconds. Samples per second 10174.28
Epoch 12, Iteration 35736 in 3.34 seconds. Samples per second 10692.45
Epoch 13, Iteration 35736 in 3.33 seconds. Samples per second 10720.61
Epoch 14, Iteration 35736 in 3.36 seconds. Samples per second 10637.68
Epoch 15, Iteration 35

[I 2025-01-06 18:56:07,279] Trial 52 finished with value: 0.039190252776141535 and parameters: {'topK': 34, 'epochs': 41, 'lambda_i': 0.013960888809487277, 'lambda_j': 1.6799281602112772e-05, 'learning_rate': 0.08796763002684524}. Best is trial 25 with value: 0.04090163381949502.


Epoch 1, Iteration 35736 in 10.15 seconds. Samples per second 3519.96
Epoch 2, Iteration 35736 in 7.40 seconds. Samples per second 4828.44
Epoch 3, Iteration 35736 in 5.98 seconds. Samples per second 5970.95
Epoch 4, Iteration 35736 in 5.08 seconds. Samples per second 7030.02
Epoch 5, Iteration 35736 in 4.71 seconds. Samples per second 7586.35
Epoch 6, Iteration 35736 in 4.21 seconds. Samples per second 8491.11
Epoch 7, Iteration 35736 in 3.97 seconds. Samples per second 9003.09
Epoch 8, Iteration 35736 in 3.77 seconds. Samples per second 9467.40
Epoch 9, Iteration 35736 in 3.57 seconds. Samples per second 9997.81
Epoch 10, Iteration 35736 in 3.55 seconds. Samples per second 10062.60
Epoch 11, Iteration 35736 in 3.45 seconds. Samples per second 10346.04
Epoch 12, Iteration 35736 in 3.40 seconds. Samples per second 10502.96
Epoch 13, Iteration 35736 in 3.44 seconds. Samples per second 10393.34
Epoch 14, Iteration 35736 in 3.43 seconds. Samples per second 10431.24
Epoch 15, Iteration 357

[I 2025-01-06 19:26:18,259] Trial 53 finished with value: 0.03758968481741144 and parameters: {'topK': 53, 'epochs': 46, 'lambda_i': 0.008322859461005423, 'lambda_j': 5.4397049191175345e-05, 'learning_rate': 0.05896560930225002}. Best is trial 25 with value: 0.04090163381949502.


Epoch 1, Iteration 35736 in 10.26 seconds. Samples per second 3483.86
Epoch 2, Iteration 35736 in 7.37 seconds. Samples per second 4848.63
Epoch 3, Iteration 35736 in 6.03 seconds. Samples per second 5929.59
Epoch 4, Iteration 35736 in 5.16 seconds. Samples per second 6920.91
Epoch 5, Iteration 35736 in 4.55 seconds. Samples per second 7848.50
Epoch 6, Iteration 35736 in 4.16 seconds. Samples per second 8598.19
Epoch 7, Iteration 35736 in 4.02 seconds. Samples per second 8899.81
Epoch 8, Iteration 35736 in 3.78 seconds. Samples per second 9454.15
Epoch 9, Iteration 35736 in 3.70 seconds. Samples per second 9654.62
Epoch 10, Iteration 35736 in 3.57 seconds. Samples per second 10016.56
Epoch 11, Iteration 35736 in 3.45 seconds. Samples per second 10371.35
Epoch 12, Iteration 35736 in 3.48 seconds. Samples per second 10267.20
Epoch 13, Iteration 35736 in 3.54 seconds. Samples per second 10087.06
Epoch 14, Iteration 35736 in 3.24 seconds. Samples per second 11030.25
Epoch 15, Iteration 357

[I 2025-01-06 19:56:55,040] Trial 54 finished with value: 0.04209128232743534 and parameters: {'topK': 15, 'epochs': 49, 'lambda_i': 0.027017165211977178, 'lambda_j': 0.002628902851498056, 'learning_rate': 0.09646320865803477}. Best is trial 54 with value: 0.04209128232743534.


Epoch 1, Iteration 35736 in 10.42 seconds. Samples per second 3428.48
Epoch 2, Iteration 35736 in 7.54 seconds. Samples per second 4739.98
Epoch 3, Iteration 35736 in 6.05 seconds. Samples per second 5907.25
Epoch 4, Iteration 35736 in 5.22 seconds. Samples per second 6848.17
Epoch 5, Iteration 35736 in 4.64 seconds. Samples per second 7697.13
Epoch 6, Iteration 35736 in 4.18 seconds. Samples per second 8552.67
Epoch 7, Iteration 35736 in 4.01 seconds. Samples per second 8915.96
Epoch 8, Iteration 35736 in 3.81 seconds. Samples per second 9380.15
Epoch 9, Iteration 35736 in 3.60 seconds. Samples per second 9932.75
Epoch 10, Iteration 35736 in 3.51 seconds. Samples per second 10182.89
Epoch 11, Iteration 35736 in 3.62 seconds. Samples per second 9868.71
Epoch 12, Iteration 35736 in 3.45 seconds. Samples per second 10371.17
Epoch 13, Iteration 35736 in 3.51 seconds. Samples per second 10192.47
Epoch 14, Iteration 35736 in 3.36 seconds. Samples per second 10640.85
Epoch 15, Iteration 3573

[I 2025-01-06 20:25:48,516] Trial 55 finished with value: 0.03655906989531107 and parameters: {'topK': 63, 'epochs': 43, 'lambda_i': 0.04829896968985289, 'lambda_j': 0.0028372708409589776, 'learning_rate': 0.04384400827985786}. Best is trial 54 with value: 0.04209128232743534.


Epoch 1, Iteration 35736 in 10.29 seconds. Samples per second 3472.02
Epoch 2, Iteration 35736 in 7.33 seconds. Samples per second 4874.41
Epoch 3, Iteration 35736 in 5.97 seconds. Samples per second 5989.81
Epoch 4, Iteration 35736 in 5.22 seconds. Samples per second 6846.72
Epoch 5, Iteration 35736 in 4.75 seconds. Samples per second 7517.58
Epoch 6, Iteration 35736 in 4.21 seconds. Samples per second 8489.46
Epoch 7, Iteration 35736 in 4.04 seconds. Samples per second 8854.81
Epoch 8, Iteration 35736 in 3.69 seconds. Samples per second 9689.31
Epoch 9, Iteration 35736 in 3.56 seconds. Samples per second 10046.02
Epoch 10, Iteration 35736 in 3.54 seconds. Samples per second 10097.74
Epoch 11, Iteration 35736 in 3.37 seconds. Samples per second 10611.19
Epoch 12, Iteration 35736 in 3.39 seconds. Samples per second 10526.91
Epoch 13, Iteration 35736 in 3.53 seconds. Samples per second 10133.62
Epoch 14, Iteration 35736 in 3.25 seconds. Samples per second 10989.76
Epoch 15, Iteration 35

[I 2025-01-06 20:55:46,991] Trial 56 finished with value: 0.037003501656889586 and parameters: {'topK': 41, 'epochs': 47, 'lambda_i': 0.0259776152180188, 'lambda_j': 0.000573767453608224, 'learning_rate': 0.029214733155871423}. Best is trial 54 with value: 0.04209128232743534.


Epoch 1, Iteration 35736 in 10.85 seconds. Samples per second 3292.62
Epoch 2, Iteration 35736 in 7.74 seconds. Samples per second 4615.15
Epoch 3, Iteration 35736 in 6.22 seconds. Samples per second 5741.75
Epoch 4, Iteration 35736 in 5.44 seconds. Samples per second 6566.53
Epoch 5, Iteration 35736 in 4.69 seconds. Samples per second 7625.45
Epoch 6, Iteration 35736 in 4.23 seconds. Samples per second 8447.84
Epoch 7, Iteration 35736 in 4.03 seconds. Samples per second 8865.14
Epoch 8, Iteration 35736 in 3.83 seconds. Samples per second 9326.58
Epoch 9, Iteration 35736 in 3.64 seconds. Samples per second 9830.88
Epoch 10, Iteration 35736 in 3.60 seconds. Samples per second 9938.00
Epoch 11, Iteration 35736 in 3.43 seconds. Samples per second 10419.30
Epoch 12, Iteration 35736 in 3.46 seconds. Samples per second 10325.99
Epoch 13, Iteration 35736 in 3.41 seconds. Samples per second 10473.89
Epoch 14, Iteration 35736 in 3.28 seconds. Samples per second 10897.26
Epoch 15, Iteration 3573

[I 2025-01-06 21:28:10,122] Trial 57 finished with value: 0.032657467754192794 and parameters: {'topK': 19, 'epochs': 48, 'lambda_i': 0.010413241366502056, 'lambda_j': 0.00093873617534971, 'learning_rate': 0.00010477816585768099}. Best is trial 54 with value: 0.04209128232743534.


Epoch 1, Iteration 35736 in 10.26 seconds. Samples per second 3482.98
Epoch 2, Iteration 35736 in 7.60 seconds. Samples per second 4702.86
Epoch 3, Iteration 35736 in 6.04 seconds. Samples per second 5912.84
Epoch 4, Iteration 35736 in 5.18 seconds. Samples per second 6899.15
Epoch 5, Iteration 35736 in 4.55 seconds. Samples per second 7855.38
Epoch 6, Iteration 35736 in 4.18 seconds. Samples per second 8557.15
Epoch 7, Iteration 35736 in 3.95 seconds. Samples per second 9047.13
Epoch 8, Iteration 35736 in 3.81 seconds. Samples per second 9376.67
Epoch 9, Iteration 35736 in 3.63 seconds. Samples per second 9849.87
Epoch 10, Iteration 35736 in 3.55 seconds. Samples per second 10070.65
Epoch 11, Iteration 35736 in 3.50 seconds. Samples per second 10219.57
Epoch 12, Iteration 35736 in 3.46 seconds. Samples per second 10329.75
Epoch 13, Iteration 35736 in 3.38 seconds. Samples per second 10572.47
Epoch 14, Iteration 35736 in 3.31 seconds. Samples per second 10804.91
Epoch 15, Iteration 357

[I 2025-01-06 21:59:05,114] Trial 58 finished with value: 0.035223853701749505 and parameters: {'topK': 144, 'epochs': 49, 'lambda_i': 0.0006479079657965158, 'lambda_j': 0.00028865427857367265, 'learning_rate': 0.07482277759234411}. Best is trial 54 with value: 0.04209128232743534.


Epoch 1, Iteration 35736 in 10.11 seconds. Samples per second 3534.63
Epoch 2, Iteration 35736 in 7.45 seconds. Samples per second 4794.29
Epoch 3, Iteration 35736 in 6.12 seconds. Samples per second 5839.70
Epoch 4, Iteration 35736 in 5.11 seconds. Samples per second 6996.72
Epoch 5, Iteration 35736 in 4.58 seconds. Samples per second 7802.76
Epoch 6, Iteration 35736 in 4.22 seconds. Samples per second 8473.12
Epoch 7, Iteration 35736 in 3.91 seconds. Samples per second 9129.04
Epoch 8, Iteration 35736 in 3.76 seconds. Samples per second 9500.03
Epoch 9, Iteration 35736 in 3.57 seconds. Samples per second 10010.08
Epoch 10, Iteration 35736 in 3.53 seconds. Samples per second 10126.85
Epoch 11, Iteration 35736 in 3.58 seconds. Samples per second 9990.40
Epoch 12, Iteration 35736 in 3.41 seconds. Samples per second 10472.47
Epoch 13, Iteration 35736 in 3.34 seconds. Samples per second 10697.37
Epoch 14, Iteration 35736 in 3.37 seconds. Samples per second 10610.37
Epoch 15, Iteration 357

[I 2025-01-06 22:22:28,997] Trial 59 finished with value: 0.03070893034447749 and parameters: {'topK': 234, 'epochs': 19, 'lambda_i': 0.003763089430980978, 'lambda_j': 0.0070001681381142485, 'learning_rate': 0.038619909781196574}. Best is trial 54 with value: 0.04209128232743534.


Epoch 1, Iteration 35736 in 10.24 seconds. Samples per second 3489.73
Epoch 2, Iteration 35736 in 7.41 seconds. Samples per second 4820.17
Epoch 3, Iteration 35736 in 5.96 seconds. Samples per second 5999.54
Epoch 4, Iteration 35736 in 5.04 seconds. Samples per second 7087.87
Epoch 5, Iteration 35736 in 4.62 seconds. Samples per second 7733.51
Epoch 6, Iteration 35736 in 4.31 seconds. Samples per second 8292.45
Epoch 7, Iteration 35736 in 4.09 seconds. Samples per second 8731.50
Epoch 8, Iteration 35736 in 3.78 seconds. Samples per second 9447.33
Epoch 9, Iteration 35736 in 3.61 seconds. Samples per second 9899.85
Epoch 10, Iteration 35736 in 3.54 seconds. Samples per second 10101.34
Epoch 11, Iteration 35736 in 3.39 seconds. Samples per second 10526.77
Epoch 12, Iteration 35736 in 3.34 seconds. Samples per second 10683.61
Epoch 13, Iteration 35736 in 3.40 seconds. Samples per second 10500.84
Epoch 14, Iteration 35736 in 3.23 seconds. Samples per second 11068.82
Epoch 15, Iteration 357

[I 2025-01-06 22:47:21,213] Trial 60 finished with value: 0.03306059768684737 and parameters: {'topK': 47, 'epochs': 36, 'lambda_i': 0.024827949166330205, 'lambda_j': 0.0014621160249445268, 'learning_rate': 0.006089735001563951}. Best is trial 54 with value: 0.04209128232743534.


## Some optuna visualizations on recommender parameters

In [14]:
if not config['tune_parameters']:
    optuna_study = optuna.load_study(study_name=f'hyperparameters_tuning_{config["model"]}_{config["metric"]}', storage=f'sqlite:///{config["database_path"]}')
    
fig = optuna.visualization.plot_slice(optuna_study)
fig.show()

In [15]:
if not config['tune_parameters']:
    optuna_study = optuna.load_study(study_name=f'hyperparameters_tuning_{config["model"]}_{config["metric"]}', storage=f'sqlite:///{config["database_path"]}')
    
fig = optuna.visualization.plot_param_importances(optuna_study)
fig.show()

## Let's train the recommender with best parameter values

In [16]:
if config['tune_best_params']:

    if config['tune_parameters']:
        best_params = optuna_study.best_trial.params
    else: 
        with open(f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/best_params_{config["model"]}_{config["metric"]}.json', 'r') as best_params_json:
            best_params = json.load(best_params_json)

    recommender_instance = SLIM_BPR_Python(URM_train + URM_validation)
    recommender_instance.fit(**best_params)

Epoch 1, Iteration 35736 in 10.99 seconds. Samples per second 3251.12
Epoch 2, Iteration 35736 in 7.80 seconds. Samples per second 4581.44
Epoch 3, Iteration 35736 in 6.29 seconds. Samples per second 5684.35
Epoch 4, Iteration 35736 in 5.30 seconds. Samples per second 6736.60
Epoch 5, Iteration 35736 in 4.71 seconds. Samples per second 7595.25
Epoch 6, Iteration 35736 in 4.40 seconds. Samples per second 8117.23
Epoch 7, Iteration 35736 in 4.11 seconds. Samples per second 8688.60
Epoch 8, Iteration 35736 in 3.82 seconds. Samples per second 9363.42
Epoch 9, Iteration 35736 in 3.79 seconds. Samples per second 9421.27
Epoch 10, Iteration 35736 in 3.60 seconds. Samples per second 9927.71
Epoch 11, Iteration 35736 in 3.49 seconds. Samples per second 10237.01
Epoch 12, Iteration 35736 in 3.47 seconds. Samples per second 10310.13
Epoch 13, Iteration 35736 in 3.40 seconds. Samples per second 10505.01
Epoch 14, Iteration 35736 in 3.40 seconds. Samples per second 10515.15
Epoch 15, Iteration 3573

# Testing

Create the recommendations for the submission. 

In [17]:
if config['tune_best_params']:

    data_target_users_test = pd.read_csv('/kaggle/working/RECsys_Challenge2024/Dataset/data_target_users_test.csv')
    create_submission(data_target_users_test, recommender_instance, f'/kaggle/working/submission_{config["model"]}_{config["metric"]}.csv')

Submission file saved as /kaggle/working/submission_SLIM_BPR_MAP.csv


# Save Version on GitHub 

Write or import a json file where best hyperparameters are saved. 

In [18]:
if config['tune_parameters']:
    with open(f'/kaggle/working/best_params_{config["model"]}_{config["metric"]}.json', 'w') as params_file:
        json.dump(optuna_study.best_params, params_file)
        
    if config['save_github']:
        upload_file(
            f'/kaggle/working/best_params_{config["model"]}_{config["metric"]}.json', 
            f'{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/best_params_{config["model"]}_{config["metric"]}.json', 
            f'{config["model"]}_{config["metric"]} tuning results (from kaggle notebook)',
            repo
        )
elif config['copy_prev_best_params']:
    shutil.copyfile(
        f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/'\
        f'best_params_{config["model"]}_{config["metric"]}.json', 
        f'/kaggle/working/best_params_{config["model"]}_{config["metric"]}.json'
    )

File 'TrainedModels/WithKFCV/SLIM/SLIM_BPRRecommender/OptimizingMAP/best_params_SLIM_BPR_MAP.json' updated successfully.


Save the history of the tuned model.

In [19]:
if config['save_github'] and config['tune_parameters']:
    upload_file(
        config['database_path'], 
        f'{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/history_{config["model"]}_{config["metric"]}.db',
        f'Tuning {config["model"]}_{config["metric"]} db updated results (from kaggle notebook)',
        repo
    )

File 'TrainedModels/WithKFCV/SLIM/SLIM_BPRRecommender/OptimizingMAP/history_SLIM_BPR_MAP.db' updated successfully.


Save the best trained model and its submission.

In [20]:
if config['save_github'] and config['tune_best_params']: 
    upload_file(
                f'/kaggle/working/submission_{config["model"]}_{config["metric"]}.csv', 
                f'{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/Submission/submission_{config["model"]}_{config["metric"]}.csv', 
                f'New {config["model"]}_{config["metric"]} submission (from kaggle notebook)',
                repo
            )

File 'TrainedModels/WithKFCV/SLIM/SLIM_BPRRecommender/OptimizingMAP/Submission/submission_SLIM_BPR_MAP.csv' updated successfully.
