# Set up the connection with GitHub

In [1]:
# Optional cleanup, kept disabled: uncomment to delete an existing checkout of the
# repository from /kaggle/working before running the clone cell again.
# ! cd /kaggle/working && rm -rf RECsys_Challenge2024

In [2]:
from kaggle_secrets import UserSecretsClient

# Read the GitHub access token from the Kaggle secret named "Token" instead of
# hard-coding it in the notebook.
token = UserSecretsClient().get_secret("Token")

# Clone the project repository over HTTPS, authenticating with the token that is
# interpolated into the URL.
# NOTE(review): the token becomes part of the clone's remote URL and could show up
# in any message that prints that URL — confirm this is acceptable for this secret.
! git clone https://{token}@github.com/madratak/RECsys_Challenge2024.git

Cloning into 'RECsys_Challenge2024'...
remote: Enumerating objects: 6451, done.[K
remote: Counting objects: 100% (1/1), done.[K
remote: Total 6451 (delta 0), reused 0 (delta 0), pack-reused 6450 (from 2)[K
Receiving objects: 100% (6451/6451), 404.15 MiB | 19.11 MiB/s, done.
Resolving deltas: 100% (3575/3575), done.
Updating files: 100% (498/498), done.


In [3]:
! pip install PyGithub requests

Collecting PyGithub
  Downloading PyGithub-2.5.0-py3-none-any.whl.metadata (3.9 kB)
Collecting pynacl>=1.4.0 (from PyGithub)
  Downloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl.metadata (8.6 kB)
Downloading PyGithub-2.5.0-py3-none-any.whl (375 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m375.9/375.9 kB[0m [31m17.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (856 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m856.7/856.7 kB[0m [31m31.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pynacl, PyGithub
Successfully installed PyGithub-2.5.0 pynacl-1.5.0


In [4]:
# Move the kernel's working directory into the cloned repository; the imports in
# the following cells (Utils, Recommenders, Evaluation, ...) are written relative to it.
%cd /kaggle/working/RECsys_Challenge2024 
# Build the repository's Cython extensions with the current Python environment.
! python run_compile_all_cython.py

/kaggle/working/RECsys_Challenge2024
run_compile_all_cython: Found 11 Cython files in 5 folders...
run_compile_all_cython: All files will be compiled using your current python environment: '/usr/bin/python3'
Compiling [1/11]: MatrixFactorizationImpressions_Cython_Epoch.pyx... 
In file included from [01m[K/usr/local/lib/python3.10/dist-packages/numpy/core/include/numpy/ndarraytypes.h:1929[m[K,
                 from [01m[K/usr/local/lib/python3.10/dist-packages/numpy/core/include/numpy/ndarrayobject.h:12[m[K,
                 from [01m[K/usr/local/lib/python3.10/dist-packages/numpy/core/include/numpy/arrayobject.h:5[m[K,
                 from [01m[KMatrixFactorizationImpressions_Cython_Epoch.c:1252[m[K:
      |  [01;35m[K^~~~~~~[m[K
[01m[KMatrixFactorizationImpressions_Cython_Epoch.c:[m[K In function ‘[01m[K__pyx_f_43MatrixFactorizationImpressions_Cython_Epoch_32MatrixFactorization_Cython_Epoch_sampleBPR_Cython[m[K’:
30353 |       [01;35m[K__pyx_t_4

In [5]:
from Utils.notebookFunctions import *
import numpy as np
import pandas as pd
import scipy.sparse as sps
import time
import shutil
import optuna
import json
import os
from Utils.seconds_to_biggest_unit import seconds_to_biggest_unit


# Root of the cloned repository inside the Kaggle working directory.
K_PATH = '/kaggle/working/RECsys_Challenge2024'
# Repository-relative folder under which this notebook looks for the tuning
# history database and the best-parameters JSON of the model.
GH_PATH = 'TrainedModels/WithKFCV/SLIM'

# Seed NumPy's global random generator.
# NOTE(review): presumably the data splits and the recommender draw from this
# generator; the Optuna sampler is not seeded here — confirm if reproducible
# trials are required.
np.random.seed(42)

## Import the repository **RECsys_Challenge2024**

In [6]:
# Obtain a handle to the GitHub repository using the access token.
# NOTE(review): get_repo_from_github is presumably provided by the star import of
# Utils.notebookFunctions — confirm its return type before relying on `repo`.
repo = get_repo_from_github(token)

Repository 'RECsys_Challenge2024' found.


In [7]:
# Notebook-wide switches; the cells below read these keys.
config = {
    # Model identifier; used to build the study name and the repository paths.
    "model": "SLIM_BPR",
    # Number of folds passed to the k-fold split.
    "n_folds": 5,
    # Metric identifier; used to build the study name and the repository paths.
    "metric": "MAP",
    # When True, run new Optuna trials; when False, only load the stored study.
    "tune_parameters": True,
    # Local SQLite file backing the Optuna study.
    "database_path": "/kaggle/working/history_SLIM_BPR_MAP.db",
    # Not read by the cells visible in this part of the notebook.
    "copy_prev_best_params": False,
    # When True, fit a final recommender with the best hyperparameters.
    "tune_best_params": True,
    # Not read by the cells visible in this part of the notebook.
    "save_github": True,
}

Import the database where previous tuning trials have been saved.

In [8]:
# Seed the local Optuna storage with the trial history saved in the repository,
# so that new trials continue the previous study instead of starting from scratch.
history_db_source = f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/history_{config["model"]}_{config["metric"]}.db'

try:
    shutil.copyfile(history_db_source, config['database_path'])
except FileNotFoundError:
    # Best effort: without a stored history Optuna creates a new database on first
    # use. Say so explicitly, so a wrong path is not mistaken for a fresh study.
    print(f"No previous tuning history found at '{history_db_source}': Optuna will create a new database.")

# Construction of URM and ICM matrices

In [9]:
# Load the user-item interactions (columns used here: user_id, item_id, data).
URM_all_dataframe = pd.read_csv(f"{K_PATH}/Dataset/data_train.csv")

# Size the matrix from the largest id rather than from the number of distinct ids:
# the ids are used directly as row/column indices, so any gap in the id sequence
# would make a count-based shape too small and csr_matrix would raise.
# When the ids are contiguous from 0 both definitions give the same shape.
n_users = int(URM_all_dataframe["user_id"].max()) + 1
n_items = int(URM_all_dataframe["item_id"].max()) + 1

URM_all = sps.csr_matrix(
    (
        URM_all_dataframe["data"].values,
        (URM_all_dataframe["user_id"].values, URM_all_dataframe["item_id"].values),
    ),
    shape=(n_users, n_items),
)

# Display the matrix summary as the cell output.
URM_all

<35736x38121 sparse matrix of type '<class 'numpy.float64'>'
	with 1764607 stored elements in Compressed Sparse Row format>

In [10]:
# Load the item metadata (columns used here: item_id, feature_id, data).
ICM_dataframe = pd.read_csv(f"{K_PATH}/Dataset/data_ICM_metadata.csv")

# Size the matrix from the largest id rather than from the number of distinct ids:
# the ids are used directly as row/column indices, so an item without features or
# an unused feature id would make a count-based shape too small and csr_matrix
# would raise. When the ids are contiguous from 0 both definitions coincide.
# NOTE: this rebinds n_items to the item count seen in the metadata file.
n_items = int(ICM_dataframe["item_id"].max()) + 1
n_features = int(ICM_dataframe["feature_id"].max()) + 1

ICM_all = sps.csr_matrix(
    (
        ICM_dataframe["data"].values,
        (ICM_dataframe["item_id"].values, ICM_dataframe["feature_id"].values),
    ),
    shape=(n_items, n_features),
)

# Display the matrix summary as the cell output.
ICM_all

<38121x94331 sparse matrix of type '<class 'numpy.float64'>'
	with 2940040 stored elements in Compressed Sparse Row format>

# Training

In [11]:
from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Data_manager.split_functions.split_train_k_folds import split_train_k_folds

# Single 80/20 hold-out split of the full interaction matrix.
URM_train, URM_validation = split_train_in_two_percentage_global_sample(
    URM_all,
    train_percentage=0.80,
)

# Cross-validation folds over the same matrix; the objective function iterates
# over them as (train, validation) pairs.
folds = split_train_k_folds(
    URM_all,
    k=config["n_folds"],
)



In [12]:
from Recommenders.SLIM.SLIM_BPR_Python import SLIM_BPR_Python

def objective_function_SLIM_BPR(optuna_trial):
    """Optuna objective: cross-validated score of SLIM BPR for one trial.

    Samples one hyperparameter set from the trial, fits a fresh
    ``SLIM_BPR_Python`` on the training part of every fold in the global
    ``folds`` and evaluates it on the matching validation part at cutoff 10.

    The metric is read from ``config["metric"]``, the same key used to name the
    Optuna study and its database, so the optimised value always matches the
    study it is stored in.

    Parameters
    ----------
    optuna_trial : optuna.trial.Trial
        Trial object used to sample the hyperparameters.

    Returns
    -------
    float
        Mean of the chosen metric over all folds (to be maximised).
    """
    # NOTE(review): topK may be sampled as 0 — confirm the recommender handles it.
    full_hyperp = {
        "topK": optuna_trial.suggest_int("topK", 0, 500),
        "epochs": optuna_trial.suggest_int("epochs", 1, 50),
        "lambda_i": optuna_trial.suggest_float("lambda_i", 1e-5, 1e-1, log=True),
        "lambda_j": optuna_trial.suggest_float("lambda_j", 1e-5, 1e-1, log=True),
        "learning_rate": optuna_trial.suggest_float("learning_rate", 1e-4, 1e-1, log=True),
    }

    metric_name = config["metric"]
    cutoff = 10

    validation_results = []

    for URM_train_fold, URM_validation_fold in folds:

        # A new model per fold, so that nothing learned on one fold leaks into the next.
        recommender_instance = SLIM_BPR_Python(URM_train_fold)
        recommender_instance.fit(**full_hyperp)

        evaluator = EvaluatorHoldout(URM_validation_fold, cutoff_list=[cutoff])
        result_df, _ = evaluator.evaluateRecommender(recommender_instance)

        validation_results.append(result_df.loc[cutoff][metric_name])

    return np.mean(validation_results)

In [13]:
# Run a new batch of trials only when tuning is enabled in the configuration.
if config["tune_parameters"]:

    # Re-open the study stored in the SQLite database if it already exists,
    # otherwise create it; the objective value is maximised.
    optuna_study = optuna.create_study(
        study_name=f'hyperparameters_tuning_{config["model"]}_{config["metric"]}',
        storage=f'sqlite:///{config["database_path"]}',
        direction="maximize",
        load_if_exists=True,
    )

    # Add 20 trials on top of whatever history the study already contains.
    optuna_study.optimize(objective_function_SLIM_BPR, n_trials=20)

[I 2025-01-05 20:31:57,433] Using an existing study with name 'hyperparameters_tuning_SLIM_BPR_MAP' instead of creating a new one.


Epoch 1, Iteration 35736 in 10.66 seconds. Samples per second 3352.02
Epoch 2, Iteration 35736 in 7.71 seconds. Samples per second 4634.21
Epoch 3, Iteration 35736 in 6.21 seconds. Samples per second 5754.69
Epoch 4, Iteration 35736 in 5.31 seconds. Samples per second 6724.24
Epoch 5, Iteration 35736 in 4.55 seconds. Samples per second 7849.78
Epoch 6, Iteration 35736 in 4.18 seconds. Samples per second 8547.33
Epoch 7, Iteration 35736 in 3.94 seconds. Samples per second 9081.15
Epoch 8, Iteration 35736 in 3.70 seconds. Samples per second 9663.26
Epoch 9, Iteration 35736 in 3.54 seconds. Samples per second 10090.82
Epoch 10, Iteration 35736 in 3.38 seconds. Samples per second 10585.69
Epoch 11, Iteration 35736 in 3.33 seconds. Samples per second 10727.70
Epoch 12, Iteration 35736 in 3.27 seconds. Samples per second 10943.69
Epoch 13, Iteration 35736 in 3.23 seconds. Samples per second 11057.80
Epoch 14, Iteration 35736 in 3.13 seconds. Samples per second 11429.48
Epoch 15, Iteration 35

[I 2025-01-05 21:05:48,576] Trial 15 finished with value: 0.03287386861548083 and parameters: {'topK': 160, 'epochs': 48, 'lambda_i': 6.0524476121265005e-05, 'lambda_j': 0.005434123937595034, 'learning_rate': 0.016465850478772966}. Best is trial 1 with value: 0.03301220945830492.


Epoch 1, Iteration 35736 in 9.56 seconds. Samples per second 3738.00
Epoch 2, Iteration 35736 in 6.92 seconds. Samples per second 5163.43
Epoch 3, Iteration 35736 in 5.85 seconds. Samples per second 6110.36
Epoch 4, Iteration 35736 in 4.97 seconds. Samples per second 7197.29
Epoch 5, Iteration 35736 in 4.47 seconds. Samples per second 8000.00
Epoch 6, Iteration 35736 in 4.07 seconds. Samples per second 8780.66
Epoch 7, Iteration 35736 in 3.83 seconds. Samples per second 9329.80
Epoch 8, Iteration 35736 in 3.58 seconds. Samples per second 9994.66
Epoch 9, Iteration 35736 in 3.52 seconds. Samples per second 10162.27
Epoch 10, Iteration 35736 in 3.53 seconds. Samples per second 10133.50
Epoch 11, Iteration 35736 in 3.39 seconds. Samples per second 10552.63
Epoch 12, Iteration 35736 in 3.38 seconds. Samples per second 10559.99
Epoch 13, Iteration 35736 in 3.36 seconds. Samples per second 10639.20
Epoch 14, Iteration 35736 in 3.30 seconds. Samples per second 10830.37
Epoch 15, Iteration 357

[I 2025-01-05 21:43:09,783] Trial 16 finished with value: 0.03031257409082953 and parameters: {'topK': 170, 'epochs': 50, 'lambda_i': 0.0011642745297896705, 'lambda_j': 0.0035065348523774043, 'learning_rate': 0.0009613859600652928}. Best is trial 1 with value: 0.03301220945830492.


Epoch 1, Iteration 35736 in 9.50 seconds. Samples per second 3762.65
Epoch 2, Iteration 35736 in 6.99 seconds. Samples per second 5110.98
Epoch 3, Iteration 35736 in 5.68 seconds. Samples per second 6291.41
Epoch 4, Iteration 35736 in 4.97 seconds. Samples per second 7185.33
Epoch 5, Iteration 35736 in 4.71 seconds. Samples per second 7590.09
Epoch 6, Iteration 35736 in 4.07 seconds. Samples per second 8788.57
Epoch 7, Iteration 35736 in 4.65 seconds. Samples per second 7680.27
Epoch 8, Iteration 35736 in 3.95 seconds. Samples per second 9053.44
Epoch 9, Iteration 35736 in 3.84 seconds. Samples per second 9317.63
Epoch 10, Iteration 35736 in 3.55 seconds. Samples per second 10068.17
Epoch 11, Iteration 35736 in 3.41 seconds. Samples per second 10492.81
Epoch 12, Iteration 35736 in 3.39 seconds. Samples per second 10539.56
Epoch 13, Iteration 35736 in 3.31 seconds. Samples per second 10806.20
Epoch 14, Iteration 35736 in 3.24 seconds. Samples per second 11036.68
Epoch 15, Iteration 3573

[I 2025-01-05 22:17:30,888] Trial 17 finished with value: 0.03538101520406507 and parameters: {'topK': 56, 'epochs': 44, 'lambda_i': 4.57841939747724e-05, 'lambda_j': 0.0036287530525200753, 'learning_rate': 0.021131490305326184}. Best is trial 17 with value: 0.03538101520406507.


Epoch 1, Iteration 35736 in 9.61 seconds. Samples per second 3718.22
Epoch 2, Iteration 35736 in 7.08 seconds. Samples per second 5044.12
Epoch 3, Iteration 35736 in 5.71 seconds. Samples per second 6263.51
Epoch 4, Iteration 35736 in 4.90 seconds. Samples per second 7294.91
Epoch 5, Iteration 35736 in 4.39 seconds. Samples per second 8139.26
Epoch 6, Iteration 35736 in 4.08 seconds. Samples per second 8760.13
Epoch 7, Iteration 35736 in 3.99 seconds. Samples per second 8952.45
Epoch 8, Iteration 35736 in 3.69 seconds. Samples per second 9677.84
Epoch 9, Iteration 35736 in 3.69 seconds. Samples per second 9676.55
Epoch 10, Iteration 35736 in 3.35 seconds. Samples per second 10657.98
Epoch 11, Iteration 35736 in 3.27 seconds. Samples per second 10915.19
Epoch 12, Iteration 35736 in 3.27 seconds. Samples per second 10924.85
Epoch 13, Iteration 35736 in 3.20 seconds. Samples per second 11154.35
Epoch 14, Iteration 35736 in 3.18 seconds. Samples per second 11231.07
Epoch 15, Iteration 3573

[I 2025-01-05 22:51:03,686] Trial 18 finished with value: 0.03306744991539222 and parameters: {'topK': 43, 'epochs': 43, 'lambda_i': 0.0008225677817495563, 'lambda_j': 0.0007404847245680229, 'learning_rate': 0.003915666820949746}. Best is trial 17 with value: 0.03538101520406507.


Epoch 1, Iteration 35736 in 9.65 seconds. Samples per second 3701.83
Epoch 2, Iteration 35736 in 7.21 seconds. Samples per second 4954.54
Epoch 3, Iteration 35736 in 5.74 seconds. Samples per second 6223.62
Epoch 4, Iteration 35736 in 4.99 seconds. Samples per second 7168.43
Epoch 5, Iteration 35736 in 4.46 seconds. Samples per second 8017.00
Epoch 6, Iteration 35736 in 4.17 seconds. Samples per second 8579.69
Epoch 7, Iteration 35736 in 3.83 seconds. Samples per second 9334.51
Epoch 8, Iteration 35736 in 3.68 seconds. Samples per second 9712.02
Epoch 9, Iteration 35736 in 3.50 seconds. Samples per second 10217.01
Epoch 10, Iteration 35736 in 3.36 seconds. Samples per second 10627.51
Epoch 11, Iteration 35736 in 3.40 seconds. Samples per second 10518.59
Epoch 12, Iteration 35736 in 3.22 seconds. Samples per second 11092.46
Epoch 13, Iteration 35736 in 5.13 seconds. Samples per second 6970.70
Epoch 14, Iteration 35736 in 3.74 seconds. Samples per second 9555.55
Epoch 15, Iteration 35736

[I 2025-01-05 23:25:47,553] Trial 19 finished with value: 0.03630883099692012 and parameters: {'topK': 52, 'epochs': 42, 'lambda_i': 0.0018943861042101076, 'lambda_j': 0.0007117522135492006, 'learning_rate': 0.0347450297140694}. Best is trial 19 with value: 0.03630883099692012.


Epoch 1, Iteration 35736 in 9.61 seconds. Samples per second 3718.51
Epoch 2, Iteration 35736 in 7.00 seconds. Samples per second 5108.26
Epoch 3, Iteration 35736 in 5.66 seconds. Samples per second 6316.54
Epoch 4, Iteration 35736 in 4.89 seconds. Samples per second 7301.00
Epoch 5, Iteration 35736 in 4.40 seconds. Samples per second 8115.63
Epoch 6, Iteration 35736 in 4.11 seconds. Samples per second 8700.95
Epoch 7, Iteration 35736 in 3.80 seconds. Samples per second 9396.70
Epoch 8, Iteration 35736 in 3.59 seconds. Samples per second 9956.35
Epoch 9, Iteration 35736 in 3.47 seconds. Samples per second 10304.56
Epoch 10, Iteration 35736 in 3.39 seconds. Samples per second 10539.07
Epoch 11, Iteration 35736 in 3.33 seconds. Samples per second 10717.45
Epoch 12, Iteration 35736 in 3.27 seconds. Samples per second 10942.26
Epoch 13, Iteration 35736 in 3.36 seconds. Samples per second 10631.37
Epoch 14, Iteration 35736 in 3.22 seconds. Samples per second 11102.45
Epoch 15, Iteration 357

[I 2025-01-05 23:58:41,342] Trial 20 finished with value: 0.035360169976093724 and parameters: {'topK': 65, 'epochs': 37, 'lambda_i': 0.0028229364580842227, 'lambda_j': 0.00020413742202400771, 'learning_rate': 0.032609172610777795}. Best is trial 19 with value: 0.03630883099692012.


Epoch 1, Iteration 35736 in 9.54 seconds. Samples per second 3746.26
Epoch 2, Iteration 35736 in 7.00 seconds. Samples per second 5107.73
Epoch 3, Iteration 35736 in 5.60 seconds. Samples per second 6384.37
Epoch 4, Iteration 35736 in 4.88 seconds. Samples per second 7330.38
Epoch 5, Iteration 35736 in 4.43 seconds. Samples per second 8069.73
Epoch 6, Iteration 35736 in 4.05 seconds. Samples per second 8824.85
Epoch 7, Iteration 35736 in 3.92 seconds. Samples per second 9126.97
Epoch 8, Iteration 35736 in 3.64 seconds. Samples per second 9809.37
Epoch 9, Iteration 35736 in 3.49 seconds. Samples per second 10240.04
Epoch 10, Iteration 35736 in 3.34 seconds. Samples per second 10695.42
Epoch 11, Iteration 35736 in 3.29 seconds. Samples per second 10849.84
Epoch 12, Iteration 35736 in 3.27 seconds. Samples per second 10921.64
Epoch 13, Iteration 35736 in 3.19 seconds. Samples per second 11185.73
Epoch 14, Iteration 35736 in 3.12 seconds. Samples per second 11435.62
Epoch 15, Iteration 357

[I 2025-01-06 00:31:50,487] Trial 21 finished with value: 0.036057117428486475 and parameters: {'topK': 53, 'epochs': 38, 'lambda_i': 0.001959601111672217, 'lambda_j': 0.0002245675066089235, 'learning_rate': 0.03786018565736972}. Best is trial 19 with value: 0.03630883099692012.


Epoch 1, Iteration 35736 in 9.59 seconds. Samples per second 3725.15
Epoch 2, Iteration 35736 in 6.94 seconds. Samples per second 5152.46
Epoch 3, Iteration 35736 in 5.65 seconds. Samples per second 6329.94
Epoch 4, Iteration 35736 in 4.87 seconds. Samples per second 7345.37
Epoch 5, Iteration 35736 in 4.34 seconds. Samples per second 8224.92
Epoch 6, Iteration 35736 in 4.05 seconds. Samples per second 8834.38
Epoch 7, Iteration 35736 in 6.25 seconds. Samples per second 5713.27
Epoch 8, Iteration 35736 in 4.59 seconds. Samples per second 7786.56
Epoch 9, Iteration 35736 in 3.69 seconds. Samples per second 9690.08
Epoch 10, Iteration 35736 in 3.51 seconds. Samples per second 10179.09
Epoch 11, Iteration 35736 in 3.46 seconds. Samples per second 10339.83
Epoch 12, Iteration 35736 in 3.34 seconds. Samples per second 10701.87
Epoch 13, Iteration 35736 in 3.27 seconds. Samples per second 10924.38
Epoch 14, Iteration 35736 in 3.23 seconds. Samples per second 11057.19
Epoch 15, Iteration 3573

[I 2025-01-06 01:06:34,034] Trial 22 finished with value: 0.037821507705368254 and parameters: {'topK': 40, 'epochs': 44, 'lambda_i': 0.002212092497238432, 'lambda_j': 0.0012256183155757436, 'learning_rate': 0.05406362098789897}. Best is trial 22 with value: 0.037821507705368254.


Epoch 1, Iteration 35736 in 9.51 seconds. Samples per second 3759.61
Epoch 2, Iteration 35736 in 6.95 seconds. Samples per second 5140.84
Epoch 3, Iteration 35736 in 5.69 seconds. Samples per second 6280.65
Epoch 4, Iteration 35736 in 4.92 seconds. Samples per second 7267.29
Epoch 5, Iteration 35736 in 4.35 seconds. Samples per second 8217.31
Epoch 6, Iteration 35736 in 4.13 seconds. Samples per second 8648.03
Epoch 7, Iteration 35736 in 3.86 seconds. Samples per second 9266.54
Epoch 8, Iteration 35736 in 3.67 seconds. Samples per second 9737.00
Epoch 9, Iteration 35736 in 3.48 seconds. Samples per second 10271.35
Epoch 10, Iteration 35736 in 3.46 seconds. Samples per second 10337.50
Epoch 11, Iteration 35736 in 3.38 seconds. Samples per second 10577.08
Epoch 12, Iteration 35736 in 3.38 seconds. Samples per second 10574.76
Epoch 13, Iteration 35736 in 3.24 seconds. Samples per second 11042.58
Epoch 14, Iteration 35736 in 3.17 seconds. Samples per second 11288.07
Epoch 15, Iteration 357

[I 2025-01-06 01:39:04,056] Trial 23 finished with value: 0.03331864825621973 and parameters: {'topK': 161, 'epochs': 32, 'lambda_i': 0.003188424607372424, 'lambda_j': 0.00036531214942128347, 'learning_rate': 0.04804444963148089}. Best is trial 22 with value: 0.037821507705368254.


Epoch 1, Iteration 35736 in 9.66 seconds. Samples per second 3700.28
Epoch 2, Iteration 35736 in 6.99 seconds. Samples per second 5113.08
Epoch 3, Iteration 35736 in 5.67 seconds. Samples per second 6305.57
Epoch 4, Iteration 35736 in 4.90 seconds. Samples per second 7291.11
Epoch 5, Iteration 35736 in 4.34 seconds. Samples per second 8240.41
Epoch 6, Iteration 35736 in 4.18 seconds. Samples per second 8552.04
Epoch 7, Iteration 35736 in 3.76 seconds. Samples per second 9516.32
Epoch 8, Iteration 35736 in 3.57 seconds. Samples per second 10019.40
Epoch 9, Iteration 35736 in 3.42 seconds. Samples per second 10441.01
Epoch 10, Iteration 35736 in 3.30 seconds. Samples per second 10817.98
Epoch 11, Iteration 35736 in 3.26 seconds. Samples per second 10974.98
Epoch 12, Iteration 35736 in 3.18 seconds. Samples per second 11248.40
Epoch 13, Iteration 35736 in 3.23 seconds. Samples per second 11075.73
Epoch 14, Iteration 35736 in 3.16 seconds. Samples per second 11298.16
Epoch 15, Iteration 35

[I 2025-01-06 02:11:31,891] Trial 24 finished with value: 0.04073807739486808 and parameters: {'topK': 15, 'epochs': 39, 'lambda_i': 0.0028847229386486806, 'lambda_j': 0.00011497533581605925, 'learning_rate': 0.0977220338808867}. Best is trial 24 with value: 0.04073807739486808.


Epoch 1, Iteration 35736 in 9.75 seconds. Samples per second 3663.41
Epoch 2, Iteration 35736 in 7.16 seconds. Samples per second 4991.53
Epoch 3, Iteration 35736 in 5.65 seconds. Samples per second 6322.38
Epoch 4, Iteration 35736 in 4.94 seconds. Samples per second 7236.93
Epoch 5, Iteration 35736 in 4.44 seconds. Samples per second 8046.82
Epoch 6, Iteration 35736 in 4.08 seconds. Samples per second 8751.08
Epoch 7, Iteration 35736 in 3.88 seconds. Samples per second 9214.04
Epoch 8, Iteration 35736 in 3.59 seconds. Samples per second 9946.89
Epoch 9, Iteration 35736 in 3.52 seconds. Samples per second 10144.41
Epoch 10, Iteration 35736 in 3.36 seconds. Samples per second 10642.18
Epoch 11, Iteration 35736 in 3.26 seconds. Samples per second 10948.99
Epoch 12, Iteration 35736 in 3.28 seconds. Samples per second 10899.34
Epoch 13, Iteration 35736 in 3.19 seconds. Samples per second 11216.82
Epoch 14, Iteration 35736 in 3.17 seconds. Samples per second 11289.48
Epoch 15, Iteration 357

[I 2025-01-06 02:45:48,334] Trial 25 finished with value: 0.04090163381949502 and parameters: {'topK': 5, 'epochs': 42, 'lambda_i': 0.00789508840047421, 'lambda_j': 0.0013443219746281782, 'learning_rate': 0.08354396342758996}. Best is trial 25 with value: 0.04090163381949502.


Epoch 1, Iteration 35736 in 9.60 seconds. Samples per second 3721.78
Epoch 2, Iteration 35736 in 7.00 seconds. Samples per second 5102.73
Epoch 3, Iteration 35736 in 5.76 seconds. Samples per second 6200.46
Epoch 4, Iteration 35736 in 4.87 seconds. Samples per second 7339.44
Epoch 5, Iteration 35736 in 4.44 seconds. Samples per second 8049.74
Epoch 6, Iteration 35736 in 3.99 seconds. Samples per second 8959.67
Epoch 7, Iteration 35736 in 3.85 seconds. Samples per second 9282.28
Epoch 8, Iteration 35736 in 3.56 seconds. Samples per second 10034.75
Epoch 9, Iteration 35736 in 3.51 seconds. Samples per second 10170.93
Epoch 10, Iteration 35736 in 3.49 seconds. Samples per second 10234.98
Epoch 11, Iteration 35736 in 3.40 seconds. Samples per second 10514.33
Epoch 12, Iteration 35736 in 3.31 seconds. Samples per second 10792.22
Epoch 13, Iteration 35736 in 3.40 seconds. Samples per second 10520.13
Epoch 14, Iteration 35736 in 3.25 seconds. Samples per second 11009.52
Epoch 15, Iteration 35

[I 2025-01-06 03:17:31,182] Trial 26 finished with value: 0.040549270590639105 and parameters: {'topK': 7, 'epochs': 32, 'lambda_i': 0.007436823347118272, 'lambda_j': 0.0013328576361743142, 'learning_rate': 0.09807946723569623}. Best is trial 25 with value: 0.04090163381949502.


Epoch 1, Iteration 35736 in 9.94 seconds. Samples per second 3593.76
Epoch 2, Iteration 35736 in 7.24 seconds. Samples per second 4938.00
Epoch 3, Iteration 35736 in 5.74 seconds. Samples per second 6226.32
Epoch 4, Iteration 35736 in 4.86 seconds. Samples per second 7351.07
Epoch 5, Iteration 35736 in 4.35 seconds. Samples per second 8214.21
Epoch 6, Iteration 35736 in 4.01 seconds. Samples per second 8916.93
Epoch 7, Iteration 35736 in 3.82 seconds. Samples per second 9346.42
Epoch 8, Iteration 35736 in 3.61 seconds. Samples per second 9886.68
Epoch 9, Iteration 35736 in 3.62 seconds. Samples per second 9867.78
Epoch 10, Iteration 35736 in 3.38 seconds. Samples per second 10586.95
Epoch 11, Iteration 35736 in 3.32 seconds. Samples per second 10760.20
Epoch 12, Iteration 35736 in 3.32 seconds. Samples per second 10775.46
Epoch 13, Iteration 35736 in 3.20 seconds. Samples per second 11163.32
Epoch 14, Iteration 35736 in 3.18 seconds. Samples per second 11235.18
Epoch 15, Iteration 3573

[I 2025-01-06 03:46:57,064] Trial 27 finished with value: 0.03593342958308966 and parameters: {'topK': 2, 'epochs': 31, 'lambda_i': 0.008286218661167834, 'lambda_j': 4.588526078624738e-05, 'learning_rate': 0.09751943480460187}. Best is trial 25 with value: 0.04090163381949502.


Epoch 1, Iteration 35736 in 9.63 seconds. Samples per second 3709.59
Epoch 2, Iteration 35736 in 6.96 seconds. Samples per second 5137.76
Epoch 3, Iteration 35736 in 5.68 seconds. Samples per second 6296.83
Epoch 4, Iteration 35736 in 4.82 seconds. Samples per second 7412.87
Epoch 5, Iteration 35736 in 4.46 seconds. Samples per second 8011.59
Epoch 6, Iteration 35736 in 4.02 seconds. Samples per second 8887.26
Epoch 7, Iteration 35736 in 3.82 seconds. Samples per second 9344.01
Epoch 8, Iteration 35736 in 3.65 seconds. Samples per second 9787.18
Epoch 9, Iteration 35736 in 3.48 seconds. Samples per second 10269.22
Epoch 10, Iteration 35736 in 3.41 seconds. Samples per second 10485.89
Epoch 11, Iteration 35736 in 3.43 seconds. Samples per second 10406.80
Epoch 12, Iteration 35736 in 3.25 seconds. Samples per second 10994.14
Epoch 13, Iteration 35736 in 3.26 seconds. Samples per second 10952.97
Epoch 14, Iteration 35736 in 3.29 seconds. Samples per second 10875.35
Epoch 15, Iteration 357

[I 2025-01-06 04:18:00,192] Trial 28 finished with value: 0.0362325332275968 and parameters: {'topK': 87, 'epochs': 33, 'lambda_i': 0.019127066664817095, 'lambda_j': 0.0001337340749047079, 'learning_rate': 0.09878164151056976}. Best is trial 25 with value: 0.04090163381949502.


Epoch 1, Iteration 35736 in 9.62 seconds. Samples per second 3713.80
Epoch 2, Iteration 35736 in 7.60 seconds. Samples per second 4701.46
Epoch 3, Iteration 35736 in 5.95 seconds. Samples per second 6004.17
Epoch 4, Iteration 35736 in 4.91 seconds. Samples per second 7274.79
Epoch 5, Iteration 35736 in 4.35 seconds. Samples per second 8207.68
Epoch 6, Iteration 35736 in 4.03 seconds. Samples per second 8862.64
Epoch 7, Iteration 35736 in 3.77 seconds. Samples per second 9475.18
Epoch 8, Iteration 35736 in 3.63 seconds. Samples per second 9838.36
Epoch 9, Iteration 35736 in 3.49 seconds. Samples per second 10246.16
Epoch 10, Iteration 35736 in 3.41 seconds. Samples per second 10478.52
Epoch 11, Iteration 35736 in 3.32 seconds. Samples per second 10774.02
Epoch 12, Iteration 35736 in 3.38 seconds. Samples per second 10574.45
Epoch 13, Iteration 35736 in 3.23 seconds. Samples per second 11075.92
Epoch 14, Iteration 35736 in 3.24 seconds. Samples per second 11035.78
Epoch 15, Iteration 357

[I 2025-01-06 04:49:50,407] Trial 29 finished with value: 0.031024648250114945 and parameters: {'topK': 337, 'epochs': 23, 'lambda_i': 0.005246984278408396, 'lambda_j': 1.1113571110260673e-05, 'learning_rate': 0.061391394059511495}. Best is trial 25 with value: 0.04090163381949502.


Epoch 1, Iteration 35736 in 9.86 seconds. Samples per second 3625.77
Epoch 2, Iteration 35736 in 7.18 seconds. Samples per second 4974.88
Epoch 3, Iteration 35736 in 5.79 seconds. Samples per second 6174.48
Epoch 4, Iteration 35736 in 4.95 seconds. Samples per second 7222.22
Epoch 5, Iteration 35736 in 4.35 seconds. Samples per second 8209.98
Epoch 6, Iteration 35736 in 4.01 seconds. Samples per second 8913.83
Epoch 7, Iteration 35736 in 3.86 seconds. Samples per second 9249.66
Epoch 8, Iteration 35736 in 3.64 seconds. Samples per second 9806.15
Epoch 9, Iteration 35736 in 3.55 seconds. Samples per second 10062.74
Epoch 10, Iteration 35736 in 3.42 seconds. Samples per second 10438.86
Epoch 11, Iteration 35736 in 3.33 seconds. Samples per second 10724.15
Epoch 12, Iteration 35736 in 3.40 seconds. Samples per second 10516.28
Epoch 13, Iteration 35736 in 3.28 seconds. Samples per second 10897.18
Epoch 14, Iteration 35736 in 3.21 seconds. Samples per second 11124.06
Epoch 15, Iteration 357

[I 2025-01-06 05:20:12,976] Trial 30 finished with value: 0.03203058222895372 and parameters: {'topK': 119, 'epochs': 28, 'lambda_i': 0.0004904642237354136, 'lambda_j': 0.0016771752907976014, 'learning_rate': 0.015022986433812857}. Best is trial 25 with value: 0.04090163381949502.


Epoch 1, Iteration 35736 in 9.63 seconds. Samples per second 3710.94
Epoch 2, Iteration 35736 in 6.94 seconds. Samples per second 5148.70
Epoch 3, Iteration 35736 in 5.58 seconds. Samples per second 6408.94
Epoch 4, Iteration 35736 in 4.90 seconds. Samples per second 7292.89
Epoch 5, Iteration 35736 in 4.37 seconds. Samples per second 8184.56
Epoch 6, Iteration 35736 in 4.05 seconds. Samples per second 8820.38
Epoch 7, Iteration 35736 in 3.79 seconds. Samples per second 9439.45
Epoch 8, Iteration 35736 in 3.67 seconds. Samples per second 9734.99
Epoch 9, Iteration 35736 in 3.51 seconds. Samples per second 10185.24
Epoch 10, Iteration 35736 in 3.43 seconds. Samples per second 10428.38
Epoch 11, Iteration 35736 in 3.32 seconds. Samples per second 10771.02
Epoch 12, Iteration 35736 in 3.30 seconds. Samples per second 10832.51
Epoch 13, Iteration 35736 in 3.26 seconds. Samples per second 10977.50
Epoch 14, Iteration 35736 in 3.23 seconds. Samples per second 11069.78
Epoch 15, Iteration 357

[I 2025-01-06 05:54:35,142] Trial 31 finished with value: 0.039213382363547614 and parameters: {'topK': 25, 'epochs': 46, 'lambda_i': 0.020203426149977537, 'lambda_j': 0.0012619715361527578, 'learning_rate': 0.05373244674737784}. Best is trial 25 with value: 0.04090163381949502.


Epoch 1, Iteration 35736 in 10.08 seconds. Samples per second 3544.60
Epoch 2, Iteration 35736 in 7.36 seconds. Samples per second 4857.83
Epoch 3, Iteration 35736 in 6.07 seconds. Samples per second 5890.93
Epoch 4, Iteration 35736 in 5.30 seconds. Samples per second 6748.70
Epoch 5, Iteration 35736 in 4.71 seconds. Samples per second 7580.89
Epoch 6, Iteration 35736 in 4.31 seconds. Samples per second 8295.93
Epoch 7, Iteration 35736 in 4.04 seconds. Samples per second 8843.21
Epoch 8, Iteration 35736 in 3.79 seconds. Samples per second 9434.43
Epoch 9, Iteration 35736 in 3.61 seconds. Samples per second 9891.57
Epoch 10, Iteration 35736 in 3.57 seconds. Samples per second 9997.63
Epoch 11, Iteration 35736 in 3.41 seconds. Samples per second 10480.40
Epoch 12, Iteration 35736 in 3.45 seconds. Samples per second 10366.32
Epoch 13, Iteration 35736 in 3.35 seconds. Samples per second 10669.61
Epoch 14, Iteration 35736 in 3.18 seconds. Samples per second 11229.31
Epoch 15, Iteration 3573

[I 2025-01-06 06:28:59,148] Trial 32 finished with value: 0.04002380472623519 and parameters: {'topK': 22, 'epochs': 47, 'lambda_i': 0.020263504716694077, 'lambda_j': 0.00047570775780803477, 'learning_rate': 0.06354402019361458}. Best is trial 25 with value: 0.04090163381949502.


Epoch 1, Iteration 35736 in 9.96 seconds. Samples per second 3589.39
Epoch 2, Iteration 35736 in 7.22 seconds. Samples per second 4952.99
Epoch 3, Iteration 35736 in 5.93 seconds. Samples per second 6028.90
Epoch 4, Iteration 35736 in 5.13 seconds. Samples per second 6960.33
Epoch 5, Iteration 35736 in 4.66 seconds. Samples per second 7668.21
Epoch 6, Iteration 35736 in 4.19 seconds. Samples per second 8519.31
Epoch 7, Iteration 35736 in 3.95 seconds. Samples per second 9044.94
Epoch 8, Iteration 35736 in 3.83 seconds. Samples per second 9337.16
Epoch 9, Iteration 35736 in 3.80 seconds. Samples per second 9404.12
Epoch 10, Iteration 35736 in 3.60 seconds. Samples per second 9940.03
Epoch 11, Iteration 35736 in 3.62 seconds. Samples per second 9880.76
Epoch 12, Iteration 35736 in 3.53 seconds. Samples per second 10132.96
Epoch 13, Iteration 35736 in 6.78 seconds. Samples per second 5272.63
Epoch 14, Iteration 35736 in 3.59 seconds. Samples per second 9959.00
Epoch 15, Iteration 35736 in

[I 2025-01-06 07:03:44,249] Trial 33 finished with value: 0.036198686946664484 and parameters: {'topK': 83, 'epochs': 40, 'lambda_i': 0.00586243074857972, 'lambda_j': 0.0004155651792405946, 'learning_rate': 0.07033949402587175}. Best is trial 25 with value: 0.04090163381949502.


Epoch 1, Iteration 35736 in 9.97 seconds. Samples per second 3583.60
Epoch 2, Iteration 35736 in 7.35 seconds. Samples per second 4859.25
Epoch 3, Iteration 35736 in 5.92 seconds. Samples per second 6039.98
Epoch 4, Iteration 35736 in 5.12 seconds. Samples per second 6986.20
Epoch 5, Iteration 35736 in 4.69 seconds. Samples per second 7612.90
Epoch 6, Iteration 35736 in 4.31 seconds. Samples per second 8292.41
Epoch 7, Iteration 35736 in 3.93 seconds. Samples per second 9087.58
Epoch 8, Iteration 35736 in 3.79 seconds. Samples per second 9420.32
Epoch 9, Iteration 35736 in 3.66 seconds. Samples per second 9768.90
Epoch 10, Iteration 35736 in 3.47 seconds. Samples per second 10299.62
Epoch 11, Iteration 35736 in 3.47 seconds. Samples per second 10295.58
Epoch 12, Iteration 35736 in 3.39 seconds. Samples per second 10538.69
Epoch 13, Iteration 35736 in 3.29 seconds. Samples per second 10874.11
Epoch 14, Iteration 35736 in 3.31 seconds. Samples per second 10799.77
Epoch 15, Iteration 3573

[I 2025-01-06 07:37:03,765] Trial 34 finished with value: 0.040810184455877536 and parameters: {'topK': 15, 'epochs': 40, 'lambda_i': 0.028589336237712655, 'lambda_j': 6.0107655642311166e-05, 'learning_rate': 0.0770883834960638}. Best is trial 25 with value: 0.04090163381949502.


## Some optuna visualizations on recommender parameters

In [14]:
# When tuning was skipped in this run, load the study from its SQLite storage;
# otherwise the `optuna_study` variable already present in the kernel is used.
if not config['tune_parameters']:
    study_name = f'hyperparameters_tuning_{config["model"]}_{config["metric"]}'
    storage_url = f'sqlite:///{config["database_path"]}'
    optuna_study = optuna.load_study(study_name=study_name, storage=storage_url)

# Slice plot of the study's trials.
fig = optuna.visualization.plot_slice(optuna_study)
fig.show()

In [15]:
# Kept self-contained: if tuning was skipped, (re)load the study from the
# SQLite storage so this cell does not depend on the previous plotting cell.
if not config['tune_parameters']:
    study_name = f'hyperparameters_tuning_{config["model"]}_{config["metric"]}'
    storage_url = f'sqlite:///{config["database_path"]}'
    optuna_study = optuna.load_study(study_name=study_name, storage=storage_url)

# Hyperparameter importance plot for the study.
fig = optuna.visualization.plot_param_importances(optuna_study)
fig.show()

## Let's train the recommender with best parameter values

In [16]:
if config['tune_best_params']:

    # Source of the best hyperparameters:
    #   - tuning skipped -> read the JSON stored under the cloned repository
    #   - tuning ran     -> take them from the study's best trial
    if not config['tune_parameters']:
        best_params_path = f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/best_params_{config["model"]}_{config["metric"]}.json'
        with open(best_params_path, 'r') as best_params_json:
            best_params = json.load(best_params_json)
    else:
        best_params = optuna_study.best_trial.params

    # Final model is fit on the sum of the train and validation URMs.
    recommender_instance = SLIM_BPR_Python(URM_train + URM_validation)
    recommender_instance.fit(**best_params)

Epoch 1, Iteration 35736 in 10.72 seconds. Samples per second 3333.20
Epoch 2, Iteration 35736 in 7.77 seconds. Samples per second 4599.89
Epoch 3, Iteration 35736 in 6.09 seconds. Samples per second 5871.09
Epoch 4, Iteration 35736 in 5.24 seconds. Samples per second 6816.85
Epoch 5, Iteration 35736 in 4.68 seconds. Samples per second 7641.55
Epoch 6, Iteration 35736 in 4.17 seconds. Samples per second 8566.03
Epoch 7, Iteration 35736 in 3.91 seconds. Samples per second 9136.54
Epoch 8, Iteration 35736 in 3.67 seconds. Samples per second 9728.42
Epoch 9, Iteration 35736 in 3.70 seconds. Samples per second 9669.67
Epoch 10, Iteration 35736 in 3.42 seconds. Samples per second 10452.19
Epoch 11, Iteration 35736 in 3.33 seconds. Samples per second 10745.78
Epoch 12, Iteration 35736 in 3.34 seconds. Samples per second 10714.23
Epoch 13, Iteration 35736 in 3.27 seconds. Samples per second 10928.15
Epoch 14, Iteration 35736 in 3.25 seconds. Samples per second 10979.48
Epoch 15, Iteration 357

# Testing

Create the recommendations for the submission. 

In [17]:
if config['tune_best_params']:

    # Target users come from the cloned repository's dataset folder.
    target_users_csv = '/kaggle/working/RECsys_Challenge2024/Dataset/data_target_users_test.csv'
    data_target_users_test = pd.read_csv(target_users_csv)

    # Submission is written to the Kaggle working directory, named after model and metric.
    submission_path = f'/kaggle/working/submission_{config["model"]}_{config["metric"]}.csv'
    create_submission(data_target_users_test, recommender_instance, submission_path)

Submission file saved as /kaggle/working/submission_SLIM_BPR_MAP.csv


# Save Version on GitHub 

Write the best hyperparameters found by the tuning to a JSON file (and optionally upload it to GitHub), or copy the previously saved JSON file from the repository.

In [18]:
if config['tune_parameters']:
    # Dump the study's best hyperparameters into the Kaggle working directory.
    local_params_path = f'/kaggle/working/best_params_{config["model"]}_{config["metric"]}.json'
    with open(local_params_path, 'w') as params_file:
        json.dump(optuna_study.best_params, params_file)

    # Optionally push the freshly written JSON to the GitHub repository.
    if config['save_github']:
        remote_params_path = f'{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/best_params_{config["model"]}_{config["metric"]}.json'
        commit_message = f'{config["model"]}_{config["metric"]} tuning results (from kaggle notebook)'
        upload_file(local_params_path, remote_params_path, commit_message, repo)

elif config['copy_prev_best_params']:
    # No tuning in this run: copy the JSON stored in the cloned repository
    # into the working directory.
    previous_params_path = (
        f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/'
        f'best_params_{config["model"]}_{config["metric"]}.json'
    )
    local_params_path = f'/kaggle/working/best_params_{config["model"]}_{config["metric"]}.json'
    shutil.copyfile(previous_params_path, local_params_path)

File 'TrainedModels/WithKFCV/SLIM/SLIM_BPRRecommender/OptimizingMAP/best_params_SLIM_BPR_MAP.json' updated successfully.


Save the tuning history (the Optuna study database) to GitHub.

In [19]:
# Upload the study database only when tuning ran and GitHub saving is enabled.
if config['save_github'] and config['tune_parameters']:
    history_remote_path = f'{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/history_{config["model"]}_{config["metric"]}.db'
    history_commit_message = f'Tuning {config["model"]}_{config["metric"]} db updated results (from kaggle notebook)'
    upload_file(config['database_path'], history_remote_path, history_commit_message, repo)

File 'TrainedModels/WithKFCV/SLIM/SLIM_BPRRecommender/OptimizingMAP/history_SLIM_BPR_MAP.db' updated successfully.


Save the best trained model and its submission.

In [20]:
# Upload the submission CSV only when the best-params model was trained
# and GitHub saving is enabled.
if config['save_github'] and config['tune_best_params']:
    submission_local_path = f'/kaggle/working/submission_{config["model"]}_{config["metric"]}.csv'
    submission_remote_path = f'{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/Submission/submission_{config["model"]}_{config["metric"]}.csv'
    submission_commit_message = f'New {config["model"]}_{config["metric"]} submission (from kaggle notebook)'
    upload_file(submission_local_path, submission_remote_path, submission_commit_message, repo)

File 'TrainedModels/WithKFCV/SLIM/SLIM_BPRRecommender/OptimizingMAP/Submission/submission_SLIM_BPR_MAP.csv' updated successfully.
