# Set connection with GitHub

In [1]:
# ! cd /kaggle/working && rm -rf RECsys_Challenge2024

In [2]:
from kaggle_secrets import UserSecretsClient

token = UserSecretsClient().get_secret("Token")

! git clone https://{token}@github.com/madratak/RECsys_Challenge2024.git

Cloning into 'RECsys_Challenge2024'...
remote: Enumerating objects: 6196, done.[K
remote: Counting objects: 100% (107/107), done.[K
remote: Compressing objects: 100% (88/88), done.[K
remote: Total 6196 (delta 35), reused 16 (delta 6), pack-reused 6089 (from 4)[K
Receiving objects: 100% (6196/6196), 399.83 MiB | 37.79 MiB/s, done.
Resolving deltas: 100% (3415/3415), done.
Updating files: 100% (486/486), done.


In [3]:
! pip install PyGithub requests

Collecting PyGithub
  Downloading PyGithub-2.5.0-py3-none-any.whl.metadata (3.9 kB)
Collecting pynacl>=1.4.0 (from PyGithub)
  Downloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl.metadata (8.6 kB)
Downloading PyGithub-2.5.0-py3-none-any.whl (375 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m375.9/375.9 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (856 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m856.7/856.7 kB[0m [31m23.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pynacl, PyGithub
Successfully installed PyGithub-2.5.0 pynacl-1.5.0


In [4]:
%cd /kaggle/working/RECsys_Challenge2024 
! python run_compile_all_cython.py

/kaggle/working/RECsys_Challenge2024
run_compile_all_cython: Found 11 Cython files in 5 folders...
run_compile_all_cython: All files will be compiled using your current python environment: '/usr/bin/python3'
Compiling [1/11]: MatrixFactorizationImpressions_Cython_Epoch.pyx... 
In file included from [01m[K/usr/local/lib/python3.10/dist-packages/numpy/core/include/numpy/ndarraytypes.h:1929[m[K,
                 from [01m[K/usr/local/lib/python3.10/dist-packages/numpy/core/include/numpy/ndarrayobject.h:12[m[K,
                 from [01m[K/usr/local/lib/python3.10/dist-packages/numpy/core/include/numpy/arrayobject.h:5[m[K,
                 from [01m[KMatrixFactorizationImpressions_Cython_Epoch.c:1252[m[K:
      |  [01;35m[K^~~~~~~[m[K
[01m[KMatrixFactorizationImpressions_Cython_Epoch.c:[m[K In function ‘[01m[K__pyx_f_43MatrixFactorizationImpressions_Cython_Epoch_32MatrixFactorization_Cython_Epoch_sampleBPR_Cython[m[K’:
30353 |       [01;35m[K__pyx_t_4

In [5]:
from Utils.notebookFunctions import *
import numpy as np
import pandas as pd
import scipy.sparse as sps
import time
import shutil
import optuna
import json
import os
from Utils.seconds_to_biggest_unit import seconds_to_biggest_unit


K_PATH = '/kaggle/working/RECsys_Challenge2024'
GH_PATH = 'TrainedModels/WithKFCV/SLIM'

np.random.seed(42)

## Import the repository **RECsys_Challenge2024**

In [6]:
repo = get_repo_from_github(token)

Repository 'RECsys_Challenge2024' found.


In [7]:
config = {
    'model': 'SLIM_BPR',
    'n_folds': 5,
    'metric': 'MAP',
    'tune_parameters': True,
    'database_path': '/kaggle/working/history_SLIM_BPR_MAP.db',
    'copy_prev_best_params': False,
    'tune_best_params': True,
    'save_github': True
}

Import the database where previous tuning trials have been saved.

In [8]:
try:
    shutil.copyfile(
        f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/history_{config["model"]}_{config["metric"]}.db', 
        config['database_path']
    )
except FileNotFoundError:
    pass # if not present optuna will create it

# Construction of URM and ICM matrices

In [9]:
URM_all_dataframe = pd.read_csv("/kaggle/working/RECsys_Challenge2024/Dataset/data_train.csv")

n_users = len(URM_all_dataframe["user_id"].unique())
n_items = len(URM_all_dataframe["item_id"].unique())

URM_all = sps.csr_matrix((URM_all_dataframe["data"].values, 
                          (URM_all_dataframe["user_id"].values, URM_all_dataframe["item_id"].values)),
                        shape = (n_users, n_items))

URM_all

<35736x38121 sparse matrix of type '<class 'numpy.float64'>'
	with 1764607 stored elements in Compressed Sparse Row format>

In [10]:
ICM_dataframe = pd.read_csv("/kaggle/working/RECsys_Challenge2024/Dataset/data_ICM_metadata.csv")

n_items = len(ICM_dataframe["item_id"].unique())
n_features = len(ICM_dataframe["feature_id"].unique())

ICM_all = sps.csr_matrix((ICM_dataframe["data"].values, 
                          (ICM_dataframe["item_id"].values, ICM_dataframe["feature_id"].values)),
                        shape = (n_items, n_features))

ICM_all

<38121x94331 sparse matrix of type '<class 'numpy.float64'>'
	with 2940040 stored elements in Compressed Sparse Row format>

# Training

In [11]:
from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Data_manager.split_functions.split_train_k_folds import split_train_k_folds

URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.80)

folds = split_train_k_folds(URM_all, k=config['n_folds'])



In [12]:
from Recommenders.SLIM.SLIM_BPR_Python import SLIM_BPR_Python

def objective_function_SLIM_BPR(optuna_trial):

    full_hyperp = {
                    "topK": optuna_trial.suggest_int("topK", 0, 500),
                    "epochs": optuna_trial.suggest_int("epochs", 1, 50),
                    "lambda_i": optuna_trial.suggest_float("lambda_i", 1e-5, 1e-1, log=True),
                    "lambda_j": optuna_trial.suggest_float("lambda_j", 1e-5, 1e-1, log=True),
                    "learning_rate": optuna_trial.suggest_float("learning_rate", 1e-4, 1e-1, log=True)
                }
    
    validation_results = []
    
    for fold_idx, (URM_train_fold, URM_validation_fold) in enumerate(folds):
    
        recommender_instance = SLIM_BPR_Python(URM_train_fold)
        recommender_instance.fit(**full_hyperp)
        
        evaluator = EvaluatorHoldout(URM_validation_fold, cutoff_list=[10])
        result_df, _ = evaluator.evaluateRecommender(recommender_instance)
        
        validation_results.append(result_df.loc[10]["MAP"])
    
    return np.mean(validation_results)

In [13]:
if config['tune_parameters']:
    
    optuna_study = optuna.create_study(direction='maximize', study_name=f'hyperparameters_tuning_{config["model"]}_{config["metric"]}',
                                storage=f'sqlite:///{config["database_path"]}', load_if_exists=True)

    optuna_study.optimize(objective_function_SLIM_BPR, n_trials=15)

[I 2025-01-05 12:18:04,227] A new study created in RDB with name: hyperparameters_tuning_SLIM_BPR_MAP


Epoch 1, Iteration 35736 in 10.55 seconds. Samples per second 3386.50
Epoch 2, Iteration 35736 in 7.62 seconds. Samples per second 4688.94
Epoch 3, Iteration 35736 in 6.25 seconds. Samples per second 5721.77
Epoch 4, Iteration 35736 in 5.17 seconds. Samples per second 6912.88
Epoch 5, Iteration 35736 in 4.56 seconds. Samples per second 7837.53
Epoch 6, Iteration 35736 in 4.15 seconds. Samples per second 8614.82
Epoch 7, Iteration 35736 in 3.86 seconds. Samples per second 9246.18
Epoch 8, Iteration 35736 in 3.63 seconds. Samples per second 9841.89
Epoch 9, Iteration 35736 in 3.47 seconds. Samples per second 10300.99
Epoch 10, Iteration 35736 in 3.46 seconds. Samples per second 10320.17
Epoch 11, Iteration 35736 in 3.29 seconds. Samples per second 10845.85
Epoch 12, Iteration 35736 in 3.23 seconds. Samples per second 11052.65
Epoch 13, Iteration 35736 in 3.20 seconds. Samples per second 11152.37
Epoch 14, Iteration 35736 in 3.17 seconds. Samples per second 11273.02
Epoch 15, Iteration 35

[I 2025-01-05 12:45:37,335] Trial 0 finished with value: 0.030840353646130032 and parameters: {'topK': 362, 'epochs': 23, 'lambda_i': 0.052867545767967615, 'lambda_j': 0.010496161067592736, 'learning_rate': 0.06949688514175209}. Best is trial 0 with value: 0.030840353646130032.


Epoch 1, Iteration 35736 in 10.11 seconds. Samples per second 3536.23
Epoch 2, Iteration 35736 in 7.44 seconds. Samples per second 4800.03
Epoch 3, Iteration 35736 in 5.80 seconds. Samples per second 6157.84
Epoch 4, Iteration 35736 in 5.00 seconds. Samples per second 7140.23
Epoch 5, Iteration 35736 in 4.39 seconds. Samples per second 8145.88
Epoch 6, Iteration 35736 in 4.03 seconds. Samples per second 8865.29
Epoch 7, Iteration 35736 in 3.82 seconds. Samples per second 9366.97
Epoch 8, Iteration 35736 in 3.60 seconds. Samples per second 9937.49
Epoch 9, Iteration 35736 in 3.55 seconds. Samples per second 10063.64
Epoch 10, Iteration 35736 in 3.48 seconds. Samples per second 10261.03
Epoch 11, Iteration 35736 in 3.32 seconds. Samples per second 10767.31
Epoch 12, Iteration 35736 in 3.30 seconds. Samples per second 10815.50
Epoch 13, Iteration 35736 in 3.24 seconds. Samples per second 11039.22
Epoch 14, Iteration 35736 in 3.20 seconds. Samples per second 11168.65
Epoch 15, Iteration 35

[I 2025-01-05 13:17:09,808] Trial 1 finished with value: 0.03301220945830492 and parameters: {'topK': 48, 'epochs': 48, 'lambda_i': 4.501902294751744e-05, 'lambda_j': 0.033796914839612925, 'learning_rate': 0.0031929654934849884}. Best is trial 1 with value: 0.03301220945830492.


Epoch 1, Iteration 35736 in 9.90 seconds. Samples per second 3608.39
Epoch 2, Iteration 35736 in 7.19 seconds. Samples per second 4971.47
Epoch 3, Iteration 35736 in 5.84 seconds. Samples per second 6118.43
Epoch 4, Iteration 35736 in 5.01 seconds. Samples per second 7133.22
Epoch 5, Iteration 35736 in 4.44 seconds. Samples per second 8053.65
Epoch 6, Iteration 35736 in 4.08 seconds. Samples per second 8768.49
Epoch 7, Iteration 35736 in 3.77 seconds. Samples per second 9471.93
Epoch 8, Iteration 35736 in 3.57 seconds. Samples per second 10003.49
Epoch 9, Iteration 35736 in 3.47 seconds. Samples per second 10310.96
Epoch 10, Iteration 35736 in 3.34 seconds. Samples per second 10695.29
Epoch 11, Iteration 35736 in 3.28 seconds. Samples per second 10894.16
Epoch 12, Iteration 35736 in 3.25 seconds. Samples per second 11004.66
Epoch 13, Iteration 35736 in 3.23 seconds. Samples per second 11048.18
Epoch 14, Iteration 35736 in 3.14 seconds. Samples per second 11371.40
Epoch 15, Iteration 35

[I 2025-01-05 13:44:42,133] Trial 2 finished with value: 0.029782918121100777 and parameters: {'topK': 420, 'epochs': 16, 'lambda_i': 0.00885062180756791, 'lambda_j': 1.6963509145454547e-05, 'learning_rate': 0.08373244635083368}. Best is trial 1 with value: 0.03301220945830492.


Epoch 1, Iteration 35736 in 10.00 seconds. Samples per second 3572.10
Epoch 2, Iteration 35736 in 7.22 seconds. Samples per second 4947.75
Epoch 3, Iteration 35736 in 5.87 seconds. Samples per second 6086.39
Epoch 4, Iteration 35736 in 5.07 seconds. Samples per second 7050.12
Epoch 5, Iteration 35736 in 4.49 seconds. Samples per second 7953.52
Epoch 6, Iteration 35736 in 4.06 seconds. Samples per second 8796.76
Epoch 7, Iteration 35736 in 3.81 seconds. Samples per second 9382.14
Epoch 8, Iteration 35736 in 3.66 seconds. Samples per second 9753.04
Epoch 9, Iteration 35736 in 3.51 seconds. Samples per second 10167.20
Epoch 10, Iteration 35736 in 3.41 seconds. Samples per second 10494.48
Epoch 11, Iteration 35736 in 3.37 seconds. Samples per second 10619.41
Epoch 12, Iteration 35736 in 3.26 seconds. Samples per second 10956.31
Epoch 13, Iteration 35736 in 3.28 seconds. Samples per second 10882.80
Epoch 14, Iteration 35736 in 3.22 seconds. Samples per second 11087.07
Epoch 15, Iteration 35

[I 2025-01-05 14:15:05,126] Trial 3 finished with value: 0.029155475622983727 and parameters: {'topK': 287, 'epochs': 43, 'lambda_i': 0.0106652891319488, 'lambda_j': 0.0024628219200826143, 'learning_rate': 0.0002469978251027989}. Best is trial 1 with value: 0.03301220945830492.


Epoch 1, Iteration 35736 in 9.94 seconds. Samples per second 3595.92
Epoch 2, Iteration 35736 in 7.24 seconds. Samples per second 4936.43
Epoch 3, Iteration 35736 in 5.79 seconds. Samples per second 6170.74
Epoch 4, Iteration 35736 in 4.93 seconds. Samples per second 7242.69
Epoch 5, Iteration 35736 in 4.41 seconds. Samples per second 8111.81
Epoch 6, Iteration 35736 in 4.03 seconds. Samples per second 8859.03
Epoch 7, Iteration 35736 in 4.61 seconds. Samples per second 7755.45
Epoch 8, Iteration 35736 in 3.60 seconds. Samples per second 9916.04
Epoch 9, Iteration 35736 in 3.53 seconds. Samples per second 10130.59
Epoch 10, Iteration 35736 in 3.38 seconds. Samples per second 10563.32
Epoch 11, Iteration 35736 in 3.27 seconds. Samples per second 10916.67
Epoch 12, Iteration 35736 in 3.23 seconds. Samples per second 11072.38
Epoch 13, Iteration 35736 in 3.22 seconds. Samples per second 11115.19
Epoch 14, Iteration 35736 in 3.13 seconds. Samples per second 11400.58
Epoch 15, Iteration 357

[I 2025-01-05 14:45:47,547] Trial 4 finished with value: 0.028134669576043096 and parameters: {'topK': 457, 'epochs': 31, 'lambda_i': 0.09093391530125938, 'lambda_j': 0.039153135187582365, 'learning_rate': 0.0009064178557831873}. Best is trial 1 with value: 0.03301220945830492.


Epoch 1, Iteration 35736 in 9.94 seconds. Samples per second 3594.51
Epoch 2, Iteration 35736 in 7.25 seconds. Samples per second 4930.17
Epoch 3, Iteration 35736 in 5.79 seconds. Samples per second 6173.69
Epoch 4, Iteration 35736 in 4.98 seconds. Samples per second 7169.20
Epoch 5, Iteration 35736 in 4.44 seconds. Samples per second 8047.60
Epoch 6, Iteration 35736 in 4.08 seconds. Samples per second 8754.06
Epoch 7, Iteration 35736 in 3.90 seconds. Samples per second 9151.49
Epoch 8, Iteration 35736 in 3.75 seconds. Samples per second 9535.54
Epoch 9, Iteration 35736 in 3.58 seconds. Samples per second 9973.17
Epoch 10, Iteration 35736 in 3.46 seconds. Samples per second 10335.19
Epoch 11, Iteration 35736 in 3.38 seconds. Samples per second 10562.11
Epoch 12, Iteration 35736 in 3.34 seconds. Samples per second 10711.12
Epoch 13, Iteration 35736 in 3.26 seconds. Samples per second 10973.41
Epoch 14, Iteration 35736 in 3.17 seconds. Samples per second 11264.37
Epoch 15, Iteration 3573

[I 2025-01-05 15:10:37,211] Trial 5 finished with value: 0.02929553189981122 and parameters: {'topK': 241, 'epochs': 27, 'lambda_i': 0.00013491906216138777, 'lambda_j': 0.000288638736954487, 'learning_rate': 0.0021729232129811395}. Best is trial 1 with value: 0.03301220945830492.


Epoch 1, Iteration 35736 in 9.98 seconds. Samples per second 3579.91
Epoch 2, Iteration 35736 in 7.23 seconds. Samples per second 4944.03
Epoch 3, Iteration 35736 in 5.81 seconds. Samples per second 6148.44
Epoch 4, Iteration 35736 in 4.96 seconds. Samples per second 7210.85
Epoch 5, Iteration 35736 in 4.37 seconds. Samples per second 8168.25
Epoch 6, Iteration 35736 in 4.01 seconds. Samples per second 8902.02
Epoch 7, Iteration 35736 in 3.78 seconds. Samples per second 9465.56
Epoch 8, Iteration 35736 in 3.56 seconds. Samples per second 10024.46
Epoch 9, Iteration 35736 in 3.54 seconds. Samples per second 10104.29
Epoch 10, Iteration 35736 in 3.36 seconds. Samples per second 10622.24
Epoch 11, Iteration 35736 in 3.27 seconds. Samples per second 10927.27
Epoch 12, Iteration 35736 in 3.22 seconds. Samples per second 11098.50
Epoch 13, Iteration 35736 in 3.19 seconds. Samples per second 11198.70
Epoch 14, Iteration 35736 in 3.15 seconds. Samples per second 11327.07
Epoch 15, Iteration 35

[I 2025-01-05 15:44:46,616] Trial 6 finished with value: 0.02895398448294586 and parameters: {'topK': 322, 'epochs': 40, 'lambda_i': 0.09053116579899019, 'lambda_j': 0.002187420044958117, 'learning_rate': 0.0009052032555730572}. Best is trial 1 with value: 0.03301220945830492.


Epoch 1, Iteration 35736 in 10.38 seconds. Samples per second 3443.03
Epoch 2, Iteration 35736 in 7.42 seconds. Samples per second 4815.84
Epoch 3, Iteration 35736 in 5.98 seconds. Samples per second 5980.86
Epoch 4, Iteration 35736 in 5.13 seconds. Samples per second 6969.89
Epoch 5, Iteration 35736 in 4.45 seconds. Samples per second 8026.55
Epoch 6, Iteration 35736 in 4.10 seconds. Samples per second 8714.31
Epoch 7, Iteration 35736 in 3.88 seconds. Samples per second 9219.70
Epoch 8, Iteration 35736 in 3.63 seconds. Samples per second 9832.58
Epoch 9, Iteration 35736 in 3.53 seconds. Samples per second 10121.03
Epoch 10, Iteration 35736 in 3.38 seconds. Samples per second 10587.72
Epoch 11, Iteration 35736 in 3.35 seconds. Samples per second 10673.41
Epoch 12, Iteration 35736 in 3.29 seconds. Samples per second 10866.28
Epoch 13, Iteration 35736 in 3.31 seconds. Samples per second 10785.09
Epoch 14, Iteration 35736 in 3.16 seconds. Samples per second 11302.86
Epoch 15, Iteration 35

[I 2025-01-05 16:13:14,051] Trial 7 finished with value: 0.029193649228173784 and parameters: {'topK': 205, 'epochs': 26, 'lambda_i': 0.0337469842228759, 'lambda_j': 9.729520159393718e-05, 'learning_rate': 0.00012384393337086706}. Best is trial 1 with value: 0.03301220945830492.


Epoch 1, Iteration 35736 in 10.00 seconds. Samples per second 3574.81
Epoch 2, Iteration 35736 in 7.21 seconds. Samples per second 4955.68
Epoch 3, Iteration 35736 in 5.83 seconds. Samples per second 6132.97
Epoch 4, Iteration 35736 in 4.95 seconds. Samples per second 7222.67
Epoch 5, Iteration 35736 in 4.44 seconds. Samples per second 8057.28
Epoch 6, Iteration 35736 in 4.10 seconds. Samples per second 8707.56
Epoch 7, Iteration 35736 in 3.75 seconds. Samples per second 9520.17
Epoch 8, Iteration 35736 in 3.61 seconds. Samples per second 9889.88
Epoch 9, Iteration 35736 in 3.44 seconds. Samples per second 10375.55
Epoch 10, Iteration 35736 in 3.36 seconds. Samples per second 10636.81
Train completed in 0.84 minutes
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 45.31 sec. Users per second: 785
Epoch 1, Iteration 35736 in 9.95 seconds. Samples per second 3590.62
Epoch 2, Iteration 35736 in 7.34 seconds.

[I 2025-01-05 16:36:31,267] Trial 8 finished with value: 0.028453356184721834 and parameters: {'topK': 218, 'epochs': 10, 'lambda_i': 0.051952043462618135, 'lambda_j': 5.5878828118608385e-05, 'learning_rate': 0.02686617402272799}. Best is trial 1 with value: 0.03301220945830492.


Epoch 1, Iteration 35736 in 10.59 seconds. Samples per second 3374.90
Epoch 2, Iteration 35736 in 7.28 seconds. Samples per second 4911.47
Epoch 3, Iteration 35736 in 5.82 seconds. Samples per second 6141.61
Epoch 4, Iteration 35736 in 4.94 seconds. Samples per second 7237.18
Epoch 5, Iteration 35736 in 4.48 seconds. Samples per second 7977.37
Epoch 6, Iteration 35736 in 4.10 seconds. Samples per second 8710.14
Epoch 7, Iteration 35736 in 3.77 seconds. Samples per second 9483.86
Train completed in 0.68 minutes
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 33.00 sec. Users per second: 1078
Epoch 1, Iteration 35736 in 10.37 seconds. Samples per second 3446.15
Epoch 2, Iteration 35736 in 7.58 seconds. Samples per second 4716.92
Epoch 3, Iteration 35736 in 6.19 seconds. Samples per second 5776.05
Epoch 4, Iteration 35736 in 5.28 seconds. Samples per second 6764.35
Epoch 5, Iteration 35736 in 4.87 seconds. 

[I 2025-01-05 16:55:11,000] Trial 9 finished with value: 0.029339095962876503 and parameters: {'topK': 8, 'epochs': 7, 'lambda_i': 0.0004941257838451811, 'lambda_j': 0.0006812121060382428, 'learning_rate': 0.03526191578157976}. Best is trial 1 with value: 0.03301220945830492.


Epoch 1, Iteration 35736 in 9.99 seconds. Samples per second 3577.20
Epoch 2, Iteration 35736 in 7.22 seconds. Samples per second 4950.99
Epoch 3, Iteration 35736 in 5.82 seconds. Samples per second 6138.45
Epoch 4, Iteration 35736 in 5.09 seconds. Samples per second 7021.65
Epoch 5, Iteration 35736 in 4.41 seconds. Samples per second 8104.63
Epoch 6, Iteration 35736 in 4.09 seconds. Samples per second 8734.95
Epoch 7, Iteration 35736 in 3.80 seconds. Samples per second 9414.08
Epoch 8, Iteration 35736 in 3.63 seconds. Samples per second 9834.66
Epoch 9, Iteration 35736 in 3.46 seconds. Samples per second 10341.31
Epoch 10, Iteration 35736 in 3.36 seconds. Samples per second 10629.33
Epoch 11, Iteration 35736 in 3.33 seconds. Samples per second 10717.88
Epoch 12, Iteration 35736 in 3.38 seconds. Samples per second 10586.21
Epoch 13, Iteration 35736 in 3.19 seconds. Samples per second 11214.62
Epoch 14, Iteration 35736 in 3.19 seconds. Samples per second 11207.59
Epoch 15, Iteration 357

[I 2025-01-05 17:25:28,367] Trial 10 finished with value: 0.0002664826418719655 and parameters: {'topK': 0, 'epochs': 50, 'lambda_i': 1.048491783482435e-05, 'lambda_j': 0.08494553884169632, 'learning_rate': 0.007390946940122691}. Best is trial 1 with value: 0.03301220945830492.


Epoch 1, Iteration 35736 in 10.56 seconds. Samples per second 3384.85
Epoch 2, Iteration 35736 in 7.62 seconds. Samples per second 4692.15
Epoch 3, Iteration 35736 in 5.96 seconds. Samples per second 5998.87
Epoch 4, Iteration 35736 in 5.11 seconds. Samples per second 6987.31
Epoch 5, Iteration 35736 in 4.52 seconds. Samples per second 7908.81
Epoch 6, Iteration 35736 in 4.16 seconds. Samples per second 8600.17
Epoch 7, Iteration 35736 in 3.84 seconds. Samples per second 9303.95
Epoch 8, Iteration 35736 in 3.64 seconds. Samples per second 9812.92
Epoch 9, Iteration 35736 in 3.65 seconds. Samples per second 9796.06
Epoch 10, Iteration 35736 in 3.43 seconds. Samples per second 10404.39
Epoch 11, Iteration 35736 in 3.39 seconds. Samples per second 10552.52
Epoch 12, Iteration 35736 in 3.37 seconds. Samples per second 10590.74
Epoch 13, Iteration 35736 in 3.28 seconds. Samples per second 10895.78
Epoch 14, Iteration 35736 in 3.27 seconds. Samples per second 10928.35
Epoch 15, Iteration 357

[I 2025-01-05 17:52:47,279] Trial 11 finished with value: 0.030909397410604278 and parameters: {'topK': 101, 'epochs': 19, 'lambda_i': 1.8956062731773304e-05, 'lambda_j': 0.01261063955717763, 'learning_rate': 0.010190323378517361}. Best is trial 1 with value: 0.03301220945830492.


Epoch 1, Iteration 35736 in 9.89 seconds. Samples per second 3612.13
Epoch 2, Iteration 35736 in 7.19 seconds. Samples per second 4969.45
Epoch 3, Iteration 35736 in 5.83 seconds. Samples per second 6131.60
Epoch 4, Iteration 35736 in 5.13 seconds. Samples per second 6965.70
Epoch 5, Iteration 35736 in 4.45 seconds. Samples per second 8021.86
Epoch 6, Iteration 35736 in 4.05 seconds. Samples per second 8825.61
Epoch 7, Iteration 35736 in 3.78 seconds. Samples per second 9442.69
Epoch 8, Iteration 35736 in 3.61 seconds. Samples per second 9899.82
Epoch 9, Iteration 35736 in 3.50 seconds. Samples per second 10203.84
Epoch 10, Iteration 35736 in 3.35 seconds. Samples per second 10672.24
Epoch 11, Iteration 35736 in 3.27 seconds. Samples per second 10928.78
Epoch 12, Iteration 35736 in 3.23 seconds. Samples per second 11073.88
Epoch 13, Iteration 35736 in 3.29 seconds. Samples per second 10876.35
Epoch 14, Iteration 35736 in 3.15 seconds. Samples per second 11341.28
Epoch 15, Iteration 357

[I 2025-01-05 18:19:26,800] Trial 12 finished with value: 0.030198658109675335 and parameters: {'topK': 89, 'epochs': 16, 'lambda_i': 1.164812826726499e-05, 'lambda_j': 0.01108009813759228, 'learning_rate': 0.0072048463400766795}. Best is trial 1 with value: 0.03301220945830492.


Epoch 1, Iteration 35736 in 10.03 seconds. Samples per second 3562.97
Epoch 2, Iteration 35736 in 7.28 seconds. Samples per second 4908.09
Epoch 3, Iteration 35736 in 5.83 seconds. Samples per second 6125.86
Epoch 4, Iteration 35736 in 4.98 seconds. Samples per second 7175.79
Epoch 5, Iteration 35736 in 4.37 seconds. Samples per second 8168.40
Epoch 6, Iteration 35736 in 4.05 seconds. Samples per second 8834.04
Epoch 7, Iteration 35736 in 3.80 seconds. Samples per second 9402.96
Epoch 8, Iteration 35736 in 3.57 seconds. Samples per second 10014.20
Epoch 9, Iteration 35736 in 3.53 seconds. Samples per second 10131.67
Epoch 10, Iteration 35736 in 3.40 seconds. Samples per second 10495.43
Epoch 11, Iteration 35736 in 3.32 seconds. Samples per second 10754.11
Epoch 12, Iteration 35736 in 3.29 seconds. Samples per second 10854.82
Epoch 13, Iteration 35736 in 3.30 seconds. Samples per second 10817.34
Epoch 14, Iteration 35736 in 3.24 seconds. Samples per second 11025.57
Epoch 15, Iteration 3

[I 2025-01-05 18:52:24,501] Trial 13 finished with value: 0.031999829758385716 and parameters: {'topK': 122, 'epochs': 36, 'lambda_i': 6.115559892064658e-05, 'lambda_j': 0.016618521383949855, 'learning_rate': 0.009202181787699529}. Best is trial 1 with value: 0.03301220945830492.


Epoch 1, Iteration 35736 in 10.03 seconds. Samples per second 3562.51
Epoch 2, Iteration 35736 in 7.25 seconds. Samples per second 4927.38
Epoch 3, Iteration 35736 in 5.87 seconds. Samples per second 6085.83
Epoch 4, Iteration 35736 in 4.96 seconds. Samples per second 7202.81
Epoch 5, Iteration 35736 in 4.44 seconds. Samples per second 8056.01
Epoch 6, Iteration 35736 in 4.03 seconds. Samples per second 8869.35
Epoch 7, Iteration 35736 in 3.91 seconds. Samples per second 9145.24
Epoch 8, Iteration 35736 in 9.26 seconds. Samples per second 3861.14
Epoch 9, Iteration 35736 in 3.94 seconds. Samples per second 9080.44
Epoch 10, Iteration 35736 in 3.65 seconds. Samples per second 9800.88
Epoch 11, Iteration 35736 in 3.51 seconds. Samples per second 10190.26
Epoch 12, Iteration 35736 in 3.37 seconds. Samples per second 10588.58
Epoch 13, Iteration 35736 in 3.32 seconds. Samples per second 10776.76
Epoch 14, Iteration 35736 in 3.38 seconds. Samples per second 10573.10
Epoch 15, Iteration 3573

[I 2025-01-05 19:24:11,450] Trial 14 finished with value: 0.030917047411810217 and parameters: {'topK': 127, 'epochs': 35, 'lambda_i': 0.00013558305148759082, 'lambda_j': 0.0330457027192364, 'learning_rate': 0.002958943615098495}. Best is trial 1 with value: 0.03301220945830492.


## Some optuna visualizations on recommender parameters

In [14]:
if not config['tune_parameters']:
    optuna_study = optuna.load_study(study_name=f'hyperparameters_tuning_{config["model"]}_{config["metric"]}', storage=f'sqlite:///{config["database_path"]}')
    
fig = optuna.visualization.plot_slice(optuna_study)
fig.show()

In [15]:
if not config['tune_parameters']:
    optuna_study = optuna.load_study(study_name=f'hyperparameters_tuning_{config["model"]}_{config["metric"]}', storage=f'sqlite:///{config["database_path"]}')
    
fig = optuna.visualization.plot_param_importances(optuna_study)
fig.show()

## Let's train the recommender with best parameter values

In [16]:
if config['tune_best_params']:

    if config['tune_parameters']:
        best_params = optuna_study.best_trial.params
    else: 
        with open(f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/best_params_{config["model"]}_{config["metric"]}.json', 'r') as best_params_json:
            best_params = json.load(best_params_json)

    recommender_instance = SLIM_BPR_Python(URM_train + URM_validation)
    recommender_instance.fit(**best_params)

Epoch 1, Iteration 35736 in 10.82 seconds. Samples per second 3302.76
Epoch 2, Iteration 35736 in 7.59 seconds. Samples per second 4706.34
Epoch 3, Iteration 35736 in 6.04 seconds. Samples per second 5915.70
Epoch 4, Iteration 35736 in 5.12 seconds. Samples per second 6976.37
Epoch 5, Iteration 35736 in 4.62 seconds. Samples per second 7738.20
Epoch 6, Iteration 35736 in 4.08 seconds. Samples per second 8749.08
Epoch 7, Iteration 35736 in 3.86 seconds. Samples per second 9264.81
Epoch 8, Iteration 35736 in 3.64 seconds. Samples per second 9812.97
Epoch 9, Iteration 35736 in 3.55 seconds. Samples per second 10077.36
Epoch 10, Iteration 35736 in 3.44 seconds. Samples per second 10387.07
Epoch 11, Iteration 35736 in 3.35 seconds. Samples per second 10675.80
Epoch 12, Iteration 35736 in 3.35 seconds. Samples per second 10669.43
Epoch 13, Iteration 35736 in 8.04 seconds. Samples per second 4445.05
Epoch 14, Iteration 35736 in 5.87 seconds. Samples per second 6083.70
Epoch 15, Iteration 3573

# Testing

Create the recommendations for the submission. 

In [17]:
if config['tune_best_params']:

    data_target_users_test = pd.read_csv('/kaggle/working/RECsys_Challenge2024/Dataset/data_target_users_test.csv')
    create_submission(data_target_users_test, recommender_instance, f'/kaggle/working/submission_{config["model"]}_{config["metric"]}.csv')

Submission file saved as /kaggle/working/submission_SLIM_BPR_MAP.csv


# Save Version on GitHub 

Write or import a json file where best hyperparameters are saved. 

In [18]:
if config['tune_parameters']:
    with open(f'/kaggle/working/best_params_{config["model"]}_{config["metric"]}.json', 'w') as params_file:
        json.dump(optuna_study.best_params, params_file)
        
    if config['save_github']:
        upload_file(
            f'/kaggle/working/best_params_{config["model"]}_{config["metric"]}.json', 
            f'{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/best_params_{config["model"]}_{config["metric"]}.json', 
            f'{config["model"]}_{config["metric"]} tuning results (from kaggle notebook)',
            repo
        )
elif config['copy_prev_best_params']:
    shutil.copyfile(
        f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/'\
        f'best_params_{config["model"]}_{config["metric"]}.json', 
        f'/kaggle/working/best_params_{config["model"]}_{config["metric"]}.json'
    )

File 'TrainedModels/WithKFCV/SLIM/SLIM_BPRRecommender/OptimizingMAP/best_params_SLIM_BPR_MAP.json' created successfully.


Save the history of the tuned model.

In [19]:
if config['save_github'] and config['tune_parameters']:
    upload_file(
        config['database_path'], 
        f'{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/history_{config["model"]}_{config["metric"]}.db',
        f'Tuning {config["model"]}_{config["metric"]} db updated results (from kaggle notebook)',
        repo
    )

File 'TrainedModels/WithKFCV/SLIM/SLIM_BPRRecommender/OptimizingMAP/history_SLIM_BPR_MAP.db' created successfully.


Save the best trained model and its submission.

In [20]:
if config['save_github'] and config['tune_best_params']: 
    upload_file(
                f'/kaggle/working/submission_{config["model"]}_{config["metric"]}.csv', 
                f'{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/Submission/submission_{config["model"]}_{config["metric"]}.csv', 
                f'New {config["model"]}_{config["metric"]} submission (from kaggle notebook)',
                repo
            )

File 'TrainedModels/WithKFCV/SLIM/SLIM_BPRRecommender/OptimizingMAP/Submission/submission_SLIM_BPR_MAP.csv' created successfully.
