# Set up the connection with GitHub

In [1]:
# Uncomment to delete a previous clone of the repository from /kaggle/working before cloning it again.
# ! cd /kaggle/working && rm -rf RECsys_Challenge2024

In [2]:
from kaggle_secrets import UserSecretsClient

# Read the GitHub access token from the Kaggle secret named "Token".
token = UserSecretsClient().get_secret("Token")

# Clone the project repository over HTTPS, authenticating with the token.
# NOTE(review): the token is interpolated into the remote URL, so it presumably ends up in the
# clone's git configuration — confirm this is acceptable and avoid echoing the URL in any output.
! git clone https://{token}@github.com/madratak/RECsys_Challenge2024.git

Cloning into 'RECsys_Challenge2024'...
remote: Enumerating objects: 4251, done.[K
remote: Counting objects: 100% (1724/1724), done.[K
remote: Compressing objects: 100% (637/637), done.[K
remote: Total 4251 (delta 918), reused 1560 (delta 841), pack-reused 2527 (from 1)[K
Receiving objects: 100% (4251/4251), 171.96 MiB | 29.40 MiB/s, done.
Resolving deltas: 100% (2437/2437), done.
Updating files: 100% (395/395), done.


In [3]:
! pip install PyGithub requests

Collecting PyGithub
  Downloading PyGithub-2.5.0-py3-none-any.whl.metadata (3.9 kB)
Collecting pynacl>=1.4.0 (from PyGithub)
  Downloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl.metadata (8.6 kB)
Downloading PyGithub-2.5.0-py3-none-any.whl (375 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m375.9/375.9 kB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (856 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m856.7/856.7 kB[0m [31m31.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pynacl, PyGithub
Successfully installed PyGithub-2.5.0 pynacl-1.5.0


In [4]:
# Make the cloned repository the working directory
# (presumably so that its packages, e.g. Utils and Recommenders, can be imported by the cells below).
%cd /kaggle/working/RECsys_Challenge2024 
# Run the repository script that compiles its Cython files with the current Python environment.
! python run_compile_all_cython.py

/kaggle/working/RECsys_Challenge2024
run_compile_all_cython: Found 11 Cython files in 5 folders...
run_compile_all_cython: All files will be compiled using your current python environment: '/opt/conda/bin/python'
Compiling [1/11]: MatrixFactorizationImpressions_Cython_Epoch.pyx... 
In file included from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/ndarraytypes.h:1929[m[K,
                 from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/ndarrayobject.h:12[m[K,
                 from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/arrayobject.h:5[m[K,
                 from [01m[KMatrixFactorizationImpressions_Cython_Epoch.c:1252[m[K:
      |  [01;35m[K^~~~~~~[m[K
[01m[KMatrixFactorizationImpressions_Cython_Epoch.c:[m[K In function '[01m[K__pyx_f_43MatrixFactorizationImpressions_Cython_Epoch_32MatrixFactorization_Cython_Epoch_sampleBPR_Cython[m[K':
30351 |       [01;35m[K__py

In [5]:
from Utils.notebookFunctions import *
import numpy as np
import pandas as pd
import scipy.sparse as sps
import time
import shutil
import optuna
import json
import os
from Utils.seconds_to_biggest_unit import seconds_to_biggest_unit


# Absolute path of the repository clone inside the Kaggle working directory.
K_PATH = '/kaggle/working/RECsys_Challenge2024'
# Repository-relative folder under which the SLIM tuning artifacts (history DB, best params, submissions) are stored.
GH_PATH = 'TrainedModels/WithoutKFCV/SLIM'

# Seed NumPy's global random generator.
# NOTE(review): presumably this is what makes the random train/validation split repeatable — confirm
# that the split helper draws from np.random.
np.random.seed(42)

## Import the repository

In [6]:
# Obtain a handle on the GitHub repository using the token read earlier; `repo` is later passed to
# upload_file. get_repo_from_github is presumably provided by the star import of Utils.notebookFunctions.
repo = get_repo_from_github(token)

Repository 'RECsys_Challenge2024' found.


In [7]:
# Run configuration read by the cells below.
config = dict(
    model='SLIM_BPR',                                            # used to build study, file and folder names
    metric='MAP',                                                # used to build study, file and folder names
    tune_parameters=True,                                        # run the Optuna search in this session
    database_path='/kaggle/working/history_SLIM_BPR_MAP.db',     # local SQLite file backing the Optuna study
    copy_prev_best_params=False,                                 # without tuning: copy the best params JSON from the clone
    tune_best_params=True,                                       # fit the final model and create the submission
    save_github=True,                                            # upload the produced artifacts to GitHub
)

Import the database where previous tuning trials have been saved.

In [8]:
# Location, inside the repository clone, of the study history saved by previous tuning runs.
previous_history_db = (
    f'{K_PATH}/{GH_PATH}/{config["model"]}_Recommender/'
    f'Optimizing{config["metric"]}/'
    f'history_{config["model"]}_{config["metric"]}.db'
)

try:
    # Copy it to the path used as Optuna storage so that tuning resumes from it.
    shutil.copyfile(previous_history_db, config['database_path'])
except FileNotFoundError:
    # No history available yet: Optuna will create the database itself.
    pass

# Construction of URM and ICM matrices

In [9]:
# Load the interactions; the columns used below are user_id, item_id and data.
URM_all_dataframe = pd.read_csv("/kaggle/input/recommender-system-2024-challenge-polimi/data_train.csv")

# The ids are used directly as row/column indices, so the matrix must be sized from the largest id.
# Counting distinct ids only gives the right shape when the ids are exactly 0..n-1; with a gap the
# constructor would reject the out-of-range index. Both approaches agree when the ids are dense.
n_users = int(URM_all_dataframe["user_id"].max()) + 1
n_items = int(URM_all_dataframe["item_id"].max()) + 1

# User Rating Matrix: one row per user id, one column per item id.
URM_all = sps.csr_matrix(
    (
        URM_all_dataframe["data"].values,
        (URM_all_dataframe["user_id"].values, URM_all_dataframe["item_id"].values),
    ),
    shape=(n_users, n_items),
)

URM_all

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 1764607 stored elements and shape (35736, 38121)>

In [10]:
# Load the item metadata; the columns used below are item_id, feature_id and data.
ICM_dataframe = pd.read_csv("/kaggle/input/recommender-system-2024-challenge-polimi/data_ICM_metadata.csv")

# The ids are used directly as row/column indices, so the matrix must be sized from the largest id.
# Counting distinct ids only gives the right shape when the ids are exactly 0..n-1; with a gap the
# constructor would reject the out-of-range index. Both approaches agree when the ids are dense.
# Note that n_items is (re)assigned here from the metadata file.
n_items = int(ICM_dataframe["item_id"].max()) + 1
n_features = int(ICM_dataframe["feature_id"].max()) + 1

# Item Content Matrix: one row per item id, one column per feature id.
ICM_all = sps.csr_matrix(
    (
        ICM_dataframe["data"].values,
        (ICM_dataframe["item_id"].values, ICM_dataframe["feature_id"].values),
    ),
    shape=(n_items, n_features),
)

ICM_all

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 2940040 stored elements and shape (38121, 94331)>

# Training

In [11]:
from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

# Split the interactions into a training and a validation matrix, requesting 80% for training.
# NOTE(review): the helper name indicates a random sample; presumably it is only repeatable through
# the np.random.seed call made at setup — confirm.
URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.80)

# Evaluator bound to the validation interactions; metrics are computed at cutoff 10 only.
evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])

EvaluatorHoldout: Ignoring 141 ( 0.4%) Users that have less than 1 test interactions


In [12]:
from Recommenders.SLIM.SLIM_BPR_Python import SLIM_BPR_Python

def objective_function_slim_bpr(optuna_trial):
    """Optuna objective: train SLIM BPR with sampled hyperparameters and score it on validation.

    A new ``SLIM_BPR_Python`` is built on ``URM_train`` for every trial, fitted with the values
    suggested by ``optuna_trial`` and evaluated with ``evaluator_validation``.

    Parameters
    ----------
    optuna_trial
        Trial object supplied by Optuna; used only through ``suggest_int`` / ``suggest_float``.

    Returns
    -------
    The "MAP" entry of the cutoff-10 row of the evaluation results.
    """
    recommender = SLIM_BPR_Python(URM_train)

    # Integer hyperparameters.
    topK = optuna_trial.suggest_int("topK", 0, 500)
    epochs = optuna_trial.suggest_int("epochs", 1, 50)

    # Regularisation terms and learning rate are sampled on a logarithmic scale.
    lambda_i = optuna_trial.suggest_float("lambda_i", 1e-5, 1e-1, log=True)
    lambda_j = optuna_trial.suggest_float("lambda_j", 1e-5, 1e-1, log=True)
    learning_rate = optuna_trial.suggest_float("learning_rate", 1e-4, 1e-1, log=True)

    recommender.fit(
        topK=topK,
        epochs=epochs,
        lambda_i=lambda_i,
        lambda_j=lambda_j,
        learning_rate=learning_rate,
    )

    # Only the results table is needed; the second returned value is discarded.
    validation_results, _ = evaluator_validation.evaluateRecommender(recommender)
    cutoff_10_row = validation_results.loc[10]

    return cutoff_10_row["MAP"]

In [13]:
if config['tune_parameters']:

    # load_if_exists=True reuses the study already stored in the SQLite file (if any), so new
    # trials are appended to the history of previous runs.
    optuna_study = optuna.create_study(
        study_name=f'hyperparameters_tuning_{config["model"]}_{config["metric"]}',
        storage=f'sqlite:///{config["database_path"]}',
        direction='maximize',
        load_if_exists=True,
    )

    # Each run of this cell adds 10 trials to the study.
    optuna_study.optimize(objective_function_slim_bpr, n_trials=10)

[I 2024-12-16 12:41:59,599] Using an existing study with name 'hyperparameters_tuning_SLIM_BPR_MAP' instead of creating a new one.


Epoch 1, Iteration 35736 in 11.31 seconds. Samples per second 3158.94
Epoch 2, Iteration 35736 in 7.89 seconds. Samples per second 4527.63
Epoch 3, Iteration 35736 in 6.20 seconds. Samples per second 5766.67
Epoch 4, Iteration 35736 in 5.23 seconds. Samples per second 6835.93
Epoch 5, Iteration 35736 in 4.61 seconds. Samples per second 7749.21
Epoch 6, Iteration 35736 in 4.22 seconds. Samples per second 8468.61
Epoch 7, Iteration 35736 in 3.82 seconds. Samples per second 9348.17
Epoch 8, Iteration 35736 in 3.69 seconds. Samples per second 9681.78
Epoch 9, Iteration 35736 in 3.49 seconds. Samples per second 10252.78
Epoch 10, Iteration 35736 in 3.34 seconds. Samples per second 10712.95
Epoch 11, Iteration 35736 in 3.36 seconds. Samples per second 10642.16
Epoch 12, Iteration 35736 in 3.22 seconds. Samples per second 11096.58
Epoch 13, Iteration 35736 in 3.15 seconds. Samples per second 11359.71
Epoch 14, Iteration 35736 in 3.25 seconds. Samples per second 10987.29
Epoch 15, Iteration 35

[I 2024-12-16 12:46:53,631] Trial 100 finished with value: 0.03209099837902933 and parameters: {'topK': 258, 'epochs': 34, 'lambda_i': 0.09618816669745978, 'lambda_j': 1.4094463989441331e-05, 'learning_rate': 0.0465101829513746}. Best is trial 98 with value: 0.040251465450157724.


Epoch 1, Iteration 35736 in 17.24 seconds. Samples per second 2073.25
Epoch 2, Iteration 35736 in 16.47 seconds. Samples per second 2169.34
Epoch 3, Iteration 35736 in 8.38 seconds. Samples per second 4262.06
Epoch 4, Iteration 35736 in 6.78 seconds. Samples per second 5269.67
Epoch 5, Iteration 35736 in 5.78 seconds. Samples per second 6182.23
Epoch 6, Iteration 35736 in 5.08 seconds. Samples per second 7033.25
Epoch 7, Iteration 35736 in 4.52 seconds. Samples per second 7904.17
Epoch 8, Iteration 35736 in 4.21 seconds. Samples per second 8489.50
Epoch 9, Iteration 35736 in 3.86 seconds. Samples per second 9250.87
Epoch 10, Iteration 35736 in 3.73 seconds. Samples per second 9569.02
Epoch 11, Iteration 35736 in 3.52 seconds. Samples per second 10162.45
Epoch 12, Iteration 35736 in 3.47 seconds. Samples per second 10298.10
Epoch 13, Iteration 35736 in 3.34 seconds. Samples per second 10683.49
Epoch 14, Iteration 35736 in 3.38 seconds. Samples per second 10563.42
Epoch 15, Iteration 357

[I 2024-12-16 12:53:38,411] Trial 101 finished with value: 0.040003929792171274 and parameters: {'topK': 10, 'epochs': 29, 'lambda_i': 0.05480373728893908, 'lambda_j': 0.0001997566172648345, 'learning_rate': 0.08588879753841518}. Best is trial 98 with value: 0.040251465450157724.


Epoch 1, Iteration 35736 in 12.05 seconds. Samples per second 2966.14
Epoch 2, Iteration 35736 in 8.65 seconds. Samples per second 4130.93
Epoch 3, Iteration 35736 in 6.89 seconds. Samples per second 5186.85
Epoch 4, Iteration 35736 in 5.77 seconds. Samples per second 6189.26
Epoch 5, Iteration 35736 in 5.00 seconds. Samples per second 7152.83
Epoch 6, Iteration 35736 in 4.49 seconds. Samples per second 7952.07
Epoch 7, Iteration 35736 in 4.08 seconds. Samples per second 8753.26
Epoch 8, Iteration 35736 in 3.90 seconds. Samples per second 9163.55
Epoch 9, Iteration 35736 in 3.69 seconds. Samples per second 9678.53
Epoch 10, Iteration 35736 in 3.61 seconds. Samples per second 9910.46
Epoch 11, Iteration 35736 in 3.46 seconds. Samples per second 10317.65
Epoch 12, Iteration 35736 in 3.52 seconds. Samples per second 10162.04
Epoch 13, Iteration 35736 in 3.42 seconds. Samples per second 10442.84
Epoch 14, Iteration 35736 in 3.33 seconds. Samples per second 10741.28
Epoch 15, Iteration 3573

[I 2024-12-16 13:00:25,230] Trial 102 finished with value: 0.039801831450376066 and parameters: {'topK': 13, 'epochs': 29, 'lambda_i': 0.0694379641882813, 'lambda_j': 1.825467392282596e-05, 'learning_rate': 0.08362110418404013}. Best is trial 98 with value: 0.040251465450157724.


Epoch 1, Iteration 35736 in 11.52 seconds. Samples per second 3101.49
Epoch 2, Iteration 35736 in 7.53 seconds. Samples per second 4742.71
Epoch 3, Iteration 35736 in 6.03 seconds. Samples per second 5927.97
Epoch 4, Iteration 35736 in 5.19 seconds. Samples per second 6884.69
Epoch 5, Iteration 35736 in 4.55 seconds. Samples per second 7862.32
Epoch 6, Iteration 35736 in 4.42 seconds. Samples per second 8092.55
Epoch 7, Iteration 35736 in 3.97 seconds. Samples per second 8995.23
Epoch 8, Iteration 35736 in 3.80 seconds. Samples per second 9400.48
Epoch 9, Iteration 35736 in 3.75 seconds. Samples per second 9517.13
Epoch 10, Iteration 35736 in 3.66 seconds. Samples per second 9757.82
Epoch 11, Iteration 35736 in 3.50 seconds. Samples per second 10215.82
Epoch 12, Iteration 35736 in 3.44 seconds. Samples per second 10384.70
Epoch 13, Iteration 35736 in 3.29 seconds. Samples per second 10862.98
Epoch 14, Iteration 35736 in 3.30 seconds. Samples per second 10833.81
Epoch 15, Iteration 3573

[I 2024-12-16 13:07:23,191] Trial 103 finished with value: 0.0397519927669514 and parameters: {'topK': 9, 'epochs': 32, 'lambda_i': 0.05461075454975078, 'lambda_j': 1.4566116196914038e-05, 'learning_rate': 0.0661247201411608}. Best is trial 98 with value: 0.040251465450157724.


Epoch 1, Iteration 35736 in 10.49 seconds. Samples per second 3407.08
Epoch 2, Iteration 35736 in 7.58 seconds. Samples per second 4715.11
Epoch 3, Iteration 35736 in 6.24 seconds. Samples per second 5730.25
Epoch 4, Iteration 35736 in 5.37 seconds. Samples per second 6654.58
Epoch 5, Iteration 35736 in 4.72 seconds. Samples per second 7573.31
Epoch 6, Iteration 35736 in 4.21 seconds. Samples per second 8484.59
Epoch 7, Iteration 35736 in 3.90 seconds. Samples per second 9168.28
Epoch 8, Iteration 35736 in 3.71 seconds. Samples per second 9620.55
Epoch 9, Iteration 35736 in 3.51 seconds. Samples per second 10183.03
Epoch 10, Iteration 35736 in 3.49 seconds. Samples per second 10248.88
Epoch 11, Iteration 35736 in 3.41 seconds. Samples per second 10494.27
Epoch 12, Iteration 35736 in 3.43 seconds. Samples per second 10413.74
Epoch 13, Iteration 35736 in 3.35 seconds. Samples per second 10663.82
Epoch 14, Iteration 35736 in 3.26 seconds. Samples per second 10950.64
Epoch 15, Iteration 35

[I 2024-12-16 13:14:06,424] Trial 104 finished with value: 0.0397985259656123 and parameters: {'topK': 8, 'epochs': 32, 'lambda_i': 0.054769710219132954, 'lambda_j': 1.567920051047592e-05, 'learning_rate': 0.0650567896754302}. Best is trial 98 with value: 0.040251465450157724.


Epoch 1, Iteration 35736 in 10.45 seconds. Samples per second 3419.99
Epoch 2, Iteration 35736 in 7.45 seconds. Samples per second 4794.06
Epoch 3, Iteration 35736 in 5.97 seconds. Samples per second 5982.57
Epoch 4, Iteration 35736 in 5.06 seconds. Samples per second 7068.19
Epoch 5, Iteration 35736 in 4.50 seconds. Samples per second 7947.65
Epoch 6, Iteration 35736 in 4.31 seconds. Samples per second 8285.32
Epoch 7, Iteration 35736 in 3.89 seconds. Samples per second 9179.68
Epoch 8, Iteration 35736 in 3.81 seconds. Samples per second 9374.06
Epoch 9, Iteration 35736 in 5.04 seconds. Samples per second 7094.86
Epoch 10, Iteration 35736 in 3.62 seconds. Samples per second 9862.60
Epoch 11, Iteration 35736 in 3.52 seconds. Samples per second 10165.39
Epoch 12, Iteration 35736 in 9.11 seconds. Samples per second 3921.61
Epoch 13, Iteration 35736 in 6.02 seconds. Samples per second 5938.99
Epoch 14, Iteration 35736 in 3.67 seconds. Samples per second 9742.49
Epoch 15, Iteration 35736 i

[I 2024-12-16 13:21:13,054] Trial 105 finished with value: 0.039890688232026926 and parameters: {'topK': 7, 'epochs': 35, 'lambda_i': 0.054813343680909524, 'lambda_j': 1.7503080113904803e-05, 'learning_rate': 0.061432099811390965}. Best is trial 98 with value: 0.040251465450157724.


Epoch 1, Iteration 35736 in 10.34 seconds. Samples per second 3456.25
Epoch 2, Iteration 35736 in 7.69 seconds. Samples per second 4645.26
Epoch 3, Iteration 35736 in 6.05 seconds. Samples per second 5908.11
Epoch 4, Iteration 35736 in 5.17 seconds. Samples per second 6918.70
Epoch 5, Iteration 35736 in 4.59 seconds. Samples per second 7789.48
Epoch 6, Iteration 35736 in 4.18 seconds. Samples per second 8556.86
Epoch 7, Iteration 35736 in 3.83 seconds. Samples per second 9329.37
Epoch 8, Iteration 35736 in 3.75 seconds. Samples per second 9517.81
Epoch 9, Iteration 35736 in 3.53 seconds. Samples per second 10135.71
Epoch 10, Iteration 35736 in 3.56 seconds. Samples per second 10044.10
Epoch 11, Iteration 35736 in 3.42 seconds. Samples per second 10458.09
Epoch 12, Iteration 35736 in 3.30 seconds. Samples per second 10820.97
Epoch 13, Iteration 35736 in 3.32 seconds. Samples per second 10776.18
Epoch 14, Iteration 35736 in 3.24 seconds. Samples per second 11027.74
Epoch 15, Iteration 35

[I 2024-12-16 13:28:24,398] Trial 106 finished with value: 0.03961602864679026 and parameters: {'topK': 6, 'epochs': 35, 'lambda_i': 0.09860812359043748, 'lambda_j': 1.710027399643863e-05, 'learning_rate': 0.05159466785248036}. Best is trial 98 with value: 0.040251465450157724.


Epoch 1, Iteration 35736 in 10.37 seconds. Samples per second 3444.89
Epoch 2, Iteration 35736 in 7.44 seconds. Samples per second 4804.36
Epoch 3, Iteration 35736 in 6.10 seconds. Samples per second 5854.40
Epoch 4, Iteration 35736 in 5.06 seconds. Samples per second 7057.81
Epoch 5, Iteration 35736 in 4.73 seconds. Samples per second 7554.88
Epoch 6, Iteration 35736 in 4.13 seconds. Samples per second 8650.16
Epoch 7, Iteration 35736 in 3.79 seconds. Samples per second 9429.54
Epoch 8, Iteration 35736 in 3.69 seconds. Samples per second 9688.85
Epoch 9, Iteration 35736 in 3.53 seconds. Samples per second 10125.45
Epoch 10, Iteration 35736 in 3.58 seconds. Samples per second 9978.97
Epoch 11, Iteration 35736 in 3.61 seconds. Samples per second 9899.56
Epoch 12, Iteration 35736 in 3.42 seconds. Samples per second 10456.67
Epoch 13, Iteration 35736 in 3.37 seconds. Samples per second 10611.15
Epoch 14, Iteration 35736 in 3.29 seconds. Samples per second 10856.44
Epoch 15, Iteration 3573

[I 2024-12-16 13:35:35,944] Trial 107 finished with value: 0.03966284501345459 and parameters: {'topK': 6, 'epochs': 35, 'lambda_i': 0.05969159732578023, 'lambda_j': 1.4761611530692492e-05, 'learning_rate': 0.058195060469660075}. Best is trial 98 with value: 0.040251465450157724.


Epoch 1, Iteration 35736 in 10.41 seconds. Samples per second 3433.85
Epoch 2, Iteration 35736 in 7.61 seconds. Samples per second 4694.32
Epoch 3, Iteration 35736 in 6.21 seconds. Samples per second 5750.40
Epoch 4, Iteration 35736 in 5.17 seconds. Samples per second 6914.54
Epoch 5, Iteration 35736 in 4.57 seconds. Samples per second 7822.75
Epoch 6, Iteration 35736 in 4.11 seconds. Samples per second 8693.12
Epoch 7, Iteration 35736 in 3.88 seconds. Samples per second 9203.59
Epoch 8, Iteration 35736 in 3.75 seconds. Samples per second 9534.15
Epoch 9, Iteration 35736 in 3.55 seconds. Samples per second 10068.85
Epoch 10, Iteration 35736 in 3.45 seconds. Samples per second 10364.20
Epoch 11, Iteration 35736 in 3.30 seconds. Samples per second 10821.57
Epoch 12, Iteration 35736 in 3.23 seconds. Samples per second 11080.56
Epoch 13, Iteration 35736 in 3.26 seconds. Samples per second 10969.52
Epoch 14, Iteration 35736 in 3.19 seconds. Samples per second 11185.36
Epoch 15, Iteration 35

[I 2024-12-16 13:42:35,460] Trial 108 finished with value: 0.00028209999175022335 and parameters: {'topK': 0, 'epochs': 36, 'lambda_i': 0.07256460747172962, 'lambda_j': 1.4777453819430153e-05, 'learning_rate': 0.0032414251232474696}. Best is trial 98 with value: 0.040251465450157724.


Epoch 1, Iteration 35736 in 10.44 seconds. Samples per second 3423.91
Epoch 2, Iteration 35736 in 7.58 seconds. Samples per second 4711.42
Epoch 3, Iteration 35736 in 6.07 seconds. Samples per second 5886.24
Epoch 4, Iteration 35736 in 5.15 seconds. Samples per second 6935.33
Epoch 5, Iteration 35736 in 4.67 seconds. Samples per second 7645.79
Epoch 6, Iteration 35736 in 4.24 seconds. Samples per second 8435.23
Epoch 7, Iteration 35736 in 4.16 seconds. Samples per second 8597.74
Epoch 8, Iteration 35736 in 5.47 seconds. Samples per second 6527.18
Epoch 9, Iteration 35736 in 4.17 seconds. Samples per second 8576.40
Epoch 10, Iteration 35736 in 3.87 seconds. Samples per second 9229.74
Epoch 11, Iteration 35736 in 3.70 seconds. Samples per second 9653.25
Epoch 12, Iteration 35736 in 3.55 seconds. Samples per second 10076.17
Epoch 13, Iteration 35736 in 3.66 seconds. Samples per second 9761.09
Epoch 14, Iteration 35736 in 3.43 seconds. Samples per second 10425.58
Epoch 15, Iteration 35736 

[I 2024-12-16 13:50:15,415] Trial 109 finished with value: 0.04014887390550975 and parameters: {'topK': 6, 'epochs': 39, 'lambda_i': 0.05984158427466074, 'lambda_j': 1.0299885316637721e-05, 'learning_rate': 0.05830013768602782}. Best is trial 98 with value: 0.040251465450157724.


## Optuna visualizations of the recommender hyperparameters

In [14]:
if not config['tune_parameters']:
    # Tuning was skipped in this run, so the study is loaded from the SQLite storage.
    optuna_study = optuna.load_study(
        study_name=f'hyperparameters_tuning_{config["model"]}_{config["metric"]}',
        storage=f'sqlite:///{config["database_path"]}',
    )

# Slice plot of the study's trials.
fig = optuna.visualization.plot_slice(optuna_study)
fig.show()

In [15]:
if not config['tune_parameters']:
    # Tuning was skipped in this run, so the study is loaded from the SQLite storage.
    optuna_study = optuna.load_study(
        study_name=f'hyperparameters_tuning_{config["model"]}_{config["metric"]}',
        storage=f'sqlite:///{config["database_path"]}',
    )

# Hyperparameter importance plot for the study.
fig = optuna.visualization.plot_param_importances(optuna_study)
fig.show()

## Train the recommender with the best hyperparameter values

In [16]:
if config['tune_best_params']:

    if not config['tune_parameters']:
        # No tuning in this session: read the best parameters stored in the repository clone.
        best_params_path = (
            f'{K_PATH}/{GH_PATH}/{config["model"]}_Recommender/'
            f'Optimizing{config["metric"]}/'
            f'best_params_{config["model"]}_{config["metric"]}.json'
        )
        with open(best_params_path, 'r') as best_params_json:
            best_params = json.load(best_params_json)
    else:
        # Tuning ran in this session: take the parameters of the study's best trial.
        best_params = optuna_study.best_trial.params

    # The final model is fitted on the sum of the training and validation matrices.
    recommender_instance = SLIM_BPR_Python(URM_train + URM_validation)
    recommender_instance.fit(**best_params)

Epoch 1, Iteration 35736 in 11.95 seconds. Samples per second 2991.34
Epoch 2, Iteration 35736 in 8.26 seconds. Samples per second 4328.19
Epoch 3, Iteration 35736 in 6.67 seconds. Samples per second 5356.07
Epoch 4, Iteration 35736 in 5.56 seconds. Samples per second 6424.78
Epoch 5, Iteration 35736 in 4.89 seconds. Samples per second 7303.30
Epoch 6, Iteration 35736 in 4.35 seconds. Samples per second 8208.30
Epoch 7, Iteration 35736 in 4.17 seconds. Samples per second 8568.95
Epoch 8, Iteration 35736 in 3.94 seconds. Samples per second 9077.43
Epoch 9, Iteration 35736 in 3.74 seconds. Samples per second 9563.65
Epoch 10, Iteration 35736 in 3.83 seconds. Samples per second 9333.79
Epoch 11, Iteration 35736 in 3.71 seconds. Samples per second 9639.38
Epoch 12, Iteration 35736 in 3.85 seconds. Samples per second 9280.36
Epoch 13, Iteration 35736 in 3.49 seconds. Samples per second 10252.93
Epoch 14, Iteration 35736 in 3.41 seconds. Samples per second 10493.15
Epoch 15, Iteration 35736 

# Testing

Create the recommendations for the submission. 

In [17]:
if config['tune_best_params']:

    # Target users file shipped with the challenge data.
    data_target_users_test = pd.read_csv(
        '/kaggle/input/recommender-system-2024-challenge-polimi/data_target_users_test.csv'
    )

    # Write the recommendations of the final model to the submission CSV in the working directory.
    create_submission(
        data_target_users_test,
        recommender_instance,
        f'/kaggle/working/submission_{config["model"]}.csv',
    )

Submission file saved as /kaggle/working/submission_SLIM_BPR.csv


# Save Version on GitHub 

Write (or copy from the repository) the JSON file in which the best hyperparameters are saved.

In [18]:
# File name and locations shared by the branches below.
best_params_filename = f'best_params_{config["model"]}_{config["metric"]}.json'
best_params_local_path = f'/kaggle/working/{best_params_filename}'
best_params_repo_dir = f'{GH_PATH}/{config["model"]}_Recommender/Optimizing{config["metric"]}'

if config['tune_parameters']:
    # Persist the best hyperparameters found by the study as JSON.
    with open(best_params_local_path, 'w') as params_file:
        json.dump(optuna_study.best_params, params_file)

    if config['save_github']:
        # Arguments are presumably (local file, path in the repository, commit message, repository).
        upload_file(
            best_params_local_path,
            f'{best_params_repo_dir}/{best_params_filename}',
            f'{config["model"]}_{config["metric"]} tuning results (from kaggle notebook)',
            repo,
        )
elif config['copy_prev_best_params']:
    # No tuning in this session: copy the previously saved JSON from the repository clone.
    shutil.copyfile(
        f'{K_PATH}/{best_params_repo_dir}/{best_params_filename}',
        best_params_local_path,
    )

File 'TrainedModels/WithoutKFCV/SLIM/SLIM_BPR_Recommender/OptimizingMAP/best_params_SLIM_BPR_MAP.json' updated successfully.


Save the history of the tuned model.

In [19]:
if config['save_github'] and config['tune_parameters']:
    # Destination of the Optuna history database inside the repository.
    history_db_repo_path = (
        f'{GH_PATH}/{config["model"]}_Recommender/Optimizing{config["metric"]}/'
        f'history_{config["model"]}_{config["metric"]}.db'
    )

    # Arguments are presumably (local file, path in the repository, commit message, repository).
    upload_file(
        config['database_path'],
        history_db_repo_path,
        f'Tuning {config["model"]}_{config["metric"]} db updated results (from kaggle notebook)',
        repo,
    )

File 'TrainedModels/WithoutKFCV/SLIM/SLIM_BPR_Recommender/OptimizingMAP/history_SLIM_BPR_MAP.db' updated successfully.


Save the best trained model and its submission.

In [20]:
if config['save_github'] and config['tune_best_params']:
    # Destination of the submission CSV inside the repository.
    submission_repo_path = (
        f'{GH_PATH}/{config["model"]}_Recommender/Optimizing{config["metric"]}/'
        f'Submission/submission_{config["model"]}_{config["metric"]}.csv'
    )

    # Arguments are presumably (local file, path in the repository, commit message, repository).
    upload_file(
        f'/kaggle/working/submission_{config["model"]}.csv',
        submission_repo_path,
        f'New {config["model"]}_{config["metric"]} submission (from kaggle notebook)',
        repo,
    )

File 'TrainedModels/WithoutKFCV/SLIM/SLIM_BPR_Recommender/OptimizingMAP/Submission/submission_SLIM_BPR_MAP.csv' created successfully.
