# Set up the connection with GitHub

In [1]:
# Uncomment to delete an existing clone of the repository from /kaggle/working
# before cloning it again in the next cell.
# ! cd /kaggle/working && rm -rf RECsys_Challenge2024

In [2]:
from kaggle_secrets import UserSecretsClient

token = UserSecretsClient().get_secret("Token")

! git clone https://{token}@github.com/madratak/RECsys_Challenge2024.git

Cloning into 'RECsys_Challenge2024'...
remote: Enumerating objects: 3954, done.[K
remote: Counting objects: 100% (1427/1427), done.[K
remote: Compressing objects: 100% (619/619), done.[K
remote: Total 3954 (delta 786), reused 1127 (delta 611), pack-reused 2527 (from 1)[K
Receiving objects: 100% (3954/3954), 166.91 MiB | 6.11 MiB/s, done.
Resolving deltas: 100% (2305/2305), done.
Updating files: 100% (372/372), done.


In [3]:
! pip install PyGithub requests

Collecting PyGithub
  Downloading PyGithub-2.5.0-py3-none-any.whl.metadata (3.9 kB)
Collecting pynacl>=1.4.0 (from PyGithub)
  Downloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl.metadata (8.6 kB)
Downloading PyGithub-2.5.0-py3-none-any.whl (375 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m375.9/375.9 kB[0m [31m11.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (856 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m856.7/856.7 kB[0m [31m27.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pynacl, PyGithub
Successfully installed PyGithub-2.5.0 pynacl-1.5.0


In [4]:
# Make the cloned repository the working directory -- presumably so that the repository's
# packages (Utils, Recommenders, Evaluation, ...) resolve in the imports of the next cells.
%cd /kaggle/working/RECsys_Challenge2024
# Run the repository's script that compiles its Cython files with the current Python environment.
! python run_compile_all_cython.py

/kaggle/working/RECsys_Challenge2024
run_compile_all_cython: Found 11 Cython files in 5 folders...
run_compile_all_cython: All files will be compiled using your current python environment: '/opt/conda/bin/python'
Compiling [1/11]: MatrixFactorizationImpressions_Cython_Epoch.pyx... 
In file included from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/ndarraytypes.h:1929[m[K,
                 from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/ndarrayobject.h:12[m[K,
                 from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/arrayobject.h:5[m[K,
                 from [01m[KMatrixFactorizationImpressions_Cython_Epoch.c:1252[m[K:
      |  [01;35m[K^~~~~~~[m[K
[01m[KMatrixFactorizationImpressions_Cython_Epoch.c:[m[K In function '[01m[K__pyx_f_43MatrixFactorizationImpressions_Cython_Epoch_32MatrixFactorization_Cython_Epoch_sampleBPR_Cython[m[K':
30351 |       [01;35m[K__py

In [5]:
from Utils.notebookFunctions import *
import numpy as np
import pandas as pd
import scipy.sparse as sps
import time
import shutil
import optuna
import json
import os
from Utils.seconds_to_biggest_unit import seconds_to_biggest_unit


# Root of the cloned repository inside the Kaggle working directory.
K_PATH = "/kaggle/working/RECsys_Challenge2024"
# Folder, relative to the repository root, under which this model family's artifacts live.
GH_PATH = "TrainedModels/WithoutKFCV/MatrixFactorization/PureSVDRecommender"

# Seed NumPy's global random generator.
np.random.seed(seed=42)

## Import the repository **RECsys_Challenge2024**

In [6]:
# Get a handle on the GitHub repository using the token read earlier; this handle is
# what the upload_file() calls in the saving cells at the end of the notebook receive.
repo = get_repo_from_github(token)

Repository 'RECsys_Challenge2024' found.


In [7]:
# Run configuration read by the cells below.
config = dict(
    # Model name; used to build file names, folder names and the Optuna study name.
    model='ScaledPureSVD',
    # Run the Optuna optimisation in this session.
    tune_parameters=True,
    # SQLite file used as Optuna storage.
    database_path='/kaggle/working/history_ScaledPureSVD.db',
    # When tuning is skipped: copy the best-params JSON stored in the repository to the working directory.
    copy_prev_best_params=False,
    # Fit the recommender with the best parameters and create the submission file.
    tune_best_params=True,
    # Upload the produced files to the GitHub repository.
    save_github=True,
)

Import the database where previous tuning trials have been saved.

In [8]:
# Copy the trial history stored in the cloned repository to the path used as Optuna
# storage, so that a study created with load_if_exists=True continues from those trials.
history_db_in_repo = f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/history_{config["model"]}.db'

try:
    shutil.copyfile(history_db_in_repo, config['database_path'])
except FileNotFoundError as copy_error:
    # Best effort: without a previous history Optuna simply creates a new database.
    # Report it instead of passing silently, so an unexpected restart from zero is noticed.
    print(f'Previous tuning history not copied ({copy_error}); Optuna will create a new database.')

# Construction of URM and ICM matrices

In [9]:
# Load the user-item interactions; the columns used below are user_id, item_id and data.
URM_all_dataframe = pd.read_csv("/kaggle/input/recommender-system-2024-challenge-polimi/data_train.csv")

# Size the matrix from the largest identifier rather than from the number of distinct
# identifiers: the ids are used directly as row/column indices, so any gap in the id
# range would give a shape too small for the largest index and make csr_matrix raise.
# With contiguous 0-based ids both computations give the same value.
n_users = int(URM_all_dataframe["user_id"].max()) + 1
n_items = int(URM_all_dataframe["item_id"].max()) + 1

# User Rating Matrix: one row per user id, one column per item id.
URM_all = sps.csr_matrix(
    (
        URM_all_dataframe["data"].values,
        (URM_all_dataframe["user_id"].values, URM_all_dataframe["item_id"].values),
    ),
    shape=(n_users, n_items),
)

URM_all

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 1764607 stored elements and shape (35736, 38121)>

In [10]:
# Load the item metadata; the columns used below are item_id, feature_id and data.
ICM_dataframe = pd.read_csv("/kaggle/input/recommender-system-2024-challenge-polimi/data_ICM_metadata.csv")

# Size the matrix from the largest identifier rather than from the number of distinct
# identifiers: the ids are used directly as row/column indices, so any gap in the id
# range would give a shape too small for the largest index and make csr_matrix raise.
# With contiguous 0-based ids both computations give the same value.
n_items = int(ICM_dataframe["item_id"].max()) + 1
n_features = int(ICM_dataframe["feature_id"].max()) + 1

# Item Content Matrix: one row per item id, one column per feature id.
ICM_all = sps.csr_matrix(
    (
        ICM_dataframe["data"].values,
        (ICM_dataframe["item_id"].values, ICM_dataframe["feature_id"].values),
    ),
    shape=(n_items, n_features),
)

ICM_all

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 2940040 stored elements and shape (38121, 94331)>

# Training

In [11]:
from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

# Split the interactions into a training part (train_percentage = 0.80) and a validation part.
URM_train, URM_validation = split_train_in_two_percentage_global_sample(
    URM_all,
    train_percentage=0.80,
)

# Evaluator over the validation interactions, with 10 as the only cutoff.
evaluator_validation = EvaluatorHoldout(
    URM_validation,
    cutoff_list=[10],
)

EvaluatorHoldout: Ignoring 141 ( 0.4%) Users that have less than 1 test interactions


In [12]:
from Recommenders.MatrixFactorization.PureSVDRecommender import ScaledPureSVDRecommender

def objective_function_ScaledPureSVD(optuna_trial):
    """Optuna objective: fit ScaledPureSVD with sampled hyperparameters and score it.

    Parameters
    ----------
    optuna_trial
        Trial object used to sample ``num_factors`` (log-scaled integer in [1, 2000])
        and ``scaling_items`` / ``scaling_users`` (log-scaled floats in [0.01, 2]).

    Returns
    -------
    The "MAP" entry of the row labelled 10 in the results that ``evaluator_validation``
    computes for the recommender fitted on ``URM_train``.
    """
    model = ScaledPureSVDRecommender(URM_train)

    # Keyword arguments are evaluated left to right, so the sampling order is
    # num_factors, scaling_items, scaling_users.
    sampled_hyperparameters = dict(
        num_factors=optuna_trial.suggest_int('num_factors', 1, 2000, log=True),
        scaling_items=optuna_trial.suggest_float('scaling_items', 1e-2, 2, log=True),
        scaling_users=optuna_trial.suggest_float('scaling_users', 1e-2, 2, log=True),
    )
    model.fit(**sampled_hyperparameters)

    # The evaluator returns a pair; only the first element (the results table) is used.
    metrics_by_cutoff, _unused = evaluator_validation.evaluateRecommender(model)

    cutoff_10_metrics = metrics_by_cutoff.loc[10]
    return cutoff_10_metrics["MAP"]

In [13]:
if config['tune_parameters']:

    # Study identity and SQLite storage are both derived from the configuration.
    study_name = f'hyperparameters_tuning_{config["model"]}'
    storage_url = f'sqlite:///{config["database_path"]}'

    # load_if_exists=True: reuse the study already stored under this name, if there is one.
    optuna_study = optuna.create_study(
        study_name=study_name,
        storage=storage_url,
        direction='maximize',
        load_if_exists=True,
    )

    # Run 50 trials of the objective defined above.
    optuna_study.optimize(objective_function_ScaledPureSVD, n_trials=50)

[I 2024-12-04 14:17:36,091] Using an existing study with name 'hyperparameters_tuning_ScaledPureSVD' instead of creating a new one.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 2.59 min
EvaluatorHoldout: Processed 35595 (100.0%) in 2.84 min. Users per second: 209


[I 2024-12-04 14:23:02,186] Trial 243 finished with value: 0.03933124747768531 and parameters: {'num_factors': 1654, 'scaling_items': 0.015631665239072064, 'scaling_users': 0.013209896893690197}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 2.53 min
EvaluatorHoldout: Processed 35595 (100.0%) in 2.75 min. Users per second: 216


[I 2024-12-04 14:28:19,499] Trial 244 finished with value: 0.039587156658794434 and parameters: {'num_factors': 1724, 'scaling_items': 0.013267193519134477, 'scaling_users': 0.01540082389723616}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 2.58 min
EvaluatorHoldout: Processed 35595 (100.0%) in 2.82 min. Users per second: 210


[I 2024-12-04 14:33:44,129] Trial 245 finished with value: 0.027417743039529183 and parameters: {'num_factors': 1751, 'scaling_items': 0.010025411700694816, 'scaling_users': 1.4399259418217316}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 3.04 min
EvaluatorHoldout: Processed 35595 (100.0%) in 3.13 min. Users per second: 190


[I 2024-12-04 14:39:54,381] Trial 246 finished with value: 0.04028763849033409 and parameters: {'num_factors': 1989, 'scaling_items': 0.013315506237990783, 'scaling_users': 0.011642940535023329}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 2.04 min
EvaluatorHoldout: Processed 35595 (100.0%) in 2.48 min. Users per second: 239


[I 2024-12-04 14:44:25,665] Trial 247 finished with value: 0.038688318985409746 and parameters: {'num_factors': 1464, 'scaling_items': 0.012384081280149676, 'scaling_users': 0.011422417752390689}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 2.45 min
EvaluatorHoldout: Processed 35595 (100.0%) in 2.73 min. Users per second: 218


[I 2024-12-04 14:49:36,357] Trial 248 finished with value: 0.039546321380075036 and parameters: {'num_factors': 1714, 'scaling_items': 0.0133966179345256, 'scaling_users': 0.014096146415499328}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 3.09 min
EvaluatorHoldout: Processed 35595 (100.0%) in 3.13 min. Users per second: 190


[I 2024-12-04 14:55:49,730] Trial 249 finished with value: 0.040360939983989506 and parameters: {'num_factors': 1985, 'scaling_items': 0.011363922346717103, 'scaling_users': 0.011469098035623826}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 2.05 min
EvaluatorHoldout: Processed 35595 (100.0%) in 2.43 min. Users per second: 244


[I 2024-12-04 15:00:18,729] Trial 250 finished with value: 0.03864848594304821 and parameters: {'num_factors': 1457, 'scaling_items': 0.011283087800834962, 'scaling_users': 0.011315695485878394}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 2.50 min
EvaluatorHoldout: Processed 35595 (100.0%) in 2.70 min. Users per second: 220


[I 2024-12-04 15:05:31,179] Trial 251 finished with value: 0.039459921694012384 and parameters: {'num_factors': 1683, 'scaling_items': 0.010019808617217634, 'scaling_users': 0.012217343283585558}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 0.56 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 38.78 sec. Users per second: 918


[I 2024-12-04 15:06:10,805] Trial 252 finished with value: 0.0056991284222637035 and parameters: {'num_factors': 6, 'scaling_items': 0.011611667397342222, 'scaling_users': 0.011604208616049941}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 3.00 min
EvaluatorHoldout: Processed 35595 (100.0%) in 3.16 min. Users per second: 188


[I 2024-12-04 15:12:20,940] Trial 253 finished with value: 0.040272281642907956 and parameters: {'num_factors': 1990, 'scaling_items': 0.012139324953009113, 'scaling_users': 0.013290598026824267}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 1.64 min
EvaluatorHoldout: Processed 35595 (100.0%) in 2.23 min. Users per second: 266


[I 2024-12-04 15:16:13,608] Trial 254 finished with value: 0.037850477260716144 and parameters: {'num_factors': 1254, 'scaling_items': 0.01248881921152001, 'scaling_users': 0.012980907878472607}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 2.42 min
EvaluatorHoldout: Processed 35595 (100.0%) in 2.76 min. Users per second: 215


[I 2024-12-04 15:21:24,628] Trial 255 finished with value: 0.03960896505439125 and parameters: {'num_factors': 1694, 'scaling_items': 0.011190076662579138, 'scaling_users': 0.011370319935943594}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 3.04 min
EvaluatorHoldout: Processed 35595 (100.0%) in 3.13 min. Users per second: 190


[I 2024-12-04 15:27:34,804] Trial 256 finished with value: 0.015594278222597227 and parameters: {'num_factors': 1990, 'scaling_items': 1.8705634238993913, 'scaling_users': 0.0125393604367637}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 2.08 min
EvaluatorHoldout: Processed 35595 (100.0%) in 2.50 min. Users per second: 237


[I 2024-12-04 15:32:10,358] Trial 257 finished with value: 0.038528227613561186 and parameters: {'num_factors': 1462, 'scaling_items': 0.01223178262613586, 'scaling_users': 0.01369158167299505}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 2.52 min
EvaluatorHoldout: Processed 35595 (100.0%) in 2.81 min. Users per second: 211


[I 2024-12-04 15:37:30,531] Trial 258 finished with value: 0.03964004998918458 and parameters: {'num_factors': 1700, 'scaling_items': 0.011004202288339444, 'scaling_users': 0.014922898216236756}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 2.07 min
EvaluatorHoldout: Processed 35595 (100.0%) in 2.46 min. Users per second: 241


[I 2024-12-04 15:42:02,252] Trial 259 finished with value: 0.03863354827345424 and parameters: {'num_factors': 1449, 'scaling_items': 0.013222394609050312, 'scaling_users': 0.010966579145144009}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 21.11 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 1.04 min. Users per second: 571


[I 2024-12-04 15:43:25,994] Trial 260 finished with value: 0.028575465387727716 and parameters: {'num_factors': 293, 'scaling_items': 0.010038813726100047, 'scaling_users': 0.0112671966243103}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 2.70 min
EvaluatorHoldout: Processed 35595 (100.0%) in 2.86 min. Users per second: 208


[I 2024-12-04 15:48:59,624] Trial 261 finished with value: 0.03964212358165024 and parameters: {'num_factors': 1740, 'scaling_items': 0.012488918575701294, 'scaling_users': 0.012894545553227544}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 3.15 min
EvaluatorHoldout: Processed 35595 (100.0%) in 3.13 min. Users per second: 189


[I 2024-12-04 15:55:17,033] Trial 262 finished with value: 0.03417094874659025 and parameters: {'num_factors': 1991, 'scaling_items': 0.23488644971358896, 'scaling_users': 0.013822029012867096}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 1.62 min
EvaluatorHoldout: Processed 35595 (100.0%) in 2.21 min. Users per second: 268


[I 2024-12-04 15:59:07,286] Trial 263 finished with value: 0.037692213102873386 and parameters: {'num_factors': 1223, 'scaling_items': 0.011360388611606934, 'scaling_users': 0.015635664751363274}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 3.22 min
EvaluatorHoldout: Processed 35595 (100.0%) in 3.18 min. Users per second: 187


[I 2024-12-04 16:05:31,476] Trial 264 finished with value: 0.04037746963747678 and parameters: {'num_factors': 1996, 'scaling_items': 0.01572756910203356, 'scaling_users': 0.011124755552542694}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 2.10 min
EvaluatorHoldout: Processed 35595 (100.0%) in 2.52 min. Users per second: 235


[I 2024-12-04 16:10:09,272] Trial 265 finished with value: 0.03891140966383127 and parameters: {'num_factors': 1514, 'scaling_items': 0.013467220589514776, 'scaling_users': 0.011109849897563303}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 3.09 min
EvaluatorHoldout: Processed 35595 (100.0%) in 3.11 min. Users per second: 191


[I 2024-12-04 16:16:21,418] Trial 266 finished with value: 0.04027406091902364 and parameters: {'num_factors': 1999, 'scaling_items': 0.016138583326505477, 'scaling_users': 0.012139078034575349}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 2.33 min
EvaluatorHoldout: Processed 35595 (100.0%) in 2.66 min. Users per second: 223


[I 2024-12-04 16:21:21,568] Trial 267 finished with value: 0.03926768072027091 and parameters: {'num_factors': 1643, 'scaling_items': 0.01639863608240399, 'scaling_users': 0.011078195089850229}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 3.06 min
EvaluatorHoldout: Processed 35595 (100.0%) in 3.11 min. Users per second: 191


[I 2024-12-04 16:27:31,998] Trial 268 finished with value: 0.040278641774636983 and parameters: {'num_factors': 1994, 'scaling_items': 0.016099670470885855, 'scaling_users': 0.012287607023518013}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 1.98 min
EvaluatorHoldout: Processed 35595 (100.0%) in 2.48 min. Users per second: 239


[I 2024-12-04 16:32:00,188] Trial 269 finished with value: 0.03867352624432128 and parameters: {'num_factors': 1450, 'scaling_items': 0.010901293047830236, 'scaling_users': 0.012174513041889448}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 2.52 min
EvaluatorHoldout: Processed 35595 (100.0%) in 2.85 min. Users per second: 208


[I 2024-12-04 16:37:22,249] Trial 270 finished with value: 0.039644393385015714 and parameters: {'num_factors': 1734, 'scaling_items': 0.013762956748101154, 'scaling_users': 0.010582653722839591}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 1.71 min
EvaluatorHoldout: Processed 35595 (100.0%) in 2.25 min. Users per second: 263


[I 2024-12-04 16:41:20,649] Trial 271 finished with value: 0.03790910641542608 and parameters: {'num_factors': 1288, 'scaling_items': 0.015237085896993864, 'scaling_users': 0.012305467943713497}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 2.49 min
EvaluatorHoldout: Processed 35595 (100.0%) in 2.73 min. Users per second: 218


[I 2024-12-04 16:46:34,105] Trial 272 finished with value: 0.03955510516235205 and parameters: {'num_factors': 1699, 'scaling_items': 0.011897116221837629, 'scaling_users': 0.010081077836840436}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 2.17 min
EvaluatorHoldout: Processed 35595 (100.0%) in 2.58 min. Users per second: 230


[I 2024-12-04 16:51:19,613] Trial 273 finished with value: 0.03878070198908646 and parameters: {'num_factors': 1494, 'scaling_items': 0.010840981596325829, 'scaling_users': 0.017649122929211503}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 1.45 min
EvaluatorHoldout: Processed 35595 (100.0%) in 2.02 min. Users per second: 293


[I 2024-12-04 16:54:48,463] Trial 274 finished with value: 0.03705849871905361 and parameters: {'num_factors': 1086, 'scaling_items': 0.01369020862657502, 'scaling_users': 0.011249842767382043}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 2.51 min
EvaluatorHoldout: Processed 35595 (100.0%) in 2.79 min. Users per second: 213


[I 2024-12-04 17:00:07,008] Trial 275 finished with value: 0.03966701338024432 and parameters: {'num_factors': 1711, 'scaling_items': 0.010012591091887115, 'scaling_users': 0.013420732238014293}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 3.16 min
EvaluatorHoldout: Processed 35595 (100.0%) in 3.15 min. Users per second: 188


[I 2024-12-04 17:06:26,037] Trial 276 finished with value: 0.040294675326700786 and parameters: {'num_factors': 1995, 'scaling_items': 0.015928219409387066, 'scaling_users': 0.015263814313429238}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 2.11 min
EvaluatorHoldout: Processed 35595 (100.0%) in 2.52 min. Users per second: 236


[I 2024-12-04 17:11:04,102] Trial 277 finished with value: 0.035868030332420135 and parameters: {'num_factors': 1484, 'scaling_items': 0.1156857718816888, 'scaling_users': 0.02023697090114056}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 10.88 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 50.45 sec. Users per second: 706


[I 2024-12-04 17:12:05,696] Trial 278 finished with value: 0.022811180453826557 and parameters: {'num_factors': 139, 'scaling_items': 0.012705460507598193, 'scaling_users': 0.015612167062109562}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 2.53 min
EvaluatorHoldout: Processed 35595 (100.0%) in 2.78 min. Users per second: 213


[I 2024-12-04 17:17:24,796] Trial 279 finished with value: 0.03956511526275405 and parameters: {'num_factors': 1717, 'scaling_items': 0.015277507276447711, 'scaling_users': 0.014925651109014153}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 3.11 min
EvaluatorHoldout: Processed 35595 (100.0%) in 3.15 min. Users per second: 188


[I 2024-12-04 17:23:40,806] Trial 280 finished with value: 0.04034562215577672 and parameters: {'num_factors': 1987, 'scaling_items': 0.01157844499418631, 'scaling_users': 0.013497894661126041}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 1.75 min
EvaluatorHoldout: Processed 35595 (100.0%) in 2.24 min. Users per second: 264


[I 2024-12-04 17:27:40,520] Trial 281 finished with value: 0.03788880416145347 and parameters: {'num_factors': 1283, 'scaling_items': 0.011242418439174392, 'scaling_users': 0.01660422079971325}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 2.61 min
EvaluatorHoldout: Processed 35595 (100.0%) in 2.89 min. Users per second: 205


[I 2024-12-04 17:33:10,398] Trial 282 finished with value: 0.0396291402172131 and parameters: {'num_factors': 1733, 'scaling_items': 0.012162561814963493, 'scaling_users': 0.01406564431593671}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 3.20 min
EvaluatorHoldout: Processed 35595 (100.0%) in 3.16 min. Users per second: 187


[I 2024-12-04 17:39:32,351] Trial 283 finished with value: 0.0402670887430671 and parameters: {'num_factors': 1979, 'scaling_items': 0.01331290793308305, 'scaling_users': 0.017235028793873258}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 1.94 min
EvaluatorHoldout: Processed 35595 (100.0%) in 2.46 min. Users per second: 241


[I 2024-12-04 17:43:56,252] Trial 284 finished with value: 0.03843597393070493 and parameters: {'num_factors': 1400, 'scaling_items': 0.011282530710818106, 'scaling_users': 0.013510212012243387}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 2.25 min
EvaluatorHoldout: Processed 35595 (100.0%) in 2.65 min. Users per second: 224


[I 2024-12-04 17:48:50,609] Trial 285 finished with value: 0.039042583339463505 and parameters: {'num_factors': 1568, 'scaling_items': 0.010030287455504239, 'scaling_users': 0.014800299635120952}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 3.13 min
EvaluatorHoldout: Processed 35595 (100.0%) in 3.14 min. Users per second: 189


[I 2024-12-04 17:55:06,736] Trial 286 finished with value: 0.04021295348686695 and parameters: {'num_factors': 1985, 'scaling_items': 0.01237400124444789, 'scaling_users': 0.02259895086870665}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 1.66 min
EvaluatorHoldout: Processed 35595 (100.0%) in 2.22 min. Users per second: 267


[I 2024-12-04 17:58:59,380] Trial 287 finished with value: 0.03747651935241362 and parameters: {'num_factors': 1205, 'scaling_items': 0.014245322640361938, 'scaling_users': 0.012516916984024908}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 2.54 min
EvaluatorHoldout: Processed 35595 (100.0%) in 2.79 min. Users per second: 212


[I 2024-12-04 18:04:19,364] Trial 288 finished with value: 0.039625930608230185 and parameters: {'num_factors': 1690, 'scaling_items': 0.010938518282803953, 'scaling_users': 0.011280379274100319}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 2.07 min
EvaluatorHoldout: Processed 35595 (100.0%) in 2.50 min. Users per second: 237


[I 2024-12-04 18:08:53,631] Trial 289 finished with value: 0.038628544895505185 and parameters: {'num_factors': 1460, 'scaling_items': 0.013061548457640019, 'scaling_users': 0.015575920580830372}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 2.57 min
EvaluatorHoldout: Processed 35595 (100.0%) in 2.79 min. Users per second: 212


[I 2024-12-04 18:14:15,405] Trial 290 finished with value: 0.03958354236483044 and parameters: {'num_factors': 1712, 'scaling_items': 0.01157246758165473, 'scaling_users': 0.018648448946011364}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 0.41 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 40.12 sec. Users per second: 887


[I 2024-12-04 18:14:56,198] Trial 291 finished with value: 0.004267093202407151 and parameters: {'num_factors': 2, 'scaling_items': 0.010015105948205401, 'scaling_users': 0.013393374022400678}. Best is trial 194 with value: 0.0405380961299616.


ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 3.10 min
EvaluatorHoldout: Processed 35595 (100.0%) in 3.15 min. Users per second: 188


[I 2024-12-04 18:21:11,731] Trial 292 finished with value: 0.040294849240907586 and parameters: {'num_factors': 1980, 'scaling_items': 0.013934595673411383, 'scaling_users': 0.012213209807098}. Best is trial 194 with value: 0.0405380961299616.


## Optuna visualizations of the recommender's hyperparameters

In [14]:
# When tuning was skipped in this session, load the study from its database instead.
if not config['tune_parameters']:
    optuna_study = optuna.load_study(
        study_name=f'hyperparameters_tuning_{config["model"]}',
        storage=f'sqlite:///{config["database_path"]}',
    )

# Slice plot of the study.
fig = optuna.visualization.plot_slice(optuna_study)
fig.show()

In [15]:
# When tuning was skipped in this session, load the study from its database instead.
if not config['tune_parameters']:
    optuna_study = optuna.load_study(
        study_name=f'hyperparameters_tuning_{config["model"]}',
        storage=f'sqlite:///{config["database_path"]}',
    )

# Hyperparameter importances of the study.
fig = optuna.visualization.plot_param_importances(optuna_study)
fig.show()

## Train the recommender with the best hyperparameter values

In [16]:
if config['tune_best_params']:

    # Source of the hyperparameters: the JSON stored in the cloned repository when no
    # tuning ran in this session, otherwise the best trial of the study.
    if not config['tune_parameters']:
        best_params_path = f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/best_params_{config["model"]}.json'
        with open(best_params_path, 'r') as best_params_json:
            best_params = json.load(best_params_json)
    else:
        best_params = optuna_study.best_trial.params

    # Final fit on the training and validation interactions added together.
    recommender_instance = ScaledPureSVDRecommender(URM_train + URM_validation)
    recommender_instance.fit(**best_params)

ScaledPureSVDRecommender: Computing SVD decomposition...
ScaledPureSVDRecommender: Computing SVD decomposition... done in 3.30 min


# Testing

Create the recommendations for the submission. 

In [17]:
if config['tune_best_params']:

    # Users for which recommendations are required.
    data_target_users_test = pd.read_csv(
        '/kaggle/input/recommender-system-2024-challenge-polimi/data_target_users_test.csv'
    )

    # Write the fitted recommender's submission to the working directory.
    submission_path = f'/kaggle/working/submission_{config["model"]}.csv'
    create_submission(data_target_users_test, recommender_instance, submission_path)

Submission file saved as /kaggle/working/submission_ScaledPureSVD.csv


# Save Version on GitHub 

Write or import a JSON file in which the best hyperparameters are saved.

In [18]:
if config['tune_parameters']:
    # Dump the study's best hyperparameters as JSON in the working directory.
    local_params_path = f'/kaggle/working/best_params_{config["model"]}.json'
    with open(local_params_path, 'w') as params_file:
        json.dump(optuna_study.best_params, params_file)

    if config['save_github']:
        # Upload that file to the model's folder in the GitHub repository.
        upload_file(
            local_params_path,
            f'{GH_PATH}/{config["model"]}Recommender/best_params_{config["model"]}.json',
            f'{config["model"]} tuning results (from kaggle notebook)',
            repo,
        )
elif config['copy_prev_best_params']:
    # No tuning in this session: copy the parameters stored in the cloned repository
    # to the working directory.
    shutil.copyfile(
        f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/best_params_{config["model"]}.json',
        f'/kaggle/working/best_params_{config["model"]}.json',
    )

File 'TrainedModels/WithoutKFCV/MatrixFactorization/PureSVDRecommender/ScaledPureSVDRecommender/best_params_ScaledPureSVD.json' updated successfully.


Save the history of the tuned model.

In [19]:
# The Optuna database is uploaded only when GitHub saving is on and tuning ran in this session.
if config['save_github'] and config['tune_parameters']:
    history_db_in_repo = f'{GH_PATH}/{config["model"]}Recommender/history_{config["model"]}.db'
    history_commit_message = f'Tuning {config["model"]} db updated results (from kaggle notebook)'

    upload_file(config['database_path'], history_db_in_repo, history_commit_message, repo)

File 'TrainedModels/WithoutKFCV/MatrixFactorization/PureSVDRecommender/ScaledPureSVDRecommender/history_ScaledPureSVD.db' updated successfully.


Save the best trained model and its submission.

In [20]:
# The submission is uploaded only when GitHub saving is on and the final model was fitted.
if config['save_github'] and config['tune_best_params']:
    submission_local_path = f'/kaggle/working/submission_{config["model"]}.csv'
    submission_in_repo = f'{GH_PATH}/{config["model"]}Recommender/Submission/submission_{config["model"]}.csv'
    submission_commit_message = f'New {config["model"]} submission (from kaggle notebook)'

    upload_file(submission_local_path, submission_in_repo, submission_commit_message, repo)

File 'TrainedModels/WithoutKFCV/MatrixFactorization/PureSVDRecommender/ScaledPureSVDRecommender/Submission/submission_ScaledPureSVD.csv' created successfully.
