# Set up the connection with GitHub

In [1]:
# ! cd /kaggle/working && rm -rf RECsys_Challenge2024

In [2]:
# Read the GitHub access token from the Kaggle secret labelled "Token",
# so the token itself is not written in the notebook source.
from kaggle_secrets import UserSecretsClient

token = UserSecretsClient().get_secret("Token")

# IPython substitutes {token} into the shell command before running it.
# NOTE(review): a token embedded in the clone URL is typically persisted in the
# clone's .git/config and may surface in git error output — confirm this is acceptable.
! git clone https://{token}@github.com/madratak/RECsys_Challenge2024.git

Cloning into 'RECsys_Challenge2024'...
remote: Enumerating objects: 2923, done.[K
remote: Counting objects: 100% (396/396), done.[K
remote: Compressing objects: 100% (174/174), done.[K
remote: Total 2923 (delta 191), reused 361 (delta 173), pack-reused 2527 (from 1)[K
Receiving objects: 100% (2923/2923), 152.71 MiB | 19.39 MiB/s, done.
Resolving deltas: 100% (1710/1710), done.


In [3]:
! pip install PyGithub requests

Collecting PyGithub
  Downloading PyGithub-2.5.0-py3-none-any.whl.metadata (3.9 kB)
Collecting pynacl>=1.4.0 (from PyGithub)
  Downloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl.metadata (8.6 kB)
Downloading PyGithub-2.5.0-py3-none-any.whl (375 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m375.9/375.9 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (856 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m856.7/856.7 kB[0m [31m23.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pynacl, PyGithub
Successfully installed PyGithub-2.5.0 pynacl-1.5.0


In [4]:
# Make the cloned repository the working directory for the rest of the notebook
# (presumably so the project-local imports below resolve from the repo root — TODO confirm).
%cd /kaggle/working/RECsys_Challenge2024
# Build the repository's Cython sources with the current Python environment.
! python run_compile_all_cython.py

/kaggle/working/RECsys_Challenge2024
run_compile_all_cython: Found 11 Cython files in 5 folders...
run_compile_all_cython: All files will be compiled using your current python environment: '/opt/conda/bin/python'
Compiling [1/11]: MatrixFactorization_Cython_Epoch.pyx... 
In file included from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/ndarraytypes.h:1929[m[K,
                 from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/ndarrayobject.h:12[m[K,
                 from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/arrayobject.h:5[m[K,
                 from [01m[KMatrixFactorization_Cython_Epoch.c:1252[m[K:
      |  [01;35m[K^~~~~~~[m[K
[01m[KMatrixFactorization_Cython_Epoch.c:[m[K In function '[01m[K__pyx_pf_32MatrixFactorization_Cython_Epoch_32MatrixFactorization_Cython_Epoch_10epochIteration_Cython_ASY_SVD_SGD[m[K':
26255 |         [01;35m[Kfor[m[K (__pyx_t_21 = __

In [5]:
from Utils.notebookFunctions import *
import numpy as np
import pandas as pd
import scipy.sparse as sps
import time
import shutil
import optuna
import json
import os
from Utils.seconds_to_biggest_unit import seconds_to_biggest_unit


# Location of the cloned repository inside the Kaggle working directory.
K_PATH = '/kaggle/working/RECsys_Challenge2024'
# Repository-relative folder of this model family's artifacts; used below both
# to read files from the local clone and as the destination prefix for uploads.
GH_PATH = 'TrainedModels/WithoutKFCV/SingleRecommenders/PureSVDRecommender'

# Seed NumPy's global RNG (presumably what makes the random train/validation
# split below repeatable — TODO confirm the split helper draws from it).
np.random.seed(42)

## Import the repository **RECsys_Challenge2024**

In [6]:
# Repository handle obtained with the token read earlier; it is the last
# argument of every upload_file(...) call at the end of the notebook.
repo = get_repo_from_github(token)

Repository 'RECsys_Challenge2024' found.


In [7]:
# Run configuration: the cells below branch on these switches.
config = dict(
    # Model name interpolated into the study name and into every artifact path.
    model='PureSVDItem',
    # Run the Optuna search (and, afterwards, write the best-params JSON).
    tune_parameters=True,
    # SQLite file used as the Optuna storage.
    database_path='/kaggle/working/history_PureSVDItem.db',
    # Only read when tune_parameters is False: copy the repo's best-params JSON locally.
    copy_prev_best_params=False,
    # Fit on train + validation with the best parameters and build a submission.
    tune_best_params=True,
    # Upload the produced artifacts to GitHub.
    save_github=True,
)

Import the database where previous tuning trials have been saved.

In [8]:
# Seed the local Optuna storage with the trial history kept in the cloned
# repository, so a new run continues from the trials already recorded there.
try:
    shutil.copyfile(
        f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/history_{config["model"]}.db',
        config['database_path']
    )
except FileNotFoundError:
    # Deliberately non-fatal: if not present optuna will create it.
    # Say so explicitly, because it changes what the tuning cell starts from.
    print(f'No previous tuning history found for {config["model"]}: a new database will be created.')
else:
    print(f'Previous tuning history copied to {config["database_path"]}.')

# Construction of URM and ICM matrices

In [9]:
# Load the interactions table (columns used here: user_id, item_id, data).
URM_all_dataframe = pd.read_csv("/kaggle/input/recommender-system-2024-challenge-polimi/data_train.csv")

# The ids are used directly as row/column indices, so the matrix must be sized
# from the largest id, not from the number of distinct ids: with a gap in the id
# sequence the distinct count is too small and csr_matrix rejects the
# out-of-range indices. For contiguous 0-based ids both give the same shape.
n_users = int(URM_all_dataframe["user_id"].max()) + 1
n_items = int(URM_all_dataframe["item_id"].max()) + 1

# User Rating Matrix: rows are users, columns are items, values come from "data".
URM_all = sps.csr_matrix(
    (
        URM_all_dataframe["data"].values,
        (URM_all_dataframe["user_id"].values, URM_all_dataframe["item_id"].values),
    ),
    shape=(n_users, n_items),
)

URM_all

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 1764607 stored elements and shape (35736, 38121)>

In [10]:
# Load the item metadata table (columns used here: item_id, feature_id, data).
ICM_dataframe = pd.read_csv("/kaggle/input/recommender-system-2024-challenge-polimi/data_ICM_metadata.csv")

# The ids are used directly as row/column indices, so the matrix must be sized
# from the largest id, not from the number of distinct ids: with a gap in the id
# sequence the distinct count is too small and csr_matrix rejects the
# out-of-range indices. For contiguous 0-based ids both give the same shape.
n_items = int(ICM_dataframe["item_id"].max()) + 1
n_features = int(ICM_dataframe["feature_id"].max()) + 1

# Item Content Matrix: rows are items, columns are features, values come from "data".
ICM_all = sps.csr_matrix(
    (
        ICM_dataframe["data"].values,
        (ICM_dataframe["item_id"].values, ICM_dataframe["feature_id"].values),
    ),
    shape=(n_items, n_features),
)

ICM_all

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 2940040 stored elements and shape (38121, 94331)>

# Training

In [11]:
from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

# Split URM_all into a training matrix and a validation matrix, with
# train_percentage=0.80 assigned to training (presumably a random sample over
# all interactions, going by the helper's name — TODO confirm).
URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.80)

# Evaluator over the validation matrix, configured with the single cutoff 10;
# the tuning objective reads its score from the row labelled 10 of the results.
evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])

EvaluatorHoldout: Ignoring 141 ( 0.4%) Users that have less than 1 test interactions


In [12]:
from Recommenders.MatrixFactorization.PureSVDRecommender import PureSVDItemRecommender

def objective_function_PureSVDItem(optuna_trial):
    """Optuna objective for PureSVDItemRecommender.

    Builds a recommender on the module-level ``URM_train``, asks the trial for
    ``topK`` and ``num_factors`` (both integers in [1, 1000], sampled with
    ``log=True``), fits the recommender with them and evaluates it with the
    module-level ``evaluator_validation``.

    Args:
        optuna_trial: the trial object supplied by Optuna; only its
            ``suggest_int`` method is used.

    Returns:
        The "MAP" entry of the row labelled 10 in the evaluator's result frame
        (the evaluator is created with ``cutoff_list=[10]``).
    """
    model = PureSVDItemRecommender(URM_train)

    # Suggestions are requested in the same order as before: topK, then num_factors.
    sampled_topK = optuna_trial.suggest_int('topK', 1, 1000, log=True)
    sampled_num_factors = optuna_trial.suggest_int('num_factors', 1, 1000, log=True)

    model.fit(topK=sampled_topK, num_factors=sampled_num_factors)

    # The second element returned by the evaluator is not needed here.
    metrics_frame, _ = evaluator_validation.evaluateRecommender(model)
    row_at_cutoff = metrics_frame.loc[10]

    return row_at_cutoff["MAP"]

In [13]:
if config['tune_parameters']:

    # Open the study stored in the SQLite file at config['database_path'];
    # load_if_exists=True reuses a study of the same name instead of failing,
    # so repeated runs keep adding trials to the same history.
    optuna_study = optuna.create_study(
        direction='maximize',
        study_name=f'hyperparameters_tuning_{config["model"]}',
        storage=f'sqlite:///{config["database_path"]}',
        load_if_exists=True,
    )

    # Trial budget: an optional config['n_trials'] entry overrides the default
    # of 50 trials per run, so the budget can be changed without editing this cell.
    optuna_study.optimize(objective_function_PureSVDItem, n_trials=config.get('n_trials', 50))

[I 2024-11-29 13:14:33,485] Using an existing study with name 'hyperparameters_tuning_PureSVDItem' instead of creating a new one.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 59.37 sec. Users per second: 600


[I 2024-11-29 13:18:56,311] Trial 203 finished with value: 0.0378307970844831 and parameters: {'topK': 534, 'num_factors': 993}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 58.65 sec. Users per second: 607


[I 2024-11-29 13:22:44,487] Trial 204 finished with value: 0.03602771924895694 and parameters: {'topK': 515, 'num_factors': 763}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 1.04 min. Users per second: 572


[I 2024-11-29 13:27:11,923] Trial 205 finished with value: 0.037787480183812686 and parameters: {'topK': 657, 'num_factors': 998}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 1.08 min. Users per second: 551


[I 2024-11-29 13:31:28,795] Trial 206 finished with value: 0.03663653491550531 and parameters: {'topK': 747, 'num_factors': 840}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 59.42 sec. Users per second: 599


[I 2024-11-29 13:35:07,787] Trial 207 finished with value: 0.03519878951252622 and parameters: {'topK': 533, 'num_factors': 683}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 57.17 sec. Users per second: 623


[I 2024-11-29 13:39:10,620] Trial 208 finished with value: 0.03673505731364853 and parameters: {'topK': 469, 'num_factors': 854}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 56.22 sec. Users per second: 633


[I 2024-11-29 13:42:47,774] Trial 209 finished with value: 0.035865615600549 and parameters: {'topK': 429, 'num_factors': 754}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 1.02 min. Users per second: 581


[I 2024-11-29 13:46:58,613] Trial 210 finished with value: 0.03697746027286568 and parameters: {'topK': 594, 'num_factors': 875}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 53.59 sec. Users per second: 664


[I 2024-11-29 13:51:07,103] Trial 211 finished with value: 0.03772680753717281 and parameters: {'topK': 362, 'num_factors': 971}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 54.72 sec. Users per second: 651


[I 2024-11-29 13:55:02,592] Trial 212 finished with value: 0.03709000729101726 and parameters: {'topK': 392, 'num_factors': 875}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 57.79 sec. Users per second: 616


[I 2024-11-29 13:58:54,391] Trial 213 finished with value: 0.03612747911357133 and parameters: {'topK': 497, 'num_factors': 779}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 52.04 sec. Users per second: 684


[I 2024-11-29 14:03:05,678] Trial 214 finished with value: 0.03772794689819434 and parameters: {'topK': 318, 'num_factors': 981}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 1.00 min. Users per second: 592


[I 2024-11-29 14:06:44,881] Trial 215 finished with value: 0.034910951466786626 and parameters: {'topK': 566, 'num_factors': 667}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 52.76 sec. Users per second: 675


[I 2024-11-29 14:08:39,878] Trial 216 finished with value: 0.008820926338414867 and parameters: {'topK': 458, 'num_factors': 11}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 1.05 min. Users per second: 563


[I 2024-11-29 14:13:14,189] Trial 217 finished with value: 0.03780382588958085 and parameters: {'topK': 694, 'num_factors': 997}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 55.50 sec. Users per second: 641


[I 2024-11-29 14:17:26,863] Trial 218 finished with value: 0.037865810696614224 and parameters: {'topK': 403, 'num_factors': 1000}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 32.21 sec. Users per second: 1105


[I 2024-11-29 14:20:23,022] Trial 219 finished with value: 0.0330785835802686 and parameters: {'topK': 7, 'num_factors': 768}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 1.13 min. Users per second: 527


[I 2024-11-29 14:24:41,607] Trial 220 finished with value: 0.03632172344519431 and parameters: {'topK': 867, 'num_factors': 814}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 56.80 sec. Users per second: 627


[I 2024-11-29 14:28:56,536] Trial 221 finished with value: 0.03765331317712144 and parameters: {'topK': 459, 'num_factors': 985}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 54.87 sec. Users per second: 649


[I 2024-11-29 14:32:44,525] Trial 222 finished with value: 0.03681862309000805 and parameters: {'topK': 392, 'num_factors': 858}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 59.03 sec. Users per second: 603


[I 2024-11-29 14:37:06,712] Trial 223 finished with value: 0.037894629172546136 and parameters: {'topK': 551, 'num_factors': 1000}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 1.00 min. Users per second: 591


[I 2024-11-29 14:41:09,288] Trial 224 finished with value: 0.036836882966885996 and parameters: {'topK': 579, 'num_factors': 858}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 59.86 sec. Users per second: 595


[I 2024-11-29 14:45:12,445] Trial 225 finished with value: 0.03698585720751614 and parameters: {'topK': 537, 'num_factors': 878}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 1.04 min. Users per second: 570


[I 2024-11-29 14:49:41,227] Trial 226 finished with value: 0.03779304766809904 and parameters: {'topK': 672, 'num_factors': 998}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 58.23 sec. Users per second: 611


[I 2024-11-29 14:53:24,824] Trial 227 finished with value: 0.03569553196565277 and parameters: {'topK': 496, 'num_factors': 730}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 1.02 min. Users per second: 580


[I 2024-11-29 14:57:34,032] Trial 228 finished with value: 0.03677258487793538 and parameters: {'topK': 616, 'num_factors': 853}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 56.33 sec. Users per second: 632


[I 2024-11-29 15:01:00,732] Trial 229 finished with value: 0.034957116770010614 and parameters: {'topK': 419, 'num_factors': 660}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 1.08 min. Users per second: 552


[I 2024-11-29 15:05:06,061] Trial 230 finished with value: 0.03587252980510337 and parameters: {'topK': 743, 'num_factors': 761}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 53.56 sec. Users per second: 665


[I 2024-11-29 15:09:14,819] Trial 231 finished with value: 0.03785226099617048 and parameters: {'topK': 348, 'num_factors': 987}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 50.94 sec. Users per second: 699


[I 2024-11-29 15:13:15,614] Trial 232 finished with value: 0.03780507338956431 and parameters: {'topK': 286, 'num_factors': 995}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 56.06 sec. Users per second: 635


[I 2024-11-29 15:17:30,839] Trial 233 finished with value: 0.03786677391375943 and parameters: {'topK': 423, 'num_factors': 996}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 54.72 sec. Users per second: 651


[I 2024-11-29 15:21:45,288] Trial 234 finished with value: 0.03792469960780013 and parameters: {'topK': 397, 'num_factors': 997}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 52.10 sec. Users per second: 683


[I 2024-11-29 15:25:27,841] Trial 235 finished with value: 0.036780160179442495 and parameters: {'topK': 319, 'num_factors': 850}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 54.86 sec. Users per second: 649


[I 2024-11-29 15:29:24,756] Trial 236 finished with value: 0.03696868206476171 and parameters: {'topK': 387, 'num_factors': 864}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 56.25 sec. Users per second: 633


[I 2024-11-29 15:33:08,682] Trial 237 finished with value: 0.03602596226953438 and parameters: {'topK': 422, 'num_factors': 767}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 57.00 sec. Users per second: 624


[I 2024-11-29 15:37:24,447] Trial 238 finished with value: 0.03785583850059079 and parameters: {'topK': 474, 'num_factors': 995}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 53.86 sec. Users per second: 661


[I 2024-11-29 15:41:13,920] Trial 239 finished with value: 0.036931031868662284 and parameters: {'topK': 354, 'num_factors': 865}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 58.26 sec. Users per second: 611


[I 2024-11-29 15:44:54,227] Trial 240 finished with value: 0.03551131445695156 and parameters: {'topK': 461, 'num_factors': 722}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 57.31 sec. Users per second: 621


[I 2024-11-29 15:48:57,447] Trial 241 finished with value: 0.03706063808676394 and parameters: {'topK': 477, 'num_factors': 887}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 55.08 sec. Users per second: 646


[I 2024-11-29 15:53:13,892] Trial 242 finished with value: 0.037813980918488685 and parameters: {'topK': 403, 'num_factors': 992}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 58.58 sec. Users per second: 608


[I 2024-11-29 15:57:33,849] Trial 243 finished with value: 0.03788253879067063 and parameters: {'topK': 521, 'num_factors': 991}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 1.01 min. Users per second: 590


[I 2024-11-29 16:01:59,966] Trial 244 finished with value: 0.03781961417801986 and parameters: {'topK': 525, 'num_factors': 995}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 52.74 sec. Users per second: 675


[I 2024-11-29 16:06:04,640] Trial 245 finished with value: 0.03790857240962445 and parameters: {'topK': 342, 'num_factors': 999}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 52.16 sec. Users per second: 682


[I 2024-11-29 16:09:48,279] Trial 246 finished with value: 0.036582338343398954 and parameters: {'topK': 326, 'num_factors': 832}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 50.50 sec. Users per second: 705


[I 2024-11-29 16:13:16,527] Trial 247 finished with value: 0.03606436609386336 and parameters: {'topK': 263, 'num_factors': 768}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 54.35 sec. Users per second: 655


[I 2024-11-29 16:17:10,979] Trial 248 finished with value: 0.03685804921771926 and parameters: {'topK': 376, 'num_factors': 865}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 56.10 sec. Users per second: 635


[I 2024-11-29 16:21:06,254] Trial 249 finished with value: 0.03694960947341766 and parameters: {'topK': 439, 'num_factors': 874}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 51.61 sec. Users per second: 690


[I 2024-11-29 16:25:10,371] Trial 250 finished with value: 0.03787666249718375 and parameters: {'topK': 313, 'num_factors': 995}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 52.45 sec. Users per second: 679


[I 2024-11-29 16:28:28,695] Trial 251 finished with value: 0.03509981895085015 and parameters: {'topK': 310, 'num_factors': 655}. Best is trial 166 with value: 0.03796322048976788.


PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 35595 (100.0%) in 48.87 sec. Users per second: 728


[I 2024-11-29 16:31:53,356] Trial 252 finished with value: 0.03612613350813809 and parameters: {'topK': 225, 'num_factors': 768}. Best is trial 166 with value: 0.03796322048976788.


## Some Optuna visualizations of the recommender's hyperparameters

In [14]:
# When tuning is disabled the tuning cell never assigns optuna_study,
# so load the stored study from the SQLite history instead.
if not config['tune_parameters']:
    optuna_study = optuna.load_study(study_name=f'hyperparameters_tuning_{config["model"]}', storage=f'sqlite:///{config["database_path"]}')
    
# Slice plot of the study's trials.
fig = optuna.visualization.plot_slice(optuna_study)
fig.show()

In [15]:
# When tuning is disabled the tuning cell never assigns optuna_study,
# so load the stored study from the SQLite history instead.
if not config['tune_parameters']:
    optuna_study = optuna.load_study(study_name=f'hyperparameters_tuning_{config["model"]}', storage=f'sqlite:///{config["database_path"]}')
    
# Hyperparameter-importance plot for the study.
fig = optuna.visualization.plot_param_importances(optuna_study)
fig.show()

## Let's train the recommender with the best parameter values

In [16]:
if config['tune_best_params']:

    # Where the hyperparameters come from: the JSON stored in the cloned
    # repository when tuning is disabled, otherwise the study's best trial.
    if not config['tune_parameters']:
        with open(f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/best_params_{config["model"]}.json', 'r') as best_params_json:
            best_params = json.load(best_params_json)
    else:
        best_params = optuna_study.best_trial.params

    # The final model is fitted on the sum of the train and validation matrices.
    recommender_instance = PureSVDItemRecommender(URM_train + URM_validation)
    recommender_instance.fit(**best_params)

PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!


# Testing

Create the recommendations for the submission. 

In [17]:
# Same switch as the final-fit cell: recommender_instance only exists when it is on.
if config['tune_best_params']:

    # Table of target users provided with the challenge data.
    data_target_users_test = pd.read_csv('/kaggle/input/recommender-system-2024-challenge-polimi/data_target_users_test.csv')
    # Builds the submission from the fitted recommender and saves it at the given
    # path (the cell output reports "Submission file saved as ...").
    create_submission(data_target_users_test, recommender_instance, f'/kaggle/working/submission_{config["model"]}.csv')

Submission file saved as /kaggle/working/submission_PureSVDItem.csv


# Save Version on GitHub 

Write or import a JSON file where the best hyperparameters are saved.

In [18]:
if config['tune_parameters']:
    # Dump the study's best hyperparameters as JSON in the working directory.
    with open(f'/kaggle/working/best_params_{config["model"]}.json', 'w') as params_file:
        json.dump(optuna_study.best_params, params_file)

    # Optionally upload that JSON to the model's folder in the GitHub repository.
    if config['save_github']:
        upload_file(
            f'/kaggle/working/best_params_{config["model"]}.json',
            f'{GH_PATH}/{config["model"]}Recommender/best_params_{config["model"]}.json',
            f'{config["model"]} tuning results (from kaggle notebook)',
            repo,
        )
elif config['copy_prev_best_params']:
    # No tuning in this run: copy the JSON already present in the cloned
    # repository into the working directory.
    shutil.copyfile(
        f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/best_params_{config["model"]}.json',
        f'/kaggle/working/best_params_{config["model"]}.json',
    )

File 'TrainedModels/WithoutKFCV/SingleRecommenders/PureSVDRecommender/PureSVDItemRecommender/best_params_PureSVDItem.json' created successfully.


Save the history of the tuned model.

In [19]:
# Upload the Optuna SQLite history only when this run tuned (tune_parameters)
# and GitHub saving is enabled.
if config['save_github'] and config['tune_parameters']:
    upload_file(
        config['database_path'], 
        f'{GH_PATH}/{config["model"]}Recommender/history_{config["model"]}.db',
        f'Tuning {config["model"]} db updated results (from kaggle notebook)',
        repo
    )

File 'TrainedModels/WithoutKFCV/SingleRecommenders/PureSVDRecommender/PureSVDItemRecommender/history_PureSVDItem.db' updated successfully.


Save the best trained model and its submission.

In [20]:
# Upload the submission CSV; it is only written when tune_best_params is on,
# hence the same switch is checked here together with save_github.
if config['save_github'] and config['tune_best_params']: 
    upload_file(
                f'/kaggle/working/submission_{config["model"]}.csv', 
                f'{GH_PATH}/{config["model"]}Recommender/Submission/submission_{config["model"]}.csv', 
                f'New {config["model"]} submission (from kaggle notebook)',
                repo
            )

File 'TrainedModels/WithoutKFCV/SingleRecommenders/PureSVDRecommender/PureSVDItemRecommender/Submission/submission_PureSVDItem.csv' created successfully.
