# Set connection with GitHub

In [1]:
# Optional cleanup: uncomment to delete an existing RECsys_Challenge2024 directory under /kaggle/working.
# ! cd /kaggle/working && rm -rf RECsys_Challenge2024

In [2]:
from kaggle_secrets import UserSecretsClient

token = UserSecretsClient().get_secret("Token")

! git clone https://{token}@github.com/madratak/RECsys_Challenge2024.git

Cloning into 'RECsys_Challenge2024'...
remote: Enumerating objects: 2214, done.[K
remote: Counting objects: 100% (596/596), done.[K
remote: Compressing objects: 100% (218/218), done.[K
remote: Total 2214 (delta 381), reused 584 (delta 370), pack-reused 1618 (from 1)[K
Receiving objects: 100% (2214/2214), 147.58 MiB | 34.29 MiB/s, done.
Resolving deltas: 100% (1326/1326), done.
Updating files: 100% (246/246), done.


In [3]:
! pip install PyGithub requests

Collecting PyGithub
  Downloading PyGithub-2.5.0-py3-none-any.whl.metadata (3.9 kB)
Collecting pynacl>=1.4.0 (from PyGithub)
  Downloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl.metadata (8.6 kB)
Downloading PyGithub-2.5.0-py3-none-any.whl (375 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m375.9/375.9 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (856 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m856.7/856.7 kB[0m [31m27.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pynacl, PyGithub
Successfully installed PyGithub-2.5.0 pynacl-1.5.0


In [None]:
# Move into the cloned repository and run its Cython compilation script
# (the script reports compiling the .pyx files with the current Python environment).
%cd /kaggle/working/RECsys_Challenge2024
!python run_compile_all_cython.py

run_compile_all_cython: Found 11 Cython files in 5 folders...
run_compile_all_cython: All files will be compiled using your current python environment: '/opt/conda/bin/python'
Compiling [1/11]: MatrixFactorization_Cython_Epoch.pyx... 
In file included from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/ndarraytypes.h:1929[m[K,
                 from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/ndarrayobject.h:12[m[K,
                 from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/arrayobject.h:5[m[K,
                 from [01m[KMatrixFactorization_Cython_Epoch.c:1252[m[K:
      |  [01;35m[K^~~~~~~[m[K
[01m[KMatrixFactorization_Cython_Epoch.c:[m[K In function '[01m[K__pyx_pf_32MatrixFactorization_Cython_Epoch_32MatrixFactorization_Cython_Epoch_10epochIteration_Cython_ASY_SVD_SGD[m[K':
26255 |         [01;35m[Kfor[m[K (__pyx_t_21 = __pyx_v_start_pos_seen_items; __pyx_t_2

In [None]:
from Utils.notebookFunctions import *
import numpy as np
import pandas as pd
import scipy.sparse as sps
import time
import shutil
import optuna
import json
import os
from RECsys_Challenge2024.Utils.seconds_to_biggest_unit import seconds_to_biggest_unit


# Local checkout of the GitHub repository. The directory name must match the one
# created by `git clone` (RECsys_Challenge2024), otherwise every lookup under K_PATH
# raises FileNotFoundError.
K_PATH = '/kaggle/working/RECsys_Challenge2024'
# Folder, relative to the repository root, where tuning artefacts are read from and uploaded to.
GH_PATH = 'TrainedModels/KFCV' # add with or without KFCV

# Seed NumPy's global random number generator.
np.random.seed(42)

## Import the repository

In [None]:
# Obtain the GitHub repository handle using the token read earlier; `repo` is the
# object passed to the upload_file calls at the end of the notebook.
repo = get_repo_from_github(token)

Repository 'RECsys_Challenge2024' found.


In [8]:
# Run configuration for this notebook.
config = dict(
    model='LinearCombination',                                      # name used in file and study names
    tune_parameters=True,                                           # run the optuna study
    database_path='/kaggle/working/history_LinearCombination.db',   # local optuna SQLite storage
    copy_prev_best_params=False,                                    # copy stored best params when not tuning
    tune_best_params=True,                                          # fit the final model and build the submission
    save_github=True,                                               # upload the resulting artefacts
)

Import the database where previous tuning trials have been saved.

In [None]:
# Location of the stored tuning history inside the cloned repository. It mirrors the
# path the history database is uploaded to ({GH_PATH}/<model>Recommender/history_<model>.db),
# so a previously uploaded study can be resumed.
history_db_source = f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/history_{config["model"]}.db'

try:
    shutil.copyfile(history_db_source, config['database_path'])
except FileNotFoundError:
    # Not fatal: if not present optuna will create the database.
    # Report it so a wrong path is not mistaken for a first run.
    print(f'No previous tuning history found at {history_db_source}; a new database will be created.')

# Construction of URM and ICM matrices

In [10]:
URM_all_dataframe = pd.read_csv("/kaggle/input/recommender-system-2024-challenge-polimi/data_train.csv")

# The ids are used directly as row/column indices, so each dimension must be
# (largest id + 1). Counting unique ids gives the same value only when the ids are
# contiguous from 0; otherwise the matrix constructor rejects out-of-range indices.
n_users = int(URM_all_dataframe["user_id"].max()) + 1
n_items = int(URM_all_dataframe["item_id"].max()) + 1

# User-by-item interaction matrix built from (data, (row, col)) triplets.
URM_all = sps.csr_matrix((URM_all_dataframe["data"].values,
                          (URM_all_dataframe["user_id"].values, URM_all_dataframe["item_id"].values)),
                        shape = (n_users, n_items))

URM_all

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 1764607 stored elements and shape (35736, 38121)>

In [11]:
ICM_dataframe = pd.read_csv("/kaggle/input/recommender-system-2024-challenge-polimi/data_ICM_metadata.csv")

# The ids are used directly as row/column indices, so each dimension must be
# (largest id + 1). Counting unique ids gives the same value only when the ids are
# contiguous from 0; otherwise the matrix constructor rejects out-of-range indices.
n_items = int(ICM_dataframe["item_id"].max()) + 1
n_features = int(ICM_dataframe["feature_id"].max()) + 1

# Item-by-feature matrix built from (data, (row, col)) triplets.
ICM_all = sps.csr_matrix((ICM_dataframe["data"].values,
                          (ICM_dataframe["item_id"].values, ICM_dataframe["feature_id"].values)),
                        shape = (n_items, n_features))

ICM_all

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 2940040 stored elements and shape (38121, 94331)>

# Training

In [12]:
# Work from the repository root: the following cells import project packages and
# open 'TrainedModels/...' through relative paths.
%cd /kaggle/working/RECsys_Challenge2024/

/kaggle/working/RECsys_Challenge2024


In [13]:
from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

# Split the interactions into a training and a validation matrix (train_percentage = 0.80).
URM_train, URM_validation = split_train_in_two_percentage_global_sample(
    URM_all,
    train_percentage = 0.80,
)

# Evaluator over the validation matrix, with metrics computed at cutoff 10.
evaluator_validation = EvaluatorHoldout(
    URM_validation,
    cutoff_list=[10],
)

EvaluatorHoldout: Ignoring 141 ( 0.4%) Users that have less than 1 test interactions


## Train the best recommenders with their previously tuned parameters.

In [14]:
from Recommenders.KNN.ItemKNN_CFCBF_Hybrid_Recommender import ItemKNN_CFCBF_Hybrid_Recommender
from Recommenders.SLIM.SLIMElasticNetRecommender import SLIMElasticNetRecommender
from Recommenders.GraphBased.RP3betaRecommender import RP3betaRecommender

# Base models to combine, keyed by the name used in their best-parameter files.
recommenders = {
    "ItemKNN_CFCBF_Hybrid": ItemKNN_CFCBF_Hybrid_Recommender,
    "SLIMElasticNet": SLIMElasticNetRecommender,
    "RP3beta": RP3betaRecommender
}

# Fitted base models, in the same order as `recommenders`.
loaded_recommenders = {}

for recommender_name, recommender_class in recommenders.items():

    start_time = time.time()

    print(f"{recommender_name} Model - TRAINING with its best parameters.")
    try:
        recommender = recommender_class(URM_train)
    except TypeError:
        # The constructor needs a further positional argument: retry with the ICM.
        # Only TypeError is caught so that genuine construction failures are not
        # hidden behind the retry.
        recommender = recommender_class(URM_train, ICM_all)

    # Extract best parameter values of the relative recommender model.
    # The path is relative, so it is resolved against the current working directory.
    with open(f'TrainedModels/{recommender_name}Recommender/best_params_{recommender_name}.json', 'r') as best_params_json:
        best_params = json.load(best_params_json)

    recommender.fit(**best_params)

    loaded_recommenders[recommender_name] = recommender

    new_time_value, new_time_unit = seconds_to_biggest_unit(time.time() - start_time)
    # '{:.2f}' prints two decimals; the previous '{:2f}' only set a minimum width.
    print("Training done in {:.2f}{}.\n".format(new_time_value, new_time_unit))

ItemKNN_CFCBF_Hybrid Model - TRAINING with its best parameters.
Similarity column 38121 (100.0%), 658.53 column/sec. Elapsed time 57.89 sec
Training done in 58.568434sec.

SLIMElasticNet Model - TRAINING with its best parameters.
SLIMElasticNetRecommender: Processed 2732 ( 7.2%) in 5.00 min. Items per second: 9.10
SLIMElasticNetRecommender: Processed 5292 (13.9%) in 10.00 min. Items per second: 8.82
SLIMElasticNetRecommender: Processed 7987 (21.0%) in 15.00 min. Items per second: 8.87
SLIMElasticNetRecommender: Processed 10648 (27.9%) in 20.00 min. Items per second: 8.87
SLIMElasticNetRecommender: Processed 13296 (34.9%) in 25.00 min. Items per second: 8.86
SLIMElasticNetRecommender: Processed 16061 (42.1%) in 30.00 min. Items per second: 8.92
SLIMElasticNetRecommender: Processed 18818 (49.4%) in 35.00 min. Items per second: 8.96
SLIMElasticNetRecommender: Processed 21558 (56.6%) in 40.01 min. Items per second: 8.98
SLIMElasticNetRecommender: Processed 24376 (63.9%) in 45.01 min. Items

In [15]:
from Recommenders.Hybrid.LinearCombinationRecommender import LinearCombinationRecommender 

def objective_function_(optuna_trial):
    """Optuna objective: evaluate one weighting of the fitted base recommenders.

    Suggests a 'norm' and one weight in [0, 1] per base model, rescales the
    weights so that they sum to 1, fits a LinearCombinationRecommender with them
    and returns the MAP at cutoff 10 measured by `evaluator_validation`.
    """
    recommender_instance = LinearCombinationRecommender(URM_train, loaded_recommenders.values())

    norm = optuna_trial.suggest_categorical('norm', [1, 2, np.inf, -np.inf])

    # One suggestion per base model, in the order itemknn, slim, rp3beta.
    itemknn_raw, slim_raw, rp3beta_raw = [
        optuna_trial.suggest_float(weight_name, 0.0, 1.0)
        for weight_name in ('itemknn_weight', 'slim_weight', 'rp3beta_weight')
    ]

    # Ensure the weights sum to 1 (Normalization)
    total_weight = itemknn_raw + slim_raw + rp3beta_raw
    normalized_weights = [
        raw_weight / total_weight
        for raw_weight in (itemknn_raw, slim_raw, rp3beta_raw)
    ]

    recommender_instance.fit(weights=normalized_weights, norm=norm)

    result_df, _ = evaluator_validation.evaluateRecommender(recommender_instance)

    return result_df.loc[10]["MAP"]

In [16]:
if config['tune_parameters']:

    tuning_study_name = f'hyperparameters_tuning_{config["model"]}'
    tuning_storage = f'sqlite:///{config["database_path"]}'

    # Create the study in the SQLite storage, or reuse it when it already exists.
    optuna_study = optuna.create_study(
        study_name=tuning_study_name,
        storage=tuning_storage,
        direction='maximize',
        load_if_exists=True,
    )

    optuna_study.optimize(objective_function_, n_trials=50)

[I 2024-11-20 17:54:33,349] A new study created in RDB with name: hyperparameters_tuning_LinearCombination


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.58 min. Users per second: 374


[I 2024-11-20 17:56:08,626] Trial 0 finished with value: 0.060810731175457905 and parameters: {'norm': 2, 'itemknn_weight': 0.04839102714240118, 'slim_weight': 0.4714419234144599, 'rp3beta_weight': 0.0007093145438766824}. Best is trial 0 with value: 0.060810731175457905.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.58 min. Users per second: 376


[I 2024-11-20 17:57:43,491] Trial 1 finished with value: 0.05822511856266403 and parameters: {'norm': 1, 'itemknn_weight': 0.22525755778879286, 'slim_weight': 0.592857769651083, 'rp3beta_weight': 0.26827303332013097}. Best is trial 0 with value: 0.060810731175457905.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.57 min. Users per second: 378


[I 2024-11-20 17:59:17,927] Trial 2 finished with value: 0.058320058997048375 and parameters: {'norm': -inf, 'itemknn_weight': 0.6432492946694525, 'slim_weight': 0.727266903400412, 'rp3beta_weight': 0.2102366253102811}. Best is trial 0 with value: 0.060810731175457905.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.57 min. Users per second: 378


[I 2024-11-20 18:00:52,340] Trial 3 finished with value: 0.055997240784217354 and parameters: {'norm': inf, 'itemknn_weight': 0.06934927351322939, 'slim_weight': 0.09799018494839873, 'rp3beta_weight': 0.7859203364350316}. Best is trial 0 with value: 0.060810731175457905.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.58 min. Users per second: 375


[I 2024-11-20 18:02:27,523] Trial 4 finished with value: 0.058477323148201775 and parameters: {'norm': -inf, 'itemknn_weight': 0.6101269650823264, 'slim_weight': 0.6886016364841502, 'rp3beta_weight': 0.46458669276509057}. Best is trial 0 with value: 0.060810731175457905.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.58 min. Users per second: 376


[I 2024-11-20 18:04:02,450] Trial 5 finished with value: 0.058894567856639254 and parameters: {'norm': inf, 'itemknn_weight': 0.19867309322400806, 'slim_weight': 0.45907636159417176, 'rp3beta_weight': 0.7302706572869135}. Best is trial 0 with value: 0.060810731175457905.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.59 min. Users per second: 374


[I 2024-11-20 18:05:37,795] Trial 6 finished with value: 0.05671849310028651 and parameters: {'norm': 1, 'itemknn_weight': 0.3288281380902458, 'slim_weight': 0.2731306785469324, 'rp3beta_weight': 0.6702151772171667}. Best is trial 0 with value: 0.060810731175457905.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.60 min. Users per second: 372


[I 2024-11-20 18:07:13,722] Trial 7 finished with value: 0.05976530166310854 and parameters: {'norm': 2, 'itemknn_weight': 0.29474547157778275, 'slim_weight': 0.8606555001429231, 'rp3beta_weight': 0.6191223914895924}. Best is trial 0 with value: 0.060810731175457905.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.59 min. Users per second: 374


[I 2024-11-20 18:08:49,031] Trial 8 finished with value: 0.06033128649689828 and parameters: {'norm': -inf, 'itemknn_weight': 0.1223527926685799, 'slim_weight': 0.5542971391971991, 'rp3beta_weight': 0.3715341243308009}. Best is trial 0 with value: 0.060810731175457905.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.58 min. Users per second: 375


[I 2024-11-20 18:10:24,114] Trial 9 finished with value: 0.05819795784587005 and parameters: {'norm': -inf, 'itemknn_weight': 0.3925220881630357, 'slim_weight': 0.3722862017144426, 'rp3beta_weight': 0.4800444293438646}. Best is trial 0 with value: 0.060810731175457905.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.59 min. Users per second: 373


[I 2024-11-20 18:11:59,667] Trial 10 finished with value: 0.051707776194711216 and parameters: {'norm': 2, 'itemknn_weight': 0.9537487379130569, 'slim_weight': 0.017684562202038956, 'rp3beta_weight': 0.02707788307021985}. Best is trial 0 with value: 0.060810731175457905.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.60 min. Users per second: 371


[I 2024-11-20 18:13:35,755] Trial 11 finished with value: 0.06086529229849746 and parameters: {'norm': 2, 'itemknn_weight': 0.014030163589050293, 'slim_weight': 0.2498981484521517, 'rp3beta_weight': 0.02854856469446654}. Best is trial 11 with value: 0.06086529229849746.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.59 min. Users per second: 372


[I 2024-11-20 18:15:11,513] Trial 12 finished with value: 0.06061746009449187 and parameters: {'norm': 2, 'itemknn_weight': 0.009501255383778903, 'slim_weight': 0.23065749871262797, 'rp3beta_weight': 0.006232668788242485}. Best is trial 11 with value: 0.06086529229849746.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.60 min. Users per second: 370


[I 2024-11-20 18:16:47,993] Trial 13 finished with value: 0.059933077590260536 and parameters: {'norm': 2, 'itemknn_weight': 0.012325802942820496, 'slim_weight': 0.2334323577409061, 'rp3beta_weight': 0.14223950318680356}. Best is trial 11 with value: 0.06086529229849746.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.61 min. Users per second: 368


[I 2024-11-20 18:18:24,822] Trial 14 finished with value: 0.057561243441983156 and parameters: {'norm': 2, 'itemknn_weight': 0.5074426568790298, 'slim_weight': 0.3885971673558575, 'rp3beta_weight': 0.13417243981153493}. Best is trial 11 with value: 0.06086529229849746.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.60 min. Users per second: 370


[I 2024-11-20 18:20:01,195] Trial 15 finished with value: 0.05853918197891875 and parameters: {'norm': 2, 'itemknn_weight': 0.837115490230956, 'slim_weight': 0.93434138058445, 'rp3beta_weight': 0.3195602688401754}. Best is trial 11 with value: 0.06086529229849746.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.61 min. Users per second: 369


[I 2024-11-20 18:21:37,942] Trial 16 finished with value: 0.056723750660537586 and parameters: {'norm': 2, 'itemknn_weight': 0.1754080009530517, 'slim_weight': 0.14325658091653737, 'rp3beta_weight': 0.8892579766226341}. Best is trial 11 with value: 0.06086529229849746.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.60 min. Users per second: 370


[I 2024-11-20 18:23:14,304] Trial 17 finished with value: 0.057011821706720714 and parameters: {'norm': 2, 'itemknn_weight': 0.4706864512396022, 'slim_weight': 0.3398207225653512, 'rp3beta_weight': 0.05073677652145462}. Best is trial 11 with value: 0.06086529229849746.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.59 min. Users per second: 373


[I 2024-11-20 18:24:49,956] Trial 18 finished with value: 0.05878701752742864 and parameters: {'norm': 1, 'itemknn_weight': 0.13533043050929272, 'slim_weight': 0.47570952386194604, 'rp3beta_weight': 0.15044222411566976}. Best is trial 11 with value: 0.06086529229849746.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.59 min. Users per second: 372


[I 2024-11-20 18:26:25,725] Trial 19 finished with value: 0.05901979165969701 and parameters: {'norm': inf, 'itemknn_weight': 0.27675693390347544, 'slim_weight': 0.6566264603767473, 'rp3beta_weight': 0.970040474204904}. Best is trial 11 with value: 0.06086529229849746.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.60 min. Users per second: 372


[I 2024-11-20 18:28:01,694] Trial 20 finished with value: 0.05779355603270272 and parameters: {'norm': 2, 'itemknn_weight': 0.07492554296623087, 'slim_weight': 0.14940735065595526, 'rp3beta_weight': 0.39479725097296814}. Best is trial 11 with value: 0.06086529229849746.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.61 min. Users per second: 369


[I 2024-11-20 18:29:38,270] Trial 21 finished with value: 0.0607023580982689 and parameters: {'norm': 2, 'itemknn_weight': 0.004761004644580478, 'slim_weight': 0.28351335722161514, 'rp3beta_weight': 0.0313888491500785}. Best is trial 11 with value: 0.06086529229849746.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.61 min. Users per second: 368


[I 2024-11-20 18:31:15,109] Trial 22 finished with value: 0.06056326129271596 and parameters: {'norm': 2, 'itemknn_weight': 0.039623988106817054, 'slim_weight': 0.3026874116141727, 'rp3beta_weight': 0.11332084144763657}. Best is trial 11 with value: 0.06086529229849746.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.61 min. Users per second: 368


[I 2024-11-20 18:32:52,122] Trial 23 finished with value: 0.06016018947729697 and parameters: {'norm': 2, 'itemknn_weight': 0.12779007982443932, 'slim_weight': 0.4366709865082766, 'rp3beta_weight': 0.2390486067205928}. Best is trial 11 with value: 0.06086529229849746.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.61 min. Users per second: 369


[I 2024-11-20 18:34:28,908] Trial 24 finished with value: 0.0603195985703345 and parameters: {'norm': 2, 'itemknn_weight': 0.1931475575984507, 'slim_weight': 0.5453611536575308, 'rp3beta_weight': 0.004611451287437451}. Best is trial 11 with value: 0.06086529229849746.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.61 min. Users per second: 368


[I 2024-11-20 18:36:05,799] Trial 25 finished with value: 0.060497942015217764 and parameters: {'norm': 2, 'itemknn_weight': 0.0028501380735016296, 'slim_weight': 0.2592776476942067, 'rp3beta_weight': 0.07759097979828522}. Best is trial 11 with value: 0.06086529229849746.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.60 min. Users per second: 371


[I 2024-11-20 18:37:41,885] Trial 26 finished with value: 0.05517106580422898 and parameters: {'norm': 2, 'itemknn_weight': 0.3849923500261588, 'slim_weight': 0.02159680207271103, 'rp3beta_weight': 0.22167915734932359}. Best is trial 11 with value: 0.06086529229849746.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.60 min. Users per second: 370


[I 2024-11-20 18:39:18,188] Trial 27 finished with value: 0.05889498591963642 and parameters: {'norm': 1, 'itemknn_weight': 0.10024588571777371, 'slim_weight': 0.40950444829112664, 'rp3beta_weight': 0.16699354376726194}. Best is trial 11 with value: 0.06086529229849746.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.60 min. Users per second: 371


[I 2024-11-20 18:40:54,440] Trial 28 finished with value: 0.057788195907663084 and parameters: {'norm': inf, 'itemknn_weight': 0.2438692705595471, 'slim_weight': 0.17576873739985766, 'rp3beta_weight': 0.5488531247546857}. Best is trial 11 with value: 0.06086529229849746.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.59 min. Users per second: 372


[I 2024-11-20 18:42:30,300] Trial 29 finished with value: 0.05608688686880674 and parameters: {'norm': 1, 'itemknn_weight': 0.6565455772626673, 'slim_weight': 0.6233093927428501, 'rp3beta_weight': 0.3206197674651893}. Best is trial 11 with value: 0.06086529229849746.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.60 min. Users per second: 371


[I 2024-11-20 18:44:06,365] Trial 30 finished with value: 0.05839227129724196 and parameters: {'norm': 2, 'itemknn_weight': 0.7405046071423772, 'slim_weight': 0.7827750455200508, 'rp3beta_weight': 0.29437465606638913}. Best is trial 11 with value: 0.06086529229849746.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.60 min. Users per second: 370


[I 2024-11-20 18:45:42,827] Trial 31 finished with value: 0.06055042731612309 and parameters: {'norm': 2, 'itemknn_weight': 0.0024980125372562577, 'slim_weight': 0.19870530876374437, 'rp3beta_weight': 0.009778034738781048}. Best is trial 11 with value: 0.06086529229849746.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.61 min. Users per second: 368


[I 2024-11-20 18:47:19,666] Trial 32 finished with value: 0.06052371587769664 and parameters: {'norm': 2, 'itemknn_weight': 0.08670751927554081, 'slim_weight': 0.3013076084257233, 'rp3beta_weight': 0.06819312007514067}. Best is trial 11 with value: 0.06086529229849746.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.61 min. Users per second: 370


[I 2024-11-20 18:48:56,185] Trial 33 finished with value: 0.05862588935934724 and parameters: {'norm': 2, 'itemknn_weight': 0.058227592646589055, 'slim_weight': 0.0670588305927505, 'rp3beta_weight': 0.08148950146663102}. Best is trial 11 with value: 0.06086529229849746.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.60 min. Users per second: 371


[I 2024-11-20 18:50:32,211] Trial 34 finished with value: 0.05967213270099705 and parameters: {'norm': -inf, 'itemknn_weight': 0.17336528159694492, 'slim_weight': 0.33277580209456875, 'rp3beta_weight': 0.19522864633481918}. Best is trial 11 with value: 0.06086529229849746.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.61 min. Users per second: 369


[I 2024-11-20 18:52:08,800] Trial 35 finished with value: 0.06086073485441203 and parameters: {'norm': 2, 'itemknn_weight': 0.06280384396171663, 'slim_weight': 0.5170879718118307, 'rp3beta_weight': 0.10134211027680617}. Best is trial 11 with value: 0.06086529229849746.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.60 min. Users per second: 370


[I 2024-11-20 18:53:45,145] Trial 36 finished with value: 0.06021979188266408 and parameters: {'norm': inf, 'itemknn_weight': 0.250894177663891, 'slim_weight': 0.5306055041441866, 'rp3beta_weight': 0.08976795561099693}. Best is trial 11 with value: 0.06086529229849746.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.61 min. Users per second: 369


[I 2024-11-20 18:55:21,731] Trial 37 finished with value: 0.06029318925210055 and parameters: {'norm': 2, 'itemknn_weight': 0.15395089481943056, 'slim_weight': 0.49297289962576035, 'rp3beta_weight': 0.19710994678385246}. Best is trial 11 with value: 0.06086529229849746.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.61 min. Users per second: 369


[I 2024-11-20 18:56:58,424] Trial 38 finished with value: 0.0607301486966448 and parameters: {'norm': -inf, 'itemknn_weight': 0.07383032550605031, 'slim_weight': 0.7186290736528992, 'rp3beta_weight': 0.26645252396900343}. Best is trial 11 with value: 0.06086529229849746.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.61 min. Users per second: 368


[I 2024-11-20 18:58:35,287] Trial 39 finished with value: 0.060017145042216856 and parameters: {'norm': -inf, 'itemknn_weight': 0.3573376301766952, 'slim_weight': 0.8408266101441054, 'rp3beta_weight': 0.25806065175521287}. Best is trial 11 with value: 0.06086529229849746.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.60 min. Users per second: 370


[I 2024-11-20 19:00:11,633] Trial 40 finished with value: 0.06009771971718713 and parameters: {'norm': -inf, 'itemknn_weight': 0.23227070703550512, 'slim_weight': 0.6754060106473809, 'rp3beta_weight': 0.4208955675603838}. Best is trial 11 with value: 0.06086529229849746.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.59 min. Users per second: 373


[I 2024-11-20 19:01:47,215] Trial 41 finished with value: 0.06092930164527146 and parameters: {'norm': -inf, 'itemknn_weight': 0.07246307620697619, 'slim_weight': 0.7505575301741138, 'rp3beta_weight': 0.11003777987867452}. Best is trial 41 with value: 0.06092930164527146.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.60 min. Users per second: 372


[I 2024-11-20 19:03:23,139] Trial 42 finished with value: 0.06094151465895933 and parameters: {'norm': -inf, 'itemknn_weight': 0.07597305000217477, 'slim_weight': 0.7740825395544235, 'rp3beta_weight': 0.12494359026821118}. Best is trial 42 with value: 0.06094151465895933.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.59 min. Users per second: 372


[I 2024-11-20 19:04:58,921] Trial 43 finished with value: 0.06089599818950693 and parameters: {'norm': -inf, 'itemknn_weight': 0.11362906757204286, 'slim_weight': 0.7847958489036125, 'rp3beta_weight': 0.1120211269012182}. Best is trial 42 with value: 0.06094151465895933.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.59 min. Users per second: 372


[I 2024-11-20 19:06:34,782] Trial 44 finished with value: 0.06092937745402824 and parameters: {'norm': -inf, 'itemknn_weight': 0.11227313914029693, 'slim_weight': 0.9811544035321462, 'rp3beta_weight': 0.17201492528487697}. Best is trial 42 with value: 0.06094151465895933.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.59 min. Users per second: 373


[I 2024-11-20 19:08:10,513] Trial 45 finished with value: 0.06033771240387151 and parameters: {'norm': -inf, 'itemknn_weight': 0.31464209744592553, 'slim_weight': 0.9813471156211333, 'rp3beta_weight': 0.17420963601259967}. Best is trial 42 with value: 0.06094151465895933.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.60 min. Users per second: 371


[I 2024-11-20 19:09:46,660] Trial 46 finished with value: 0.06090891354903005 and parameters: {'norm': -inf, 'itemknn_weight': 0.11504636796860238, 'slim_weight': 0.8856282195083748, 'rp3beta_weight': 0.13171548217302698}. Best is trial 42 with value: 0.06094151465895933.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.60 min. Users per second: 370


[I 2024-11-20 19:11:23,152] Trial 47 finished with value: 0.06037430462188024 and parameters: {'norm': -inf, 'itemknn_weight': 0.20261575704540807, 'slim_weight': 0.8862987832477116, 'rp3beta_weight': 0.563001492042273}. Best is trial 42 with value: 0.06094151465895933.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.59 min. Users per second: 373


[I 2024-11-20 19:12:58,845] Trial 48 finished with value: 0.06067225310314076 and parameters: {'norm': -inf, 'itemknn_weight': 0.1191787100928837, 'slim_weight': 0.7781325191771746, 'rp3beta_weight': 0.3484640644536699}. Best is trial 42 with value: 0.06094151465895933.


LinearCombinationRecommender: Fit completed in 0.00 seconds.
EvaluatorHoldout: Processed 35595 (100.0%) in 1.59 min. Users per second: 374


[I 2024-11-20 19:14:34,162] Trial 49 finished with value: 0.060776465617382744 and parameters: {'norm': -inf, 'itemknn_weight': 0.15082913221236485, 'slim_weight': 0.8158840353186733, 'rp3beta_weight': 0.1325839762190068}. Best is trial 42 with value: 0.06094151465895933.


## Some optuna visualizations on recommender parameters

In [17]:
# When tuning was skipped in this run, read the study back from the SQLite storage.
if not config['tune_parameters']:
    optuna_study = optuna.load_study(
        study_name=f'hyperparameters_tuning_{config["model"]}',
        storage=f'sqlite:///{config["database_path"]}',
    )

# Slice plot of the study.
fig = optuna.visualization.plot_slice(optuna_study)
fig.show()

In [18]:
# When tuning was skipped in this run, read the study back from the SQLite storage.
if not config['tune_parameters']:
    optuna_study = optuna.load_study(
        study_name=f'hyperparameters_tuning_{config["model"]}',
        storage=f'sqlite:///{config["database_path"]}',
    )

# Parameter-importance plot of the study.
fig = optuna.visualization.plot_param_importances(optuna_study)
fig.show()

## Let's train the recommender with best parameter values

In [None]:
if config['tune_best_params']:

    # Best hyperparameters: from the study just run, or from the stored JSON file.
    if config['tune_parameters']:
        best_params = optuna_study.best_trial.params
    else:
        with open(f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/best_params_{config["model"]}.json', 'r') as best_params_json:
            best_params = json.load(best_params_json)

    # Pick the weights by name, in the order the base models were loaded
    # (itemknn, slim, rp3beta), instead of relying on the dictionary's value order.
    itemknn_weight = best_params["itemknn_weight"]
    slim_weight = best_params["slim_weight"]
    rp3beta_weight = best_params["rp3beta_weight"]

    # The stored parameters are the raw suggestions; the objective rescaled them to
    # sum to 1 before fitting, so do the same to fit exactly the evaluated configuration.
    total_weight = itemknn_weight + slim_weight + rp3beta_weight

    fit_params = {
        "weights": [
            itemknn_weight / total_weight,
            slim_weight / total_weight,
            rp3beta_weight / total_weight,
        ],
        "norm": best_params["norm"]
    }

    # NOTE(review): the base models in `loaded_recommenders` were fitted on URM_train
    # only; confirm whether they should be refitted on the full data before this step.
    recommender_instance = LinearCombinationRecommender(URM_train + URM_validation, loaded_recommenders.values())
    recommender_instance.fit(**fit_params)

LinearCombinationRecommender: Fit completed in 0.00 seconds.


# Testing

Create the recommendations for the submission. 

In [None]:
if config['tune_best_params']:

    # Users for which recommendations must be produced.
    data_target_users_test = pd.read_csv('/kaggle/input/recommender-system-2024-challenge-polimi/data_target_users_test.csv')

    # Double quotes inside the f-string: reusing the enclosing quote character is a
    # SyntaxError on Python versions older than 3.12.
    create_submission(data_target_users_test, recommender_instance, f'/kaggle/working/submission_{config["model"]}.csv')

Submission file saved as /kaggle/working/submission_LinearCombination.csv


# Save Version on GitHub 

Write or import the JSON file in which the best hyperparameters are saved.

In [None]:
if config['tune_parameters']:
    # Persist the best hyperparameters found by the study as JSON.
    with open(f'/kaggle/working/best_params_{config["model"]}.json', 'w') as params_file:
        json.dump(optuna_study.best_params, params_file)

    if config['save_github']:
        # `repo` is passed explicitly, as in the other upload_file calls of this notebook.
        upload_file(
            f'/kaggle/working/best_params_{config["model"]}.json',
            f'{GH_PATH}/{config["model"]}Recommender/best_params_{config["model"]}.json',
            f'{config["model"]} tuning results (from kaggle notebook)',
            repo
        )
elif config['copy_prev_best_params']:
    # No tuning in this run: reuse the parameters stored in the cloned repository.
    shutil.copyfile(
        f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/'
        f'best_params_{config["model"]}.json',
        f'/kaggle/working/best_params_{config["model"]}.json'
    )

File 'TrainedModels/LinearCombinationRecommender/best_params_LinearCombination.json' created successfully.


Save the history of the tuned model.

In [None]:
# Upload the optuna history database only when uploads are enabled and a tuning run took place.
if config['save_github'] and config['tune_parameters']:
    history_db_target = f'{GH_PATH}/{config["model"]}Recommender/history_{config["model"]}.db'
    history_db_message = f'Tuning {config["model"]} db updated results (from kaggle notebook)'

    upload_file(config['database_path'], history_db_target, history_db_message, repo)

File 'TrainedModels/LinearCombinationRecommender/history_LinearCombination.db' created successfully.


Save the best trained model and its submission.

In [None]:
# Upload the submission file only when uploads are enabled and the final model was fitted.
if config['save_github'] and config['tune_best_params']:
    submission_local_path = f'/kaggle/working/submission_{config["model"]}.csv'
    submission_target = f'{GH_PATH}/{config["model"]}Recommender/Submission/submission_{config["model"]}.csv'
    submission_message = f'New {config["model"]} submission (from kaggle notebook)'

    upload_file(submission_local_path, submission_target, submission_message, repo)

File 'TrainedModels/LinearCombinationRecommender/Submission/submission_LinearCombination.csv' created successfully.
