# Set up the connection with GitHub

In [1]:
# ! cd /kaggle/working && rm -rf RECsys_Challenge2024

In [2]:
from kaggle_secrets import UserSecretsClient

# Read the secret named "Token" from the Kaggle secrets store; it is used below as the GitHub credential.
token = UserSecretsClient().get_secret("Token")

# Clone the project repository over HTTPS; IPython substitutes {token} into the shell command.
# NOTE(review): the token is embedded in the clone URL, so git presumably stores it in the clone's .git/config — confirm this is acceptable.
! git clone https://{token}@github.com/madratak/RECsys_Challenge2024.git

Cloning into 'RECsys_Challenge2024'...
remote: Enumerating objects: 2029, done.[K
remote: Counting objects: 100% (411/411), done.[K
remote: Compressing objects: 100% (215/215), done.[K
remote: Total 2029 (delta 248), reused 323 (delta 188), pack-reused 1618 (from 1)[K
Receiving objects: 100% (2029/2029), 141.68 MiB | 28.60 MiB/s, done.
Resolving deltas: 100% (1193/1193), done.
Updating files: 100% (238/238), done.


In [3]:
! pip install PyGithub requests

Collecting PyGithub
  Downloading PyGithub-2.5.0-py3-none-any.whl.metadata (3.9 kB)
Collecting pynacl>=1.4.0 (from PyGithub)
  Downloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl.metadata (8.6 kB)
Downloading PyGithub-2.5.0-py3-none-any.whl (375 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m375.9/375.9 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (856 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m856.7/856.7 kB[0m [31m29.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pynacl, PyGithub
Successfully installed PyGithub-2.5.0 pynacl-1.5.0


In [None]:
% cd /kaggle/working/RECsys_Challenge2024
! python run_compile_all_cython.py

run_compile_all_cython: Found 11 Cython files in 5 folders...
run_compile_all_cython: All files will be compiled using your current python environment: '/opt/conda/bin/python'
Compiling [1/11]: MatrixFactorizationImpressions_Cython_Epoch.pyx... 
In file included from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/ndarraytypes.h:1929[m[K,
                 from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/ndarrayobject.h:12[m[K,
                 from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/arrayobject.h:5[m[K,
                 from [01m[KMatrixFactorizationImpressions_Cython_Epoch.c:1252[m[K:
      |  [01;35m[K^~~~~~~[m[K
[01m[KMatrixFactorizationImpressions_Cython_Epoch.c:[m[K In function '[01m[K__pyx_f_43MatrixFactorizationImpressions_Cython_Epoch_32MatrixFactorization_Cython_Epoch_sampleBPR_Cython[m[K':
30351 |       [01;35m[K__pyx_t_4 = (__pyx_v_start_pos_impression

In [None]:
from Utils.notebookFunctions import *
import numpy as np
import pandas as pd
import scipy.sparse as sps
import time
import shutil
import optuna
import json
import os
from Utils.seconds_to_biggest_unit import seconds_to_biggest_unit


# Local checkout of the repository. The directory name must match the one created by
# `git clone` (RECsys_Challenge2024) exactly, because paths on Linux are case-sensitive.
K_PATH = '/kaggle/working/RECsys_Challenge2024'
# Folder inside the repository where tuning artifacts of single recommenders are stored.
GH_PATH = 'TrainedModels/WithoutKFCV/SingleRecommenders' # add with or without KFCV

# Seed NumPy's global random generator so that runs are repeatable.
np.random.seed(42)

## Import the repository

In [None]:
# Obtain the repository handle using the token read earlier; it is passed to upload_file when results are pushed.
repo = get_repo_from_github(token)

Repository 'RECsys_Challenge2024' found.


In [8]:
# Run configuration for this notebook.
#   model                 - recommender name, used to build file names and paths
#   tune_parameters       - run the Optuna search
#   database_path         - SQLite file holding the Optuna study
#   copy_prev_best_params - when not tuning, copy the stored best-parameters JSON
#   tune_best_params      - fit the final model and write the submission
#   save_github           - upload the produced artifacts to the repository
config = dict(
    model='SLIMElasticNet',
    tune_parameters=True,
    database_path='/kaggle/working/history_SLIMElasticNet.db',
    copy_prev_best_params=False,
    tune_best_params=False,
    save_github=True,
)

Import the database where previous tuning trials have been saved.

In [None]:
# Seed the local Optuna storage with the history database kept in the repository checkout, if there is one.
history_db_source = f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/history_{config["model"]}.db'
try:
    shutil.copyfile(history_db_source, config['database_path'])
except FileNotFoundError:
    # Still best effort (Optuna creates the database when it is missing), but report the miss
    # so that a wrong path is not silently mistaken for "no previous trials".
    print(f"Could not copy a previous tuning history from '{history_db_source}'; "
          f"a new study database will be created.")

# Construction of URM and ICM matrices

In [10]:
# Load the user-item interactions shipped with the challenge.
URM_all_dataframe = pd.read_csv(
    "/kaggle/input/recommender-system-2024-challenge-polimi/data_train.csv"
)

# The matrix dimensions are the numbers of distinct user and item ids.
n_users = URM_all_dataframe["user_id"].unique().size
n_items = URM_all_dataframe["item_id"].unique().size

# Build the user-rating matrix in CSR format from (value, (row, column)) triplets.
URM_all = sps.csr_matrix(
    (
        URM_all_dataframe["data"].values,
        (URM_all_dataframe["user_id"].values, URM_all_dataframe["item_id"].values),
    ),
    shape=(n_users, n_items),
)

URM_all

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 1764607 stored elements and shape (35736, 38121)>

In [11]:
# Load the item metadata (item-feature pairs) shipped with the challenge.
ICM_dataframe = pd.read_csv(
    "/kaggle/input/recommender-system-2024-challenge-polimi/data_ICM_metadata.csv"
)

# The matrix dimensions are the numbers of distinct item and feature ids.
n_items = ICM_dataframe["item_id"].unique().size
n_features = ICM_dataframe["feature_id"].unique().size

# Build the item-content matrix in CSR format from (value, (row, column)) triplets.
ICM_all = sps.csr_matrix(
    (
        ICM_dataframe["data"].values,
        (ICM_dataframe["item_id"].values, ICM_dataframe["feature_id"].values),
    ),
    shape=(n_items, n_features),
)

ICM_all

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 2940040 stored elements and shape (38121, 94331)>

# Training

In [13]:
from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

# Split the interactions into a training and a validation matrix; 0.80 is passed as train_percentage.
# NOTE(review): the helper's name suggests a random global sample, presumably driven by the NumPy seed set earlier — confirm.
URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.80)

# Evaluator built on the validation matrix with a single cutoff of 10.
evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])

EvaluatorHoldout: Ignoring 141 ( 0.4%) Users that have less than 1 test interactions


In [14]:
from Recommenders.SLIM.SLIMElasticNetRecommender import SLIMElasticNetRecommender

def objective_function_SLIMElasticNet(optuna_trial):
    """Optuna objective: fit SLIM ElasticNet with sampled hyperparameters and score it.

    Args:
        optuna_trial: trial object used to sample ``topK``, ``l1_ratio``,
            ``alpha`` and ``positive_only``.

    Returns:
        The ``"MAP"`` entry of the cutoff-10 row of the results produced by
        ``evaluator_validation`` for the fitted recommender.
    """
    model = SLIMElasticNetRecommender(URM_train)

    # Parameters are sampled one by one, in the same order as they are passed to fit().
    top_k = optuna_trial.suggest_int("topK", 0, 1500)
    l1_ratio = optuna_trial.suggest_float("l1_ratio", 0.01, 1.0, log=True)
    alpha = optuna_trial.suggest_float("alpha", 1e-4, 1e-1, log=True)
    positive_only = optuna_trial.suggest_categorical("positive_only", [True, False])

    model.fit(
        topK=top_k,
        l1_ratio=l1_ratio,
        alpha=alpha,
        positive_only=positive_only,
    )

    # The evaluator returns a pair; only the results table is needed here.
    metrics_by_cutoff, _report = evaluator_validation.evaluateRecommender(model)

    return metrics_by_cutoff.loc[10]["MAP"]

In [15]:
if config['tune_parameters']:

    # Name and SQLite location of the study; load_if_exists=True continues a study already stored there.
    study_name = f'hyperparameters_tuning_{config["model"]}'
    storage_url = f'sqlite:///{config["database_path"]}'

    optuna_study = optuna.create_study(
        study_name=study_name,
        storage=storage_url,
        direction='maximize',
        load_if_exists=True,
    )

    # Run 8 trials of the objective defined above.
    optuna_study.optimize(objective_function_SLIMElasticNet, n_trials=8)

[I 2024-11-18 23:28:08,831] Using an existing study with name 'hyperparameters_tuning_SLIMElasticNet' instead of creating a new one.


SLIMElasticNetRecommender: Processed 3359 ( 8.8%) in 5.00 min. Items per second: 11.19
SLIMElasticNetRecommender: Processed 6864 (18.0%) in 10.00 min. Items per second: 11.44
SLIMElasticNetRecommender: Processed 10544 (27.7%) in 15.00 min. Items per second: 11.71
SLIMElasticNetRecommender: Processed 14430 (37.9%) in 20.00 min. Items per second: 12.02
SLIMElasticNetRecommender: Processed 18142 (47.6%) in 25.00 min. Items per second: 12.09
SLIMElasticNetRecommender: Processed 21598 (56.7%) in 30.01 min. Items per second: 12.00
SLIMElasticNetRecommender: Processed 24777 (65.0%) in 35.01 min. Items per second: 11.80
SLIMElasticNetRecommender: Processed 27824 (73.0%) in 40.01 min. Items per second: 11.59
SLIMElasticNetRecommender: Processed 31344 (82.2%) in 45.01 min. Items per second: 11.61
SLIMElasticNetRecommender: Processed 35440 (93.0%) in 50.01 min. Items per second: 11.81
SLIMElasticNetRecommender: Processed 38121 (100.0%) in 53.85 min. Items per second: 11.80
EvaluatorHoldout: Proce

[I 2024-11-19 00:22:38,670] Trial 34 finished with value: 0.059461107878088726 and parameters: {'topK': 887, 'l1_ratio': 0.12510544561751025, 'alpha': 0.0004324707593290768, 'positive_only': True}. Best is trial 16 with value: 0.060174874079423415.


SLIMElasticNetRecommender: Processed 3243 ( 8.5%) in 5.00 min. Items per second: 10.81
SLIMElasticNetRecommender: Processed 6412 (16.8%) in 10.00 min. Items per second: 10.68
SLIMElasticNetRecommender: Processed 9485 (24.9%) in 15.00 min. Items per second: 10.54
SLIMElasticNetRecommender: Processed 12881 (33.8%) in 20.00 min. Items per second: 10.73
SLIMElasticNetRecommender: Processed 16242 (42.6%) in 25.01 min. Items per second: 10.82
SLIMElasticNetRecommender: Processed 19490 (51.1%) in 30.01 min. Items per second: 10.83
SLIMElasticNetRecommender: Processed 22916 (60.1%) in 35.01 min. Items per second: 10.91
SLIMElasticNetRecommender: Processed 26349 (69.1%) in 40.01 min. Items per second: 10.98
SLIMElasticNetRecommender: Processed 29375 (77.1%) in 45.01 min. Items per second: 10.88
SLIMElasticNetRecommender: Processed 32513 (85.3%) in 50.01 min. Items per second: 10.84
SLIMElasticNetRecommender: Processed 35936 (94.3%) in 55.01 min. Items per second: 10.89
SLIMElasticNetRecommender

[I 2024-11-19 01:21:32,682] Trial 35 finished with value: 0.05995162286481106 and parameters: {'topK': 773, 'l1_ratio': 0.21883748022627142, 'alpha': 0.00022839635784026256, 'positive_only': True}. Best is trial 16 with value: 0.060174874079423415.


SLIMElasticNetRecommender: Processed 2779 ( 7.3%) in 5.00 min. Items per second: 9.26
SLIMElasticNetRecommender: Processed 5571 (14.6%) in 10.00 min. Items per second: 9.28
SLIMElasticNetRecommender: Processed 8217 (21.6%) in 15.00 min. Items per second: 9.13
SLIMElasticNetRecommender: Processed 11048 (29.0%) in 20.00 min. Items per second: 9.20
SLIMElasticNetRecommender: Processed 13816 (36.2%) in 25.01 min. Items per second: 9.21
SLIMElasticNetRecommender: Processed 16617 (43.6%) in 30.01 min. Items per second: 9.23
SLIMElasticNetRecommender: Processed 19448 (51.0%) in 35.01 min. Items per second: 9.26
SLIMElasticNetRecommender: Processed 22362 (58.7%) in 40.01 min. Items per second: 9.32
SLIMElasticNetRecommender: Processed 25032 (65.7%) in 45.01 min. Items per second: 9.27
SLIMElasticNetRecommender: Processed 27717 (72.7%) in 50.01 min. Items per second: 9.24
SLIMElasticNetRecommender: Processed 30506 (80.0%) in 55.01 min. Items per second: 9.24
SLIMElasticNetRecommender: Processed

[I 2024-11-19 02:30:13,287] Trial 36 finished with value: 0.06005641843757998 and parameters: {'topK': 1277, 'l1_ratio': 0.07497532995364076, 'alpha': 0.0002120691645109977, 'positive_only': True}. Best is trial 16 with value: 0.060174874079423415.


SLIMElasticNetRecommender: Processed 4593 (12.0%) in 5.00 min. Items per second: 15.31
SLIMElasticNetRecommender: Processed 9022 (23.7%) in 10.00 min. Items per second: 15.03
SLIMElasticNetRecommender: Processed 13593 (35.7%) in 15.00 min. Items per second: 15.10
SLIMElasticNetRecommender: Processed 18307 (48.0%) in 20.00 min. Items per second: 15.25
SLIMElasticNetRecommender: Processed 23111 (60.6%) in 25.00 min. Items per second: 15.40
SLIMElasticNetRecommender: Processed 27792 (72.9%) in 30.00 min. Items per second: 15.44
SLIMElasticNetRecommender: Processed 32346 (84.9%) in 35.00 min. Items per second: 15.40
SLIMElasticNetRecommender: Processed 36881 (96.7%) in 40.01 min. Items per second: 15.36
SLIMElasticNetRecommender: Processed 38121 (100.0%) in 41.69 min. Items per second: 15.24
EvaluatorHoldout: Processed 35595 (100.0%) in 34.10 sec. Users per second: 1044


[I 2024-11-19 03:12:29,267] Trial 37 finished with value: 0.05788256108736297 and parameters: {'topK': 1179, 'l1_ratio': 0.7490281485290458, 'alpha': 0.0001445276409996166, 'positive_only': True}. Best is trial 16 with value: 0.060174874079423415.


SLIMElasticNetRecommender: Processed 2695 ( 7.1%) in 5.00 min. Items per second: 8.98
SLIMElasticNetRecommender: Processed 5474 (14.4%) in 10.00 min. Items per second: 9.12
SLIMElasticNetRecommender: Processed 8360 (21.9%) in 15.00 min. Items per second: 9.29
SLIMElasticNetRecommender: Processed 11237 (29.5%) in 20.00 min. Items per second: 9.36
SLIMElasticNetRecommender: Processed 14328 (37.6%) in 25.00 min. Items per second: 9.55
SLIMElasticNetRecommender: Processed 17337 (45.5%) in 30.01 min. Items per second: 9.63
SLIMElasticNetRecommender: Processed 20483 (53.7%) in 35.01 min. Items per second: 9.75
SLIMElasticNetRecommender: Processed 23662 (62.1%) in 40.01 min. Items per second: 9.86
SLIMElasticNetRecommender: Processed 26776 (70.2%) in 45.01 min. Items per second: 9.91
SLIMElasticNetRecommender: Processed 29769 (78.1%) in 50.01 min. Items per second: 9.92
SLIMElasticNetRecommender: Processed 32882 (86.3%) in 55.01 min. Items per second: 9.96
SLIMElasticNetRecommender: Processed

[I 2024-11-19 04:16:54,267] Trial 38 finished with value: 0.059965638566142336 and parameters: {'topK': 1062, 'l1_ratio': 0.3953077948050474, 'alpha': 0.00010470645827950036, 'positive_only': True}. Best is trial 16 with value: 0.060174874079423415.


SLIMElasticNetRecommender: Processed 3299 ( 8.7%) in 5.00 min. Items per second: 10.99
SLIMElasticNetRecommender: Processed 6805 (17.9%) in 10.00 min. Items per second: 11.34
SLIMElasticNetRecommender: Processed 10424 (27.3%) in 15.00 min. Items per second: 11.58
SLIMElasticNetRecommender: Processed 13920 (36.5%) in 20.00 min. Items per second: 11.60
SLIMElasticNetRecommender: Processed 17461 (45.8%) in 25.00 min. Items per second: 11.64
SLIMElasticNetRecommender: Processed 20959 (55.0%) in 30.00 min. Items per second: 11.64
SLIMElasticNetRecommender: Processed 24436 (64.1%) in 35.00 min. Items per second: 11.63
SLIMElasticNetRecommender: Processed 28094 (73.7%) in 40.01 min. Items per second: 11.70
SLIMElasticNetRecommender: Processed 31645 (83.0%) in 45.01 min. Items per second: 11.72
SLIMElasticNetRecommender: Processed 35614 (93.4%) in 50.01 min. Items per second: 11.87
SLIMElasticNetRecommender: Processed 38121 (100.0%) in 53.11 min. Items per second: 11.96
EvaluatorHoldout: Proce

[I 2024-11-19 05:10:38,852] Trial 39 finished with value: 0.05944318691094748 and parameters: {'topK': 985, 'l1_ratio': 0.15070277348209812, 'alpha': 0.0003878916657655993, 'positive_only': True}. Best is trial 16 with value: 0.060174874079423415.


SLIMElasticNetRecommender: Processed 24622 (64.6%) in 5.00 min. Items per second: 82.07
SLIMElasticNetRecommender: Processed 38121 (100.0%) in 7.88 min. Items per second: 80.63
EvaluatorHoldout: Processed 35595 (100.0%) in 24.71 sec. Users per second: 1440


[I 2024-11-19 05:18:56,530] Trial 40 finished with value: 0.006278734305915037 and parameters: {'topK': 885, 'l1_ratio': 0.5168917995011423, 'alpha': 0.006734050548967194, 'positive_only': True}. Best is trial 16 with value: 0.060174874079423415.


SLIMElasticNetRecommender: Processed 3358 ( 8.8%) in 5.00 min. Items per second: 11.19
SLIMElasticNetRecommender: Processed 6508 (17.1%) in 10.00 min. Items per second: 10.84
SLIMElasticNetRecommender: Processed 9681 (25.4%) in 15.00 min. Items per second: 10.75
SLIMElasticNetRecommender: Processed 13126 (34.4%) in 20.00 min. Items per second: 10.93
SLIMElasticNetRecommender: Processed 16665 (43.7%) in 25.01 min. Items per second: 11.11
SLIMElasticNetRecommender: Processed 20171 (52.9%) in 30.01 min. Items per second: 11.20
SLIMElasticNetRecommender: Processed 23607 (61.9%) in 35.01 min. Items per second: 11.24
SLIMElasticNetRecommender: Processed 27094 (71.1%) in 40.01 min. Items per second: 11.29
SLIMElasticNetRecommender: Processed 30342 (79.6%) in 45.01 min. Items per second: 11.24
SLIMElasticNetRecommender: Processed 33536 (88.0%) in 50.01 min. Items per second: 11.18
SLIMElasticNetRecommender: Processed 36927 (96.9%) in 55.01 min. Items per second: 11.19
SLIMElasticNetRecommender

[I 2024-11-19 06:16:33,961] Trial 41 finished with value: 0.060162308777983564 and parameters: {'topK': 1336, 'l1_ratio': 0.08478802846195718, 'alpha': 0.0002433983747738467, 'positive_only': True}. Best is trial 16 with value: 0.060174874079423415.


## Some optuna visualizations on recommender parameters

In [16]:
# When tuning was skipped in this session, read the study back from the SQLite history first.
if not config['tune_parameters']:
    optuna_study = optuna.load_study(
        study_name=f'hyperparameters_tuning_{config["model"]}',
        storage=f'sqlite:///{config["database_path"]}',
    )

# Slice plot of the study.
slice_figure = optuna.visualization.plot_slice(optuna_study)
slice_figure.show()

In [17]:
# When tuning was skipped in this session, read the study back from the SQLite history first.
if not config['tune_parameters']:
    optuna_study = optuna.load_study(
        study_name=f'hyperparameters_tuning_{config["model"]}',
        storage=f'sqlite:///{config["database_path"]}',
    )

# Hyperparameter-importance plot of the study.
importance_figure = optuna.visualization.plot_param_importances(optuna_study)
importance_figure.show()

## Let's train the recommender with best parameter values

In [None]:
if config['tune_best_params']:

    if not config['tune_parameters']:
        # No search in this session: read the best parameters stored in the repository checkout.
        stored_params_path = f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/best_params_{config["model"]}.json'
        with open(stored_params_path, 'r') as stored_params_file:
            best_params = json.load(stored_params_file)
    else:
        # Use the best trial of the study tuned above.
        best_params = optuna_study.best_trial.params

    # Fit the final model on the sum of the training and validation matrices.
    recommender_instance = SLIMElasticNetRecommender(URM_train + URM_validation)
    recommender_instance.fit(**best_params)

# Testing

Create the recommendations for the submission. 

In [None]:
if config['tune_best_params']:

    # Target users listed by the challenge for the submission.
    data_target_users_test = pd.read_csv(
        '/kaggle/input/recommender-system-2024-challenge-polimi/data_target_users_test.csv'
    )

    # Write the submission file next to the other working artifacts.
    submission_path = f'/kaggle/working/submission_{config["model"]}.csv'
    create_submission(data_target_users_test, recommender_instance, submission_path)

# Save Version on GitHub 

Write the best hyperparameters to a JSON file, or copy the previously saved file from the repository checkout.

In [None]:
# Local location of the best-parameters JSON and its location inside the repository.
best_params_local_path = f'/kaggle/working/best_params_{config["model"]}.json'
best_params_repo_path = f'{GH_PATH}/{config["model"]}Recommender/best_params_{config["model"]}.json'

if config['tune_parameters']:
    # Persist the best hyperparameters found by the study tuned in this session.
    with open(best_params_local_path, 'w') as params_file:
        json.dump(optuna_study.best_params, params_file)

    if config['save_github']:
        upload_file(
            best_params_local_path,
            best_params_repo_path,
            f'{config["model"]} tuning results (from kaggle notebook)',
            repo  # the other upload_file calls in this notebook pass the repository as 4th argument; it was missing here
        )
elif config['copy_prev_best_params']:
    # No tuning in this session: reuse the JSON stored in the repository checkout.
    shutil.copyfile(
        f'{K_PATH}/{best_params_repo_path}',
        best_params_local_path
    )

File 'TrainedModels/SLIMElasticNetRecommender/best_params_SLIMElasticNet.json' updated successfully.


Save the history of the tuned model.

In [None]:
# Push the Optuna history database to the repository, only after a tuning run and when uploads are enabled.
if config['save_github'] and config['tune_parameters']:
    history_db_repo_path = f'{GH_PATH}/{config["model"]}Recommender/history_{config["model"]}.db'
    history_commit_message = f'Tuning {config["model"]} db updated results (from kaggle notebook)'
    upload_file(config['database_path'], history_db_repo_path, history_commit_message, repo)

File 'TrainedModels/SLIMElasticNetRecommender/history_SLIMElasticNet.db' updated successfully.


Save the best trained model and its submission.

In [None]:
# Push the submission file to the repository, only when the final model was trained and uploads are enabled.
if config['save_github'] and config['tune_best_params']:
    submission_local_path = f'/kaggle/working/submission_{config["model"]}.csv'
    submission_repo_path = f'{GH_PATH}/{config["model"]}Recommender/Submission/submission_{config["model"]}.csv'
    submission_commit_message = f'New {config["model"]} submission (from kaggle notebook)'
    upload_file(submission_local_path, submission_repo_path, submission_commit_message, repo)