# Set up the connection with GitHub

In [1]:
# ! cd /kaggle/working && rm -rf RECsys_Challenge2024

In [2]:
from kaggle_secrets import UserSecretsClient

# Read the GitHub access token from the Kaggle secret named "Token".
# The same token is reused later to open the repository with get_repo_from_github.
token = UserSecretsClient().get_secret("Token")

# Clone the project repository, authenticating with the token embedded in the URL.
# NOTE(review): the token is part of the clone URL, so it is presumably persisted in the
# clone's git configuration — confirm this is acceptable for this environment.
! git clone https://{token}@github.com/madratak/RECsys_Challenge2024.git

Cloning into 'RECsys_Challenge2024'...
remote: Enumerating objects: 4023, done.[K
remote: Counting objects: 100% (1496/1496), done.[K
remote: Compressing objects: 100% (469/469), done.[K
remote: Total 4023 (delta 822), reused 1485 (delta 816), pack-reused 2527 (from 1)[K
Receiving objects: 100% (4023/4023), 168.07 MiB | 28.81 MiB/s, done.
Resolving deltas: 100% (2341/2341), done.
Updating files: 100% (374/374), done.


In [3]:
! pip install PyGithub requests

Collecting PyGithub
  Downloading PyGithub-2.5.0-py3-none-any.whl.metadata (3.9 kB)
Collecting pynacl>=1.4.0 (from PyGithub)
  Downloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl.metadata (8.6 kB)
Downloading PyGithub-2.5.0-py3-none-any.whl (375 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m375.9/375.9 kB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (856 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m856.7/856.7 kB[0m [31m30.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pynacl, PyGithub
Successfully installed PyGithub-2.5.0 pynacl-1.5.0


In [4]:
# Move into the cloned repository so that the relative script path below resolves
# (and, presumably, so that the repository's packages imported later can be found — verify).
%cd /kaggle/working/RECsys_Challenge2024 
# Run the repository's script that compiles its Cython files with the current Python environment.
! python run_compile_all_cython.py

/kaggle/working/RECsys_Challenge2024
run_compile_all_cython: Found 11 Cython files in 5 folders...
run_compile_all_cython: All files will be compiled using your current python environment: '/opt/conda/bin/python'
Compiling [1/11]: MatrixFactorizationImpressions_Cython_Epoch.pyx... 
In file included from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/ndarraytypes.h:1929[m[K,
                 from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/ndarrayobject.h:12[m[K,
                 from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/arrayobject.h:5[m[K,
                 from [01m[KMatrixFactorizationImpressions_Cython_Epoch.c:1252[m[K:
      |  [01;35m[K^~~~~~~[m[K
[01m[KMatrixFactorizationImpressions_Cython_Epoch.c:[m[K In function '[01m[K__pyx_f_43MatrixFactorizationImpressions_Cython_Epoch_32MatrixFactorization_Cython_Epoch_sampleBPR_Cython[m[K':
30351 |       [01;35m[K__py

In [5]:
from Utils.notebookFunctions import *
import numpy as np
import pandas as pd
import scipy.sparse as sps
import time
import shutil
import optuna
import json
import os
from Utils.seconds_to_biggest_unit import seconds_to_biggest_unit


# Root of the cloned repository inside the Kaggle working directory.
K_PATH = '/kaggle/working/RECsys_Challenge2024'
# Repository-relative folder holding the SLIM tuning artifacts (history db, best params, submission).
GH_PATH = 'TrainedModels/WithoutKFCV/SLIM'

# Seed NumPy's global random generator.
# NOTE(review): presumably this is what makes the random train/validation split repeatable — confirm.
np.random.seed(42)

## Import the repository

In [6]:
# Open a handle to the GitHub repository using the access token; `repo` is passed to
# upload_file further down whenever results are saved on GitHub.
repo = get_repo_from_github(token)

Repository 'RECsys_Challenge2024' found.


In [7]:
# Switches and paths that drive the rest of the notebook.
config = {
    # Model and metric names: only used to build file names, folder names and the Optuna study name.
    'model': 'SLIMElasticNet',
    'metric': 'Recall',
    # True: run the Optuna search. False: the stored study is loaded for the plots instead.
    'tune_parameters': True,
    # Local SQLite file backing the Optuna study (seeded from the repository copy when available).
    'database_path': '/kaggle/working/history_SLIMElasticNet_Recall.db',
    # Only read when 'tune_parameters' is False: copy the best-params JSON from the cloned repo
    # into the working directory.
    'copy_prev_best_params': False,
    # True: fit the final model with the best parameters and create the submission file.
    'tune_best_params': True,
    # True: upload the produced files (best params, history db, submission) to GitHub.
    'save_github': True
}

Import the database where previous tuning trials have been saved.

In [8]:
# Seed the local Optuna storage with the tuning history versioned in the cloned repository,
# so the study opened later (load_if_exists=True) can continue from the previous trials.
history_db_source = (
    f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/'
    f'history_{config["model"]}_{config["metric"]}.db'
)

try:
    shutil.copyfile(history_db_source, config['database_path'])
    print(f"Previous tuning history copied to '{config['database_path']}'.")
except FileNotFoundError:
    # Deliberately best-effort: without a previous history Optuna creates a fresh database.
    # The message makes it visible that the study will start from scratch.
    print(f"No previous tuning history found at '{history_db_source}': a new study will be created.")

# Construction of URM and ICM matrices

In [9]:
# Load the user-item interactions (columns used: user_id, item_id, data).
URM_all_dataframe = pd.read_csv("/kaggle/input/recommender-system-2024-challenge-polimi/data_train.csv")

# Size the matrix from the largest id rather than from the number of distinct ids.
# Counting distinct ids only gives a valid shape when the ids are exactly 0..n-1; with any gap
# the largest id falls outside the declared shape and the sparse constructor fails.
# When the ids are contiguous both approaches give the same shape.
n_users = int(URM_all_dataframe["user_id"].max()) + 1
n_items = int(URM_all_dataframe["item_id"].max()) + 1

# Rows are users, columns are items, values come from the "data" column.
URM_all = sps.csr_matrix((URM_all_dataframe["data"].values, 
                          (URM_all_dataframe["user_id"].values, URM_all_dataframe["item_id"].values)),
                        shape = (n_users, n_items))

URM_all

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 1764607 stored elements and shape (35736, 38121)>

In [10]:
# Load the item metadata (columns used: item_id, feature_id, data).
ICM_dataframe = pd.read_csv("/kaggle/input/recommender-system-2024-challenge-polimi/data_ICM_metadata.csv")

# Size the matrix from the largest id rather than from the number of distinct ids.
# Counting distinct ids only gives a valid shape when the ids are exactly 0..n-1; with any gap
# the largest id falls outside the declared shape and the sparse constructor fails.
# When the ids are contiguous both approaches give the same shape.
n_items = int(ICM_dataframe["item_id"].max()) + 1
n_features = int(ICM_dataframe["feature_id"].max()) + 1

# Rows are items, columns are features, values come from the "data" column.
ICM_all = sps.csr_matrix((ICM_dataframe["data"].values, 
                          (ICM_dataframe["item_id"].values, ICM_dataframe["feature_id"].values)),
                        shape = (n_items, n_features))

ICM_all

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 2940040 stored elements and shape (38121, 94331)>

# Training

In [11]:
from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

# Split the interactions into a training and a validation matrix (train_percentage = 0.80).
# NOTE(review): the split is presumably random and driven by NumPy's global seed — confirm.
URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.80)

# Evaluator over the validation interactions, with 50 as the only cutoff.
evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[50])

EvaluatorHoldout: Ignoring 141 ( 0.4%) Users that have less than 1 test interactions


In [12]:
from Recommenders.SLIM.SLIMElasticNetRecommender import SLIMElasticNetRecommender

def objective_function_SLIMElasticNet(optuna_trial):
    """Optuna objective: fit SLIM ElasticNet on URM_train and score it on the validation split.

    Args:
        optuna_trial: Optuna trial used to sample topK, l1_ratio, alpha and positive_only.

    Returns:
        The "RECALL" value at cutoff 50 reported by evaluator_validation for the fitted model.
    """
    recommender_instance = SLIMElasticNetRecommender(URM_train)

    full_hyperp = {
        # Lower bound is 1: the original literal `000` is plain 0, which let the search spend
        # a full training run on topK = 0.
        # NOTE(review): topK = 0 presumably keeps no similarity entries at all — confirm
        # against the recommender implementation.
        "topK": optuna_trial.suggest_int("topK", 1, 1500),
        "l1_ratio": optuna_trial.suggest_float("l1_ratio", 0.01, 1.0, log=True),
        "alpha": optuna_trial.suggest_float("alpha", 1e-4, 1e-1, log=True),
        "positive_only": optuna_trial.suggest_categorical("positive_only", [True, False]),
    }

    recommender_instance.fit(**full_hyperp)

    result_df, _ = evaluator_validation.evaluateRecommender(recommender_instance)

    # Row label 50 is the cutoff configured in the evaluator.
    return result_df.loc[50]["RECALL"]

In [13]:
if config['tune_parameters']:

    # Open the study kept in the SQLite database (creating it when it does not exist yet)
    # and run 10 trials that maximize the objective.
    optuna_study = optuna.create_study(
        study_name=f'hyperparameters_tuning_{config["model"]}_{config["metric"]}',
        storage=f'sqlite:///{config["database_path"]}',
        direction='maximize',
        load_if_exists=True,
    )

    optuna_study.optimize(objective_function_SLIMElasticNet, n_trials=10)

[I 2024-12-16 01:14:43,597] A new study created in RDB with name: hyperparameters_tuning_SLIMElasticNet_Recall


SLIMElasticNetRecommender: Processed 18723 (49.1%) in 5.00 min. Items per second: 62.40
SLIMElasticNetRecommender: Processed 37150 (97.5%) in 10.00 min. Items per second: 61.91
SLIMElasticNetRecommender: Processed 38121 (100.0%) in 10.25 min. Items per second: 61.97
EvaluatorHoldout: Processed 35595 (100.0%) in 29.59 sec. Users per second: 1203


[I 2024-12-16 01:25:28,620] Trial 0 finished with value: 0.06604357908586696 and parameters: {'topK': 1450, 'l1_ratio': 0.5577251422563938, 'alpha': 0.0020317765251259665, 'positive_only': False}. Best is trial 0 with value: 0.06604357908586696.


SLIMElasticNetRecommender: Processed 9659 (25.3%) in 5.00 min. Items per second: 32.19
SLIMElasticNetRecommender: Processed 19953 (52.3%) in 10.00 min. Items per second: 33.25
SLIMElasticNetRecommender: Processed 29469 (77.3%) in 15.00 min. Items per second: 32.74
SLIMElasticNetRecommender: Processed 38121 (100.0%) in 18.74 min. Items per second: 33.90
EvaluatorHoldout: Processed 35595 (100.0%) in 33.43 sec. Users per second: 1065


[I 2024-12-16 01:44:46,664] Trial 1 finished with value: 0.2012589873512001 and parameters: {'topK': 478, 'l1_ratio': 0.12295874097054221, 'alpha': 0.0022659111039030756, 'positive_only': False}. Best is trial 1 with value: 0.2012589873512001.


SLIMElasticNetRecommender: Processed 20390 (53.5%) in 5.00 min. Items per second: 67.96
SLIMElasticNetRecommender: Processed 38121 (100.0%) in 9.43 min. Items per second: 67.40
EvaluatorHoldout: Processed 35595 (100.0%) in 28.37 sec. Users per second: 1255


[I 2024-12-16 01:54:40,794] Trial 2 finished with value: 0.00695557391949735 and parameters: {'topK': 915, 'l1_ratio': 0.08992550537422349, 'alpha': 0.08184720044488716, 'positive_only': True}. Best is trial 1 with value: 0.2012589873512001.


SLIMElasticNetRecommender: Processed 2135 ( 5.6%) in 5.00 min. Items per second: 7.11
SLIMElasticNetRecommender: Processed 4333 (11.4%) in 10.00 min. Items per second: 7.22
SLIMElasticNetRecommender: Processed 6543 (17.2%) in 15.00 min. Items per second: 7.27
SLIMElasticNetRecommender: Processed 8799 (23.1%) in 20.00 min. Items per second: 7.33
SLIMElasticNetRecommender: Processed 10996 (28.8%) in 25.00 min. Items per second: 7.33
SLIMElasticNetRecommender: Processed 13234 (34.7%) in 30.01 min. Items per second: 7.35
SLIMElasticNetRecommender: Processed 15532 (40.7%) in 35.01 min. Items per second: 7.39
SLIMElasticNetRecommender: Processed 17830 (46.8%) in 40.01 min. Items per second: 7.43
SLIMElasticNetRecommender: Processed 20002 (52.5%) in 45.01 min. Items per second: 7.41
SLIMElasticNetRecommender: Processed 22241 (58.3%) in 50.01 min. Items per second: 7.41
SLIMElasticNetRecommender: Processed 24484 (64.2%) in 55.01 min. Items per second: 7.42
SLIMElasticNetRecommender: Processed 

[I 2024-12-16 03:21:03,555] Trial 3 finished with value: 0.26619891213396246 and parameters: {'topK': 262, 'l1_ratio': 0.01018877015015616, 'alpha': 0.00032022108201524435, 'positive_only': True}. Best is trial 3 with value: 0.26619891213396246.


SLIMElasticNetRecommender: Processed 1247 ( 3.3%) in 5.00 min. Items per second: 4.15
SLIMElasticNetRecommender: Processed 2429 ( 6.4%) in 10.00 min. Items per second: 4.05
SLIMElasticNetRecommender: Processed 3651 ( 9.6%) in 15.00 min. Items per second: 4.05
SLIMElasticNetRecommender: Processed 4917 (12.9%) in 20.01 min. Items per second: 4.10
SLIMElasticNetRecommender: Processed 6199 (16.3%) in 25.01 min. Items per second: 4.13
SLIMElasticNetRecommender: Processed 7493 (19.7%) in 30.01 min. Items per second: 4.16
SLIMElasticNetRecommender: Processed 8728 (22.9%) in 35.01 min. Items per second: 4.15
SLIMElasticNetRecommender: Processed 10037 (26.3%) in 40.02 min. Items per second: 4.18
SLIMElasticNetRecommender: Processed 11331 (29.7%) in 45.02 min. Items per second: 4.19
SLIMElasticNetRecommender: Processed 12617 (33.1%) in 50.02 min. Items per second: 4.20
SLIMElasticNetRecommender: Processed 13916 (36.5%) in 55.02 min. Items per second: 4.21
SLIMElasticNetRecommender: Processed 151

[I 2024-12-16 05:52:07,789] Trial 4 finished with value: 0.2476019791573958 and parameters: {'topK': 1128, 'l1_ratio': 0.01664484315476557, 'alpha': 0.00028021433726821394, 'positive_only': False}. Best is trial 3 with value: 0.26619891213396246.


SLIMElasticNetRecommender: Processed 20097 (52.7%) in 5.00 min. Items per second: 66.98
SLIMElasticNetRecommender: Processed 38121 (100.0%) in 9.53 min. Items per second: 66.64
EvaluatorHoldout: Processed 35595 (100.0%) in 30.21 sec. Users per second: 1178


[I 2024-12-16 06:02:10,241] Trial 5 finished with value: 0.005972113065949917 and parameters: {'topK': 1317, 'l1_ratio': 0.4549811668606217, 'alpha': 0.03839098584391007, 'positive_only': True}. Best is trial 3 with value: 0.26619891213396246.


SLIMElasticNetRecommender: Processed 19620 (51.5%) in 5.00 min. Items per second: 65.40
SLIMElasticNetRecommender: Processed 38121 (100.0%) in 9.66 min. Items per second: 65.78
EvaluatorHoldout: Processed 35595 (100.0%) in 27.76 sec. Users per second: 1282


[I 2024-12-16 06:12:17,795] Trial 6 finished with value: 0.00782328297981593 and parameters: {'topK': 251, 'l1_ratio': 0.1869583484641346, 'alpha': 0.033357146098161566, 'positive_only': True}. Best is trial 3 with value: 0.26619891213396246.


SLIMElasticNetRecommender: Processed 19622 (51.5%) in 5.00 min. Items per second: 65.40
SLIMElasticNetRecommender: Processed 38121 (100.0%) in 9.82 min. Items per second: 64.67
EvaluatorHoldout: Processed 35595 (100.0%) in 28.68 sec. Users per second: 1241


[I 2024-12-16 06:22:36,173] Trial 7 finished with value: 0.017098095131736792 and parameters: {'topK': 935, 'l1_ratio': 0.8068891662967625, 'alpha': 0.003995741863321068, 'positive_only': True}. Best is trial 3 with value: 0.26619891213396246.


SLIMElasticNetRecommender: Processed 4345 (11.4%) in 5.00 min. Items per second: 14.48
SLIMElasticNetRecommender: Processed 8576 (22.5%) in 10.00 min. Items per second: 14.29
SLIMElasticNetRecommender: Processed 12980 (34.0%) in 15.00 min. Items per second: 14.42
SLIMElasticNetRecommender: Processed 17897 (46.9%) in 20.00 min. Items per second: 14.91
SLIMElasticNetRecommender: Processed 23103 (60.6%) in 25.00 min. Items per second: 15.40
SLIMElasticNetRecommender: Processed 27965 (73.4%) in 30.01 min. Items per second: 15.53
SLIMElasticNetRecommender: Processed 33542 (88.0%) in 35.01 min. Items per second: 15.97
SLIMElasticNetRecommender: Processed 38121 (100.0%) in 39.77 min. Items per second: 15.97
EvaluatorHoldout: Processed 35595 (100.0%) in 37.65 sec. Users per second: 945


[I 2024-12-16 07:03:00,323] Trial 8 finished with value: 0.2555804172170557 and parameters: {'topK': 1329, 'l1_ratio': 0.24282468040180388, 'alpha': 0.0005696529687597017, 'positive_only': False}. Best is trial 3 with value: 0.26619891213396246.


SLIMElasticNetRecommender: Processed 2175 ( 5.7%) in 5.00 min. Items per second: 7.24
SLIMElasticNetRecommender: Processed 4343 (11.4%) in 10.00 min. Items per second: 7.23
SLIMElasticNetRecommender: Processed 6441 (16.9%) in 15.01 min. Items per second: 7.15
SLIMElasticNetRecommender: Processed 8633 (22.6%) in 20.01 min. Items per second: 7.19
SLIMElasticNetRecommender: Processed 10752 (28.2%) in 25.01 min. Items per second: 7.17
SLIMElasticNetRecommender: Processed 12867 (33.8%) in 30.01 min. Items per second: 7.15
SLIMElasticNetRecommender: Processed 15029 (39.4%) in 35.01 min. Items per second: 7.15
SLIMElasticNetRecommender: Processed 17318 (45.4%) in 40.01 min. Items per second: 7.21
SLIMElasticNetRecommender: Processed 19676 (51.6%) in 45.01 min. Items per second: 7.29
SLIMElasticNetRecommender: Processed 22026 (57.8%) in 50.01 min. Items per second: 7.34
SLIMElasticNetRecommender: Processed 24341 (63.9%) in 55.01 min. Items per second: 7.37
SLIMElasticNetRecommender: Processed 

[I 2024-12-16 08:29:44,071] Trial 9 finished with value: 0.2759476213618708 and parameters: {'topK': 1431, 'l1_ratio': 0.1059130848473284, 'alpha': 0.00029464301780500544, 'positive_only': False}. Best is trial 9 with value: 0.2759476213618708.


## Some optuna visualizations on recommender parameters

In [14]:
# When tuning was skipped in this session, recover the study from the SQLite storage.
if not config['tune_parameters']:
    optuna_study = optuna.load_study(
        storage=f'sqlite:///{config["database_path"]}',
        study_name=f'hyperparameters_tuning_{config["model"]}_{config["metric"]}',
    )

# Slice plot of the study's trials.
fig = optuna.visualization.plot_slice(optuna_study)
fig.show()

In [15]:
# When tuning was skipped in this session, recover the study from the SQLite storage.
if not config['tune_parameters']:
    optuna_study = optuna.load_study(
        storage=f'sqlite:///{config["database_path"]}',
        study_name=f'hyperparameters_tuning_{config["model"]}_{config["metric"]}',
    )

# Hyperparameter importance plot for the study.
fig = optuna.visualization.plot_param_importances(optuna_study)
fig.show()

## Let's train the recommender with the best parameter values

In [16]:
if config['tune_best_params']:

    # Best hyperparameters: read from the JSON stored in the cloned repository when tuning was
    # skipped, otherwise taken from the best trial of the study just run.
    if not config['tune_parameters']:
        with open(
            f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/best_params_{config["model"]}_{config["metric"]}.json',
            'r',
        ) as best_params_json:
            best_params = json.load(best_params_json)
    else:
        best_params = optuna_study.best_trial.params

    # Final fit on the sum of the training and validation matrices.
    recommender_instance = SLIMElasticNetRecommender(URM_train + URM_validation)
    recommender_instance.fit(**best_params)

SLIMElasticNetRecommender: Processed 1673 ( 4.4%) in 5.00 min. Items per second: 5.57
SLIMElasticNetRecommender: Processed 3223 ( 8.5%) in 10.00 min. Items per second: 5.37
SLIMElasticNetRecommender: Processed 4908 (12.9%) in 15.01 min. Items per second: 5.45
SLIMElasticNetRecommender: Processed 6572 (17.2%) in 20.01 min. Items per second: 5.47
SLIMElasticNetRecommender: Processed 8251 (21.6%) in 25.01 min. Items per second: 5.50
SLIMElasticNetRecommender: Processed 10029 (26.3%) in 30.01 min. Items per second: 5.57
SLIMElasticNetRecommender: Processed 11631 (30.5%) in 35.02 min. Items per second: 5.54
SLIMElasticNetRecommender: Processed 13311 (34.9%) in 40.02 min. Items per second: 5.54
SLIMElasticNetRecommender: Processed 14961 (39.2%) in 45.02 min. Items per second: 5.54
SLIMElasticNetRecommender: Processed 16614 (43.6%) in 50.02 min. Items per second: 5.54
SLIMElasticNetRecommender: Processed 18169 (47.7%) in 55.02 min. Items per second: 5.50
SLIMElasticNetRecommender: Processed 1

# Testing

Create the recommendations for the submission. 

In [17]:
if config['tune_best_params']:

    # Target users listed in the competition file.
    data_target_users_test = pd.read_csv('/kaggle/input/recommender-system-2024-challenge-polimi/data_target_users_test.csv')

    # Write the submission CSV for those users with the model fitted on the best parameters.
    create_submission(
        data_target_users_test,
        recommender_instance,
        f'/kaggle/working/submission_{config["model"]}_{config["metric"]}.csv',
    )

Submission file saved as /kaggle/working/submission_SLIMElasticNet_Recall.csv


# Save Version on GitHub 

Write or import a JSON file where the best hyperparameters are saved.

In [18]:
if config['tune_parameters']:
    # Dump the study's best hyperparameters to a JSON file in the working directory.
    with open(f'/kaggle/working/best_params_{config["model"]}_{config["metric"]}.json', 'w') as params_file:
        json.dump(optuna_study.best_params, params_file)

    # Optionally push that JSON file to the GitHub repository.
    if config['save_github']:
        upload_file(
            f'/kaggle/working/best_params_{config["model"]}_{config["metric"]}.json',
            f'{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/best_params_{config["model"]}_{config["metric"]}.json',
            f'{config["model"]}_{config["metric"]} tuning results (from kaggle notebook)',
            repo,
        )
elif config['copy_prev_best_params']:
    # No tuning in this session: reuse the best-params JSON shipped with the cloned repository.
    shutil.copyfile(
        f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/best_params_{config["model"]}_{config["metric"]}.json',
        f'/kaggle/working/best_params_{config["model"]}_{config["metric"]}.json',
    )

File 'TrainedModels/WithoutKFCV/SLIM/SLIMElasticNetRecommender/OptimizingRecall/best_params_SLIMElasticNet_Recall.json' created successfully.


Save the history of the tuned model.

In [19]:
# Push the Optuna history database to GitHub, but only when new trials were run in this
# session and uploading is enabled.
if config['tune_parameters'] and config['save_github']:
    upload_file(
        config['database_path'],
        f'{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/history_{config["model"]}_{config["metric"]}.db',
        f'Tuning {config["model"]}_{config["metric"]} db updated results (from kaggle notebook)',
        repo,
    )

File 'TrainedModels/WithoutKFCV/SLIM/SLIMElasticNetRecommender/OptimizingRecall/history_SLIMElasticNet_Recall.db' created successfully.


Save the best trained model and its submission.

In [20]:
# Push the submission file to GitHub, but only when the final model was trained in this
# session and uploading is enabled.
if config['tune_best_params'] and config['save_github']:
    upload_file(
        f'/kaggle/working/submission_{config["model"]}_{config["metric"]}.csv',
        f'{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/Submission/submission_{config["model"]}_{config["metric"]}.csv',
        f'New {config["model"]}_{config["metric"]} submission (from kaggle notebook)',
        repo,
    )

File 'TrainedModels/WithoutKFCV/SLIM/SLIMElasticNetRecommender/OptimizingRecall/Submission/submission_SLIMElasticNet_Recall.csv' created successfully.
