# Set connection with GitHub

In [1]:
# ! cd /kaggle/working && rm -rf RECsys_Challenge2024

In [2]:
from kaggle_secrets import UserSecretsClient

token = UserSecretsClient().get_secret("Token")

! git clone https://{token}@github.com/madratak/RECsys_Challenge2024.git

Cloning into 'RECsys_Challenge2024'...
remote: Enumerating objects: 2102, done.[K
remote: Counting objects: 100% (484/484), done.[K
remote: Compressing objects: 100% (275/275), done.[K
remote: Total 2102 (delta 308), reused 342 (delta 201), pack-reused 1618 (from 1)[K
Receiving objects: 100% (2102/2102), 144.72 MiB | 29.85 MiB/s, done.
Resolving deltas: 100% (1253/1253), done.
Updating files: 100% (238/238), done.


In [3]:
! pip install PyGithub requests

Collecting PyGithub
  Downloading PyGithub-2.5.0-py3-none-any.whl.metadata (3.9 kB)
Collecting pynacl>=1.4.0 (from PyGithub)
  Downloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl.metadata (8.6 kB)
Downloading PyGithub-2.5.0-py3-none-any.whl (375 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m375.9/375.9 kB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (856 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m856.7/856.7 kB[0m [31m26.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pynacl, PyGithub
Successfully installed PyGithub-2.5.0 pynacl-1.5.0


In [None]:
%cd /kaggle/working/RECsys_Challenge2024
! python run_compile_all_cython.py

run_compile_all_cython: Found 11 Cython files in 5 folders...
run_compile_all_cython: All files will be compiled using your current python environment: '/opt/conda/bin/python'
Compiling [1/11]: MatrixFactorizationImpressions_Cython_Epoch.pyx... 
In file included from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/ndarraytypes.h:1929[m[K,
                 from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/ndarrayobject.h:12[m[K,
                 from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/arrayobject.h:5[m[K,
                 from [01m[KMatrixFactorizationImpressions_Cython_Epoch.c:1252[m[K:
      |  [01;35m[K^~~~~~~[m[K
[01m[KMatrixFactorizationImpressions_Cython_Epoch.c:[m[K In function '[01m[K__pyx_f_43MatrixFactorizationImpressions_Cython_Epoch_32MatrixFactorization_Cython_Epoch_sampleBPR_Cython[m[K':
30351 |       [01;35m[K__pyx_t_4 = (__pyx_v_start_pos_impression

In [None]:
from Utils.notebookFunctions import *
import numpy as np
import pandas as pd
import scipy.sparse as sps
import time
import shutil
import optuna
import json
import os


K_PATH = '/kaggle/working/RECsys_Challenge2024'
GH_PATH = 'TrainedModels/WithoutKFCV/Hybrid'

np.random.seed(42)

## Import the repository **RECsys_Challenge2024**

In [None]:
repo = get_repo_from_github(token)

Repository 'RECsys_Challenge2024' found.


In [8]:
config = {
    'model': 'ItemKNN_CFCBF_Hybrid',
    'tune_parameters': True,
    'database_path': '/kaggle/working/history_ItemKNN_CFCBF_Hybrid.db',
    'copy_prev_best_params': False,
    'tune_best_params': True,
    'save_github': True
}

Import the database where previous tuning trials have been saved.

In [None]:
try:
    shutil.copyfile(
        f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/history_{config["model"]}.db', 
        config['database_path']
    )
except FileNotFoundError:
    pass # if not present optuna will create it

# Construction of URM and ICM matrices

In [10]:
URM_all_dataframe = pd.read_csv("/kaggle/input/recommender-system-2024-challenge-polimi/data_train.csv")

n_users = len(URM_all_dataframe["user_id"].unique())
n_items = len(URM_all_dataframe["item_id"].unique())

URM_all = sps.csr_matrix((URM_all_dataframe["data"].values, 
                          (URM_all_dataframe["user_id"].values, URM_all_dataframe["item_id"].values)),
                        shape = (n_users, n_items))

URM_all

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 1764607 stored elements and shape (35736, 38121)>

In [11]:
ICM_dataframe = pd.read_csv("/kaggle/input/recommender-system-2024-challenge-polimi/data_ICM_metadata.csv")

n_items = len(ICM_dataframe["item_id"].unique())
n_features = len(ICM_dataframe["feature_id"].unique())

ICM_all = sps.csr_matrix((ICM_dataframe["data"].values, 
                          (ICM_dataframe["item_id"].values, ICM_dataframe["feature_id"].values)),
                        shape = (n_items, n_features))

ICM_all

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 2940040 stored elements and shape (38121, 94331)>

# Training

In [13]:
from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.80)

evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])

EvaluatorHoldout: Ignoring 141 ( 0.4%) Users that have less than 1 test interactions


In [14]:
from Recommenders.KNN.ItemKNN_CFCBF_Hybrid_Recommender import ItemKNN_CFCBF_Hybrid_Recommender

def objective_function_ItemKNN_CFCBF_Hybrid(optuna_trial):
    
    recommender_instance = ItemKNN_CFCBF_Hybrid_Recommender(URM_train, ICM_all)
    similarity = optuna_trial.suggest_categorical("similarity", ['cosine', 'dice', 'jaccard', 'asymmetric', 'tversky', 'euclidean'])
    
    full_hyperp = {"similarity": similarity,
                   "topK": optuna_trial.suggest_int("topK", 0, 750),
                   "shrink": optuna_trial.suggest_int("shrink", 0, 1000),
                   'feature_weighting': optuna_trial.suggest_categorical('feature_weighting', ["BM25", "TF-IDF", "none"]),
                   "ICM_weight": optuna_trial.suggest_float("ICM_weight", 0.1, 20.0, log=True)
                  }
    
    if similarity == "asymmetric":
        full_hyperp["asymmetric_alpha"] = optuna_trial.suggest_float("asymmetric_alpha", 0, 2, log=False)
        full_hyperp["normalize"] = True     

    elif similarity == "tversky":
        full_hyperp["tversky_alpha"] = optuna_trial.suggest_float("tversky_alpha", 0, 2, log=False)
        full_hyperp["tversky_beta"] = optuna_trial.suggest_float("tversky_beta", 0, 2, log=False)
        full_hyperp["normalize"] = True 

    elif similarity == "euclidean":
        full_hyperp["normalize_avg_row"] = optuna_trial.suggest_categorical("normalize_avg_row", [True, False])
        full_hyperp["similarity_from_distance_mode"] = optuna_trial.suggest_categorical("similarity_from_distance_mode", ["lin", "log", "exp"])
        full_hyperp["normalize"] = optuna_trial.suggest_categorical("normalize", [True, False])        
    
    recommender_instance.fit(**full_hyperp)
    
    result_df, _ = evaluator_validation.evaluateRecommender(recommender_instance)
    
    return result_df.loc[10]["MAP"]

In [15]:
if config['tune_parameters']:
    
    optuna_study = optuna.create_study(direction='maximize', study_name=f'hyperparameters_tuning_{config["model"]}',
                                storage=f'sqlite:///{config["database_path"]}', load_if_exists=True)

    optuna_study.optimize(objective_function_ItemKNN_CFCBF_Hybrid, n_trials=100)

[I 2024-11-19 18:52:20,568] A new study created in RDB with name: hyperparameters_tuning_ItemKNN_CFCBF_Hybrid


Similarity column 38121 (100.0%), 627.52 column/sec. Elapsed time 1.01 min
EvaluatorHoldout: Processed 35595 (100.0%) in 55.19 sec. Users per second: 645


[I 2024-11-19 18:54:17,980] Trial 0 finished with value: 0.02207286224879953 and parameters: {'similarity': 'asymmetric', 'topK': 243, 'shrink': 21, 'feature_weighting': 'TF-IDF', 'ICM_weight': 0.3112772787354627, 'asymmetric_alpha': 1.0525560849843292}. Best is trial 0 with value: 0.02207286224879953.


Similarity column 26200 (68.7%), 87.00 column/sec. Elapsed time 5.02 min
Similarity column 38121 (100.0%), 87.17 column/sec. Elapsed time 7.29 min
EvaluatorHoldout: Processed 35595 (100.0%) in 43.09 sec. Users per second: 826


[I 2024-11-19 19:02:19,996] Trial 1 finished with value: 0.033098978365517805 and parameters: {'similarity': 'euclidean', 'topK': 582, 'shrink': 364, 'feature_weighting': 'BM25', 'ICM_weight': 0.44284718944886264, 'normalize_avg_row': True, 'similarity_from_distance_mode': 'log', 'normalize': False}. Best is trial 1 with value: 0.033098978365517805.


Similarity column 38121 (100.0%), 633.96 column/sec. Elapsed time 1.00 min
EvaluatorHoldout: Processed 35595 (100.0%) in 48.37 sec. Users per second: 736


[I 2024-11-19 19:04:10,591] Trial 2 finished with value: 0.03091009192926515 and parameters: {'similarity': 'dice', 'topK': 304, 'shrink': 675, 'feature_weighting': 'TF-IDF', 'ICM_weight': 1.7817792291826682}. Best is trial 1 with value: 0.033098978365517805.


Similarity column 38121 (100.0%), 636.13 column/sec. Elapsed time 59.93 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 48.44 sec. Users per second: 735


[I 2024-11-19 19:06:00,754] Trial 3 finished with value: 0.03057319001018882 and parameters: {'similarity': 'jaccard', 'topK': 318, 'shrink': 726, 'feature_weighting': 'none', 'ICM_weight': 3.934675036738022}. Best is trial 1 with value: 0.033098978365517805.


Similarity column 38121 (100.0%), 634.22 column/sec. Elapsed time 1.00 min
EvaluatorHoldout: Processed 35595 (100.0%) in 48.53 sec. Users per second: 733


[I 2024-11-19 19:07:51,021] Trial 4 finished with value: 0.026629733309252505 and parameters: {'similarity': 'cosine', 'topK': 314, 'shrink': 713, 'feature_weighting': 'TF-IDF', 'ICM_weight': 11.755398138291278}. Best is trial 1 with value: 0.033098978365517805.


Similarity column 38121 (100.0%), 619.11 column/sec. Elapsed time 1.03 min
EvaluatorHoldout: Processed 35595 (100.0%) in 50.78 sec. Users per second: 701


[I 2024-11-19 19:09:45,474] Trial 5 finished with value: 0.03002635580616961 and parameters: {'similarity': 'tversky', 'topK': 385, 'shrink': 858, 'feature_weighting': 'BM25', 'ICM_weight': 0.152955138782811, 'tversky_alpha': 1.2509612782579305, 'tversky_beta': 0.8177768511130263}. Best is trial 1 with value: 0.033098978365517805.


Similarity column 38121 (100.0%), 628.44 column/sec. Elapsed time 1.01 min
EvaluatorHoldout: Processed 35595 (100.0%) in 54.58 sec. Users per second: 652


[I 2024-11-19 19:11:42,968] Trial 6 finished with value: 0.03502069579060616 and parameters: {'similarity': 'asymmetric', 'topK': 662, 'shrink': 874, 'feature_weighting': 'TF-IDF', 'ICM_weight': 3.4743837163431155, 'asymmetric_alpha': 0.1636663388582551}. Best is trial 6 with value: 0.03502069579060616.


Similarity column 38121 (100.0%), 626.30 column/sec. Elapsed time 1.01 min
EvaluatorHoldout: Processed 35595 (100.0%) in 59.83 sec. Users per second: 595


[I 2024-11-19 19:13:46,114] Trial 7 finished with value: 0.028265542467396054 and parameters: {'similarity': 'asymmetric', 'topK': 691, 'shrink': 121, 'feature_weighting': 'BM25', 'ICM_weight': 3.842854923922256, 'asymmetric_alpha': 1.0026082248717516}. Best is trial 6 with value: 0.03502069579060616.


Similarity column 38121 (100.0%), 622.39 column/sec. Elapsed time 1.02 min
EvaluatorHoldout: Processed 35595 (100.0%) in 47.49 sec. Users per second: 750


[I 2024-11-19 19:15:36,722] Trial 8 finished with value: 0.03215853283299453 and parameters: {'similarity': 'tversky', 'topK': 293, 'shrink': 56, 'feature_weighting': 'none', 'ICM_weight': 17.048545043050023, 'tversky_alpha': 1.0436185719707491, 'tversky_beta': 0.4031235835164395}. Best is trial 6 with value: 0.03502069579060616.


Similarity column 38121 (100.0%), 621.65 column/sec. Elapsed time 1.02 min
EvaluatorHoldout: Processed 35595 (100.0%) in 44.16 sec. Users per second: 806


[I 2024-11-19 19:17:23,882] Trial 9 finished with value: 0.030842905303713673 and parameters: {'similarity': 'tversky', 'topK': 140, 'shrink': 666, 'feature_weighting': 'TF-IDF', 'ICM_weight': 3.437646417813053, 'tversky_alpha': 0.2885590943603238, 'tversky_beta': 0.8069322186724457}. Best is trial 6 with value: 0.03502069579060616.


Similarity column 38121 (100.0%), 628.06 column/sec. Elapsed time 1.01 min
EvaluatorHoldout: Processed 35595 (100.0%) in 51.71 sec. Users per second: 688


[I 2024-11-19 19:19:18,211] Trial 10 finished with value: 0.03938140388898763 and parameters: {'similarity': 'asymmetric', 'topK': 548, 'shrink': 968, 'feature_weighting': 'TF-IDF', 'ICM_weight': 0.7134734917584097, 'asymmetric_alpha': 0.1298903009168667}. Best is trial 10 with value: 0.03938140388898763.


Similarity column 38121 (100.0%), 629.39 column/sec. Elapsed time 1.01 min
EvaluatorHoldout: Processed 35595 (100.0%) in 51.79 sec. Users per second: 687


[I 2024-11-19 19:21:12,578] Trial 11 finished with value: 0.03755551207700259 and parameters: {'similarity': 'asymmetric', 'topK': 550, 'shrink': 980, 'feature_weighting': 'TF-IDF', 'ICM_weight': 0.9291367813740339, 'asymmetric_alpha': 0.017970709983976152}. Best is trial 10 with value: 0.03938140388898763.


Similarity column 38121 (100.0%), 618.25 column/sec. Elapsed time 1.03 min
EvaluatorHoldout: Processed 35595 (100.0%) in 51.25 sec. Users per second: 694


[I 2024-11-19 19:23:07,721] Trial 12 finished with value: 0.03801579943678405 and parameters: {'similarity': 'asymmetric', 'topK': 526, 'shrink': 972, 'feature_weighting': 'TF-IDF', 'ICM_weight': 0.6771982302232866, 'asymmetric_alpha': 0.022168283053468824}. Best is trial 10 with value: 0.03938140388898763.


Similarity column 38121 (100.0%), 619.35 column/sec. Elapsed time 1.03 min
EvaluatorHoldout: Processed 35595 (100.0%) in 1.07 min. Users per second: 554


[I 2024-11-19 19:25:15,656] Trial 13 finished with value: 0.021343723592353724 and parameters: {'similarity': 'asymmetric', 'topK': 500, 'shrink': 996, 'feature_weighting': 'TF-IDF', 'ICM_weight': 0.761584804855989, 'asymmetric_alpha': 1.8726067832722688}. Best is trial 10 with value: 0.03938140388898763.


Similarity column 38121 (100.0%), 628.36 column/sec. Elapsed time 1.01 min
EvaluatorHoldout: Processed 35595 (100.0%) in 51.05 sec. Users per second: 697


[I 2024-11-19 19:27:09,321] Trial 14 finished with value: 0.03133512041329079 and parameters: {'similarity': 'dice', 'topK': 438, 'shrink': 380, 'feature_weighting': 'none', 'ICM_weight': 0.20790877366762353}. Best is trial 10 with value: 0.03938140388898763.


Similarity column 25600 (67.2%), 85.19 column/sec. Elapsed time 5.01 min
Similarity column 38121 (100.0%), 86.05 column/sec. Elapsed time 7.38 min
EvaluatorHoldout: Processed 35595 (100.0%) in 56.15 sec. Users per second: 634


[I 2024-11-19 19:35:30,449] Trial 15 finished with value: 0.01805843517347941 and parameters: {'similarity': 'euclidean', 'topK': 749, 'shrink': 526, 'feature_weighting': 'TF-IDF', 'ICM_weight': 0.4963799194916607, 'normalize_avg_row': False, 'similarity_from_distance_mode': 'lin', 'normalize': True}. Best is trial 10 with value: 0.03938140388898763.


Similarity column 38121 (100.0%), 629.18 column/sec. Elapsed time 1.01 min
EvaluatorHoldout: Processed 35595 (100.0%) in 54.20 sec. Users per second: 657


[I 2024-11-19 19:37:26,726] Trial 16 finished with value: 0.04441651783623822 and parameters: {'similarity': 'cosine', 'topK': 477, 'shrink': 860, 'feature_weighting': 'TF-IDF', 'ICM_weight': 0.10567010857190823}. Best is trial 16 with value: 0.04441651783623822.


Similarity column 38121 (100.0%), 642.60 column/sec. Elapsed time 59.32 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.73 sec. Users per second: 943


[I 2024-11-19 19:39:04,653] Trial 17 finished with value: 0.04916891194366774 and parameters: {'similarity': 'cosine', 'topK': 21, 'shrink': 828, 'feature_weighting': 'TF-IDF', 'ICM_weight': 0.10429040589027133}. Best is trial 17 with value: 0.04916891194366774.


Similarity column 38121 (100.0%), 636.36 column/sec. Elapsed time 59.91 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 34.01 sec. Users per second: 1047


[I 2024-11-19 19:40:39,634] Trial 18 finished with value: 0.05078664628748769 and parameters: {'similarity': 'cosine', 'topK': 4, 'shrink': 558, 'feature_weighting': 'BM25', 'ICM_weight': 0.10065599937842569}. Best is trial 18 with value: 0.05078664628748769.


Similarity column 38121 (100.0%), 688.98 column/sec. Elapsed time 55.33 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 33.70 sec. Users per second: 1056


[I 2024-11-19 19:42:09,593] Trial 19 finished with value: 0.05047409458703057 and parameters: {'similarity': 'cosine', 'topK': 3, 'shrink': 535, 'feature_weighting': 'BM25', 'ICM_weight': 0.10562012378980232}. Best is trial 18 with value: 0.05078664628748769.


Similarity column 38121 (100.0%), 636.94 column/sec. Elapsed time 59.85 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 35.63 sec. Users per second: 999


[I 2024-11-19 19:43:46,202] Trial 20 finished with value: 0.050114355279966744 and parameters: {'similarity': 'cosine', 'topK': 9, 'shrink': 485, 'feature_weighting': 'BM25', 'ICM_weight': 0.24283356639556103}. Best is trial 18 with value: 0.05078664628748769.


Similarity column 38121 (100.0%), 636.64 column/sec. Elapsed time 59.88 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 34.72 sec. Users per second: 1025


[I 2024-11-19 19:45:21,782] Trial 21 finished with value: 0.050922948202550194 and parameters: {'similarity': 'cosine', 'topK': 6, 'shrink': 511, 'feature_weighting': 'BM25', 'ICM_weight': 0.24115797725894528}. Best is trial 21 with value: 0.050922948202550194.


Similarity column 38121 (100.0%), 628.76 column/sec. Elapsed time 1.01 min
EvaluatorHoldout: Processed 35595 (100.0%) in 43.24 sec. Users per second: 823


[I 2024-11-19 19:47:06,882] Trial 22 finished with value: 0.046470132464652536 and parameters: {'similarity': 'cosine', 'topK': 107, 'shrink': 546, 'feature_weighting': 'BM25', 'ICM_weight': 0.16567562966508267}. Best is trial 21 with value: 0.050922948202550194.


Similarity column 38121 (100.0%), 624.86 column/sec. Elapsed time 1.02 min
EvaluatorHoldout: Processed 35595 (100.0%) in 42.15 sec. Users per second: 844


[I 2024-11-19 19:48:51,218] Trial 23 finished with value: 0.0464104732027176 and parameters: {'similarity': 'cosine', 'topK': 99, 'shrink': 333, 'feature_weighting': 'BM25', 'ICM_weight': 0.14064877445341442}. Best is trial 21 with value: 0.050922948202550194.


Similarity column 38121 (100.0%), 627.02 column/sec. Elapsed time 1.01 min
EvaluatorHoldout: Processed 35595 (100.0%) in 45.32 sec. Users per second: 785


[I 2024-11-19 19:50:38,734] Trial 24 finished with value: 0.04624268278271459 and parameters: {'similarity': 'cosine', 'topK': 173, 'shrink': 273, 'feature_weighting': 'BM25', 'ICM_weight': 0.30064055801353234}. Best is trial 21 with value: 0.050922948202550194.


Similarity column 38121 (100.0%), 630.24 column/sec. Elapsed time 1.01 min
EvaluatorHoldout: Processed 35595 (100.0%) in 41.03 sec. Users per second: 867


[I 2024-11-19 19:52:21,912] Trial 25 finished with value: 0.03416066662653158 and parameters: {'similarity': 'jaccard', 'topK': 66, 'shrink': 611, 'feature_weighting': 'BM25', 'ICM_weight': 0.11165488259948964}. Best is trial 21 with value: 0.050922948202550194.


Similarity column 38121 (100.0%), 627.47 column/sec. Elapsed time 1.01 min
EvaluatorHoldout: Processed 35595 (100.0%) in 46.39 sec. Users per second: 767


[I 2024-11-19 19:54:10,549] Trial 26 finished with value: 0.046154937491218885 and parameters: {'similarity': 'cosine', 'topK': 212, 'shrink': 454, 'feature_weighting': 'BM25', 'ICM_weight': 0.3519841717312788}. Best is trial 21 with value: 0.050922948202550194.


Similarity column 38121 (100.0%), 633.41 column/sec. Elapsed time 1.00 min
EvaluatorHoldout: Processed 35595 (100.0%) in 40.20 sec. Users per second: 886


[I 2024-11-19 19:55:52,193] Trial 27 finished with value: 0.043196421603710435 and parameters: {'similarity': 'cosine', 'topK': 55, 'shrink': 232, 'feature_weighting': 'BM25', 'ICM_weight': 1.5056843052298696}. Best is trial 21 with value: 0.050922948202550194.


Similarity column 38121 (100.0%), 635.13 column/sec. Elapsed time 1.00 min
EvaluatorHoldout: Processed 35595 (100.0%) in 34.28 sec. Users per second: 1038


[I 2024-11-19 19:57:27,471] Trial 28 finished with value: 0.051176340087000304 and parameters: {'similarity': 'cosine', 'topK': 5, 'shrink': 569, 'feature_weighting': 'BM25', 'ICM_weight': 0.18482335849666967}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 629.30 column/sec. Elapsed time 1.01 min
EvaluatorHoldout: Processed 35595 (100.0%) in 46.19 sec. Users per second: 771


[I 2024-11-19 19:59:15,505] Trial 29 finished with value: 0.04597429860177257 and parameters: {'similarity': 'cosine', 'topK': 215, 'shrink': 444, 'feature_weighting': 'BM25', 'ICM_weight': 0.25562815794692906}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 26700 (70.0%), 88.83 column/sec. Elapsed time 5.01 min
Similarity column 38121 (100.0%), 89.74 column/sec. Elapsed time 7.08 min
EvaluatorHoldout: Processed 35595 (100.0%) in 37.82 sec. Users per second: 941


[I 2024-11-19 20:06:59,012] Trial 30 finished with value: 0.021214598982824795 and parameters: {'similarity': 'euclidean', 'topK': 150, 'shrink': 646, 'feature_weighting': 'BM25', 'ICM_weight': 0.19063725874437443, 'normalize_avg_row': False, 'similarity_from_distance_mode': 'exp', 'normalize': False}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 641.49 column/sec. Elapsed time 59.43 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 35.02 sec. Users per second: 1016


[I 2024-11-19 20:08:34,551] Trial 31 finished with value: 0.050251322751321256 and parameters: {'similarity': 'cosine', 'topK': 8, 'shrink': 521, 'feature_weighting': 'BM25', 'ICM_weight': 0.14695033491541243}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 637.54 column/sec. Elapsed time 59.79 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 40.95 sec. Users per second: 869


[I 2024-11-19 20:10:16,475] Trial 32 finished with value: 0.047368127323035 and parameters: {'similarity': 'cosine', 'topK': 75, 'shrink': 591, 'feature_weighting': 'BM25', 'ICM_weight': 0.3659673352014763}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 643.72 column/sec. Elapsed time 59.22 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 39.65 sec. Users per second: 898


[I 2024-11-19 20:11:56,931] Trial 33 finished with value: 0.03560150346601957 and parameters: {'similarity': 'dice', 'topK': 51, 'shrink': 407, 'feature_weighting': 'BM25', 'ICM_weight': 0.46263519917736345}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 637.25 column/sec. Elapsed time 59.82 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 42.50 sec. Users per second: 838


[I 2024-11-19 20:13:41,053] Trial 34 finished with value: 0.033469591769843654 and parameters: {'similarity': 'jaccard', 'topK': 111, 'shrink': 575, 'feature_weighting': 'BM25', 'ICM_weight': 0.20726880127558292}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 758.01 column/sec. Elapsed time 50.29 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 34.24 sec. Users per second: 1039


[I 2024-11-19 20:15:06,219] Trial 35 finished with value: 0.03368459989698816 and parameters: {'similarity': 'cosine', 'topK': 2, 'shrink': 768, 'feature_weighting': 'none', 'ICM_weight': 0.133732637126558}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 631.88 column/sec. Elapsed time 1.01 min
EvaluatorHoldout: Processed 35595 (100.0%) in 45.83 sec. Users per second: 777


[I 2024-11-19 20:16:53,753] Trial 36 finished with value: 0.046121154656551124 and parameters: {'similarity': 'cosine', 'topK': 184, 'shrink': 339, 'feature_weighting': 'BM25', 'ICM_weight': 0.2718054715405088}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 28300 (74.2%), 94.06 column/sec. Elapsed time 5.01 min
Similarity column 38121 (100.0%), 94.27 column/sec. Elapsed time 6.74 min
EvaluatorHoldout: Processed 35595 (100.0%) in 39.11 sec. Users per second: 910


[I 2024-11-19 20:24:18,131] Trial 37 finished with value: 0.034606593801072116 and parameters: {'similarity': 'euclidean', 'topK': 43, 'shrink': 749, 'feature_weighting': 'BM25', 'ICM_weight': 0.17274097390889046, 'normalize_avg_row': True, 'similarity_from_distance_mode': 'lin', 'normalize': True}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 628.65 column/sec. Elapsed time 1.01 min
EvaluatorHoldout: Processed 35595 (100.0%) in 47.80 sec. Users per second: 745


[I 2024-11-19 20:26:08,260] Trial 38 finished with value: 0.040747039556561844 and parameters: {'similarity': 'cosine', 'topK': 263, 'shrink': 630, 'feature_weighting': 'BM25', 'ICM_weight': 2.3343433133397244}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 633.39 column/sec. Elapsed time 1.00 min
EvaluatorHoldout: Processed 35595 (100.0%) in 43.04 sec. Users per second: 827


[I 2024-11-19 20:27:52,945] Trial 39 finished with value: 0.03301486966467911 and parameters: {'similarity': 'dice', 'topK': 120, 'shrink': 689, 'feature_weighting': 'none', 'ICM_weight': 0.10171136959119412}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 624.32 column/sec. Elapsed time 1.02 min
EvaluatorHoldout: Processed 35595 (100.0%) in 48.29 sec. Users per second: 737


[I 2024-11-19 20:29:44,618] Trial 40 finished with value: 0.032841209640197176 and parameters: {'similarity': 'tversky', 'topK': 351, 'shrink': 433, 'feature_weighting': 'BM25', 'ICM_weight': 0.1342231058009735, 'tversky_alpha': 1.9292987179528651, 'tversky_beta': 1.9902597435017206}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 642.14 column/sec. Elapsed time 59.37 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 33.98 sec. Users per second: 1047


[I 2024-11-19 20:31:19,079] Trial 41 finished with value: 0.05105325565165274 and parameters: {'similarity': 'cosine', 'topK': 5, 'shrink': 516, 'feature_weighting': 'BM25', 'ICM_weight': 0.13954691597035654}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 643.55 column/sec. Elapsed time 59.24 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 40.96 sec. Users per second: 869


[I 2024-11-19 20:33:00,332] Trial 42 finished with value: 0.03742265723070484 and parameters: {'similarity': 'cosine', 'topK': 78, 'shrink': 497, 'feature_weighting': 'BM25', 'ICM_weight': 7.415467580024546}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 641.99 column/sec. Elapsed time 59.38 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.74 sec. Users per second: 943


[I 2024-11-19 20:34:38,542] Trial 43 finished with value: 0.048264140005394016 and parameters: {'similarity': 'cosine', 'topK': 31, 'shrink': 580, 'feature_weighting': 'BM25', 'ICM_weight': 0.224351795143524}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 639.59 column/sec. Elapsed time 59.60 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 40.92 sec. Users per second: 870


[I 2024-11-19 20:36:20,809] Trial 44 finished with value: 0.03442906084098537 and parameters: {'similarity': 'jaccard', 'topK': 78, 'shrink': 490, 'feature_weighting': 'BM25', 'ICM_weight': 0.13080053732905814}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 643.29 column/sec. Elapsed time 59.26 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 33.67 sec. Users per second: 1057


[I 2024-11-19 20:37:54,832] Trial 45 finished with value: 0.051142299725526176 and parameters: {'similarity': 'cosine', 'topK': 4, 'shrink': 556, 'feature_weighting': 'BM25', 'ICM_weight': 0.17395873065056786}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 631.56 column/sec. Elapsed time 1.01 min
EvaluatorHoldout: Processed 35595 (100.0%) in 47.92 sec. Users per second: 743


[I 2024-11-19 20:39:44,110] Trial 46 finished with value: 0.03495362399302431 and parameters: {'similarity': 'cosine', 'topK': 160, 'shrink': 682, 'feature_weighting': 'none', 'ICM_weight': 0.3775381968438932}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 629.88 column/sec. Elapsed time 1.01 min
EvaluatorHoldout: Processed 35595 (100.0%) in 38.63 sec. Users per second: 921


[I 2024-11-19 20:41:24,902] Trial 47 finished with value: 0.033581241346095005 and parameters: {'similarity': 'tversky', 'topK': 50, 'shrink': 792, 'feature_weighting': 'BM25', 'ICM_weight': 0.18320512860035093, 'tversky_alpha': 0.08337365810473107, 'tversky_beta': 1.8648751242811732}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 628.59 column/sec. Elapsed time 1.01 min
EvaluatorHoldout: Processed 35595 (100.0%) in 54.67 sec. Users per second: 651


[I 2024-11-19 20:43:22,127] Trial 48 finished with value: 0.04339510074760621 and parameters: {'similarity': 'cosine', 'topK': 622, 'shrink': 575, 'feature_weighting': 'BM25', 'ICM_weight': 1.0289989057659668}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 639.72 column/sec. Elapsed time 59.59 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 41.55 sec. Users per second: 857


[I 2024-11-19 20:45:04,513] Trial 49 finished with value: 0.04676324813766695 and parameters: {'similarity': 'cosine', 'topK': 94, 'shrink': 715, 'feature_weighting': 'BM25', 'ICM_weight': 0.5781308555318807}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 29200 (76.6%), 97.12 column/sec. Elapsed time 5.01 min
Similarity column 38121 (100.0%), 97.25 column/sec. Elapsed time 6.53 min
EvaluatorHoldout: Processed 35595 (100.0%) in 31.48 sec. Users per second: 1131


[I 2024-11-19 20:52:08,869] Trial 50 finished with value: 0.019659306528248877 and parameters: {'similarity': 'euclidean', 'topK': 131, 'shrink': 393, 'feature_weighting': 'BM25', 'ICM_weight': 6.052197793177692, 'normalize_avg_row': True, 'similarity_from_distance_mode': 'log', 'normalize': False}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 641.19 column/sec. Elapsed time 59.45 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 34.62 sec. Users per second: 1028


[I 2024-11-19 20:53:44,054] Trial 51 finished with value: 0.050344151688861485 and parameters: {'similarity': 'cosine', 'topK': 7, 'shrink': 540, 'feature_weighting': 'BM25', 'ICM_weight': 0.11913722902495018}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 643.57 column/sec. Elapsed time 59.23 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.77 sec. Users per second: 942


[I 2024-11-19 20:55:22,184] Trial 52 finished with value: 0.04780289946197912 and parameters: {'similarity': 'cosine', 'topK': 34, 'shrink': 473, 'feature_weighting': 'BM25', 'ICM_weight': 0.15981935570035524}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 890.73 column/sec. Elapsed time 42.80 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 27.92 sec. Users per second: 1275


[I 2024-11-19 20:56:33,899] Trial 53 finished with value: 0.00028209999175022335 and parameters: {'similarity': 'cosine', 'topK': 0, 'shrink': 631, 'feature_weighting': 'BM25', 'ICM_weight': 0.3079864442276722}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 642.54 column/sec. Elapsed time 59.33 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.71 sec. Users per second: 944


[I 2024-11-19 20:58:12,043] Trial 54 finished with value: 0.04810626046550864 and parameters: {'similarity': 'cosine', 'topK': 32, 'shrink': 528, 'feature_weighting': 'BM25', 'ICM_weight': 0.22342552002669835}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 634.90 column/sec. Elapsed time 1.00 min
EvaluatorHoldout: Processed 35595 (100.0%) in 47.14 sec. Users per second: 755


[I 2024-11-19 21:00:00,133] Trial 55 finished with value: 0.03598648040009057 and parameters: {'similarity': 'asymmetric', 'topK': 86, 'shrink': 554, 'feature_weighting': 'none', 'ICM_weight': 0.10294256210344074, 'asymmetric_alpha': 1.9746161803773963}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 646.38 column/sec. Elapsed time 58.98 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 38.59 sec. Users per second: 922


[I 2024-11-19 21:01:39,374] Trial 56 finished with value: 0.0350310369969016 and parameters: {'similarity': 'dice', 'topK': 39, 'shrink': 611, 'feature_weighting': 'BM25', 'ICM_weight': 0.1646465194338939}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 645.24 column/sec. Elapsed time 59.08 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 40.16 sec. Users per second: 886


[I 2024-11-19 21:03:19,609] Trial 57 finished with value: 0.046775937185754 and parameters: {'similarity': 'cosine', 'topK': 68, 'shrink': 423, 'feature_weighting': 'BM25', 'ICM_weight': 0.12730976177131328}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 633.68 column/sec. Elapsed time 1.00 min
EvaluatorHoldout: Processed 35595 (100.0%) in 49.71 sec. Users per second: 716


[I 2024-11-19 21:05:11,326] Trial 58 finished with value: 0.04534787300695237 and parameters: {'similarity': 'cosine', 'topK': 393, 'shrink': 301, 'feature_weighting': 'BM25', 'ICM_weight': 0.26577428998529107}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 626.49 column/sec. Elapsed time 1.01 min
EvaluatorHoldout: Processed 35595 (100.0%) in 44.36 sec. Users per second: 802


[I 2024-11-19 21:06:58,277] Trial 59 finished with value: 0.032175879660286795 and parameters: {'similarity': 'tversky', 'topK': 131, 'shrink': 660, 'feature_weighting': 'TF-IDF', 'ICM_weight': 0.18542148332739103, 'tversky_alpha': 1.9026629450296695, 'tversky_beta': 0.09261013478714619}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 638.09 column/sec. Elapsed time 59.74 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.03 sec. Users per second: 961


[I 2024-11-19 21:08:36,716] Trial 60 finished with value: 0.03578058493144767 and parameters: {'similarity': 'jaccard', 'topK': 21, 'shrink': 458, 'feature_weighting': 'BM25', 'ICM_weight': 0.40546376907786197}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 695.50 column/sec. Elapsed time 54.81 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 33.12 sec. Users per second: 1075


[I 2024-11-19 21:10:05,709] Trial 61 finished with value: 0.05051442261598059 and parameters: {'similarity': 'cosine', 'topK': 3, 'shrink': 541, 'feature_weighting': 'BM25', 'ICM_weight': 0.118386888635847}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 820.30 column/sec. Elapsed time 46.47 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 31.29 sec. Users per second: 1138


[I 2024-11-19 21:11:24,533] Trial 62 finished with value: 0.041422335043488114 and parameters: {'similarity': 'cosine', 'topK': 1, 'shrink': 501, 'feature_weighting': 'BM25', 'ICM_weight': 0.1495918683157767}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 642.51 column/sec. Elapsed time 59.33 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 39.11 sec. Users per second: 910


[I 2024-11-19 21:13:04,139] Trial 63 finished with value: 0.046940579312680475 and parameters: {'similarity': 'cosine', 'topK': 56, 'shrink': 551, 'feature_weighting': 'BM25', 'ICM_weight': 0.11895474586414255}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 645.55 column/sec. Elapsed time 59.05 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.76 sec. Users per second: 943


[I 2024-11-19 21:14:42,010] Trial 64 finished with value: 0.04763580358396898 and parameters: {'similarity': 'cosine', 'topK': 33, 'shrink': 509, 'feature_weighting': 'BM25', 'ICM_weight': 0.10000435263081353}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 642.97 column/sec. Elapsed time 59.29 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 41.62 sec. Users per second: 855


[I 2024-11-19 21:16:24,180] Trial 65 finished with value: 0.04671455327460212 and parameters: {'similarity': 'cosine', 'topK': 104, 'shrink': 608, 'feature_weighting': 'BM25', 'ICM_weight': 0.21043913664749023}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 644.31 column/sec. Elapsed time 59.17 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 39.85 sec. Users per second: 893


[I 2024-11-19 21:18:04,133] Trial 66 finished with value: 0.04685922425344919 and parameters: {'similarity': 'cosine', 'topK': 68, 'shrink': 563, 'feature_weighting': 'BM25', 'ICM_weight': 0.14696924276280202}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 643.67 column/sec. Elapsed time 59.22 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.21 sec. Users per second: 957


[I 2024-11-19 21:19:41,605] Trial 67 finished with value: 0.04869108934953667 and parameters: {'similarity': 'cosine', 'topK': 24, 'shrink': 152, 'feature_weighting': 'BM25', 'ICM_weight': 0.30480566908766}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 635.85 column/sec. Elapsed time 59.95 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 47.02 sec. Users per second: 757


[I 2024-11-19 21:21:29,546] Trial 68 finished with value: 0.020338395797519 and parameters: {'similarity': 'asymmetric', 'topK': 53, 'shrink': 355, 'feature_weighting': 'TF-IDF', 'ICM_weight': 0.1234319729815472, 'asymmetric_alpha': 1.5124594582231354}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 634.89 column/sec. Elapsed time 1.00 min
EvaluatorHoldout: Processed 35595 (100.0%) in 45.58 sec. Users per second: 781


[I 2024-11-19 21:23:16,610] Trial 69 finished with value: 0.04583166331101252 and parameters: {'similarity': 'cosine', 'topK': 193, 'shrink': 466, 'feature_weighting': 'BM25', 'ICM_weight': 0.17619239602932796}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 206.70 column/sec. Elapsed time 3.07 min
EvaluatorHoldout: Processed 35595 (100.0%) in 31.26 sec. Users per second: 1139


[I 2024-11-19 21:26:52,814] Trial 70 finished with value: 0.04108776647335279 and parameters: {'similarity': 'euclidean', 'topK': 1, 'shrink': 595, 'feature_weighting': 'none', 'ICM_weight': 0.23823642600680484, 'normalize_avg_row': False, 'similarity_from_distance_mode': 'exp', 'normalize': True}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 824.44 column/sec. Elapsed time 46.24 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 31.53 sec. Users per second: 1129


[I 2024-11-19 21:28:11,446] Trial 71 finished with value: 0.04140166712374721 and parameters: {'similarity': 'cosine', 'topK': 1, 'shrink': 535, 'feature_weighting': 'BM25', 'ICM_weight': 0.128927940155802}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 636.15 column/sec. Elapsed time 59.92 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.54 sec. Users per second: 948


[I 2024-11-19 21:29:50,084] Trial 72 finished with value: 0.048206556565594215 and parameters: {'similarity': 'cosine', 'topK': 23, 'shrink': 526, 'feature_weighting': 'BM25', 'ICM_weight': 0.11575877633995638}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 634.52 column/sec. Elapsed time 1.00 min
EvaluatorHoldout: Processed 35595 (100.0%) in 37.20 sec. Users per second: 957


[I 2024-11-19 21:31:28,564] Trial 73 finished with value: 0.0484950100000654 and parameters: {'similarity': 'cosine', 'topK': 20, 'shrink': 650, 'feature_weighting': 'BM25', 'ICM_weight': 0.15209424754989115}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 635.66 column/sec. Elapsed time 59.97 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 41.34 sec. Users per second: 861


[I 2024-11-19 21:33:11,113] Trial 74 finished with value: 0.03625918790984 and parameters: {'similarity': 'cosine', 'topK': 86, 'shrink': 416, 'feature_weighting': 'BM25', 'ICM_weight': 19.81297272711474}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 638.43 column/sec. Elapsed time 59.71 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 39.59 sec. Users per second: 899


[I 2024-11-19 21:34:51,529] Trial 75 finished with value: 0.04694245000523807 and parameters: {'similarity': 'cosine', 'topK': 53, 'shrink': 482, 'feature_weighting': 'BM25', 'ICM_weight': 0.11329756303353368}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 637.64 column/sec. Elapsed time 59.78 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 42.52 sec. Users per second: 837


[I 2024-11-19 21:36:35,662] Trial 76 finished with value: 0.033716868786189395 and parameters: {'similarity': 'dice', 'topK': 110, 'shrink': 549, 'feature_weighting': 'BM25', 'ICM_weight': 0.18537574700912052}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 642.11 column/sec. Elapsed time 59.37 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 38.85 sec. Users per second: 916


[I 2024-11-19 21:38:14,817] Trial 77 finished with value: 0.04771237377284395 and parameters: {'similarity': 'cosine', 'topK': 41, 'shrink': 621, 'feature_weighting': 'BM25', 'ICM_weight': 0.2054537493177338}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 639.45 column/sec. Elapsed time 59.62 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.09 sec. Users per second: 960


[I 2024-11-19 21:39:52,674] Trial 78 finished with value: 0.03934556752887871 and parameters: {'similarity': 'cosine', 'topK': 17, 'shrink': 13, 'feature_weighting': 'BM25', 'ICM_weight': 2.6043860291672165}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 635.74 column/sec. Elapsed time 59.96 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 40.10 sec. Users per second: 888


[I 2024-11-19 21:41:34,487] Trial 79 finished with value: 0.034914612583806445 and parameters: {'similarity': 'jaccard', 'topK': 67, 'shrink': 442, 'feature_weighting': 'BM25', 'ICM_weight': 0.1140397522299771}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 621.05 column/sec. Elapsed time 1.02 min
EvaluatorHoldout: Processed 35595 (100.0%) in 1.01 min. Users per second: 588


[I 2024-11-19 21:43:39,024] Trial 80 finished with value: 0.043992543985799895 and parameters: {'similarity': 'cosine', 'topK': 741, 'shrink': 694, 'feature_weighting': 'TF-IDF', 'ICM_weight': 0.14217336389425855}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 640.43 column/sec. Elapsed time 59.52 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 36.04 sec. Users per second: 988


[I 2024-11-19 21:45:15,716] Trial 81 finished with value: 0.04975486904037113 and parameters: {'similarity': 'cosine', 'topK': 10, 'shrink': 506, 'feature_weighting': 'BM25', 'ICM_weight': 0.15661300226420313}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 639.17 column/sec. Elapsed time 59.64 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.14 sec. Users per second: 958


[I 2024-11-19 21:46:53,457] Trial 82 finished with value: 0.04844184019067972 and parameters: {'similarity': 'cosine', 'topK': 20, 'shrink': 584, 'feature_weighting': 'BM25', 'ICM_weight': 0.13728461759213437}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 638.44 column/sec. Elapsed time 59.71 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 39.21 sec. Users per second: 908


[I 2024-11-19 21:48:33,565] Trial 83 finished with value: 0.04771496910792976 and parameters: {'similarity': 'cosine', 'topK': 48, 'shrink': 515, 'feature_weighting': 'BM25', 'ICM_weight': 0.26590837772644116}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 636.55 column/sec. Elapsed time 59.89 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 41.56 sec. Users per second: 857


[I 2024-11-19 21:50:16,017] Trial 84 finished with value: 0.04664630086711662 and parameters: {'similarity': 'cosine', 'topK': 93, 'shrink': 382, 'feature_weighting': 'BM25', 'ICM_weight': 0.1728283966560544}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 635.92 column/sec. Elapsed time 59.95 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 38.49 sec. Users per second: 925


[I 2024-11-19 21:51:55,625] Trial 85 finished with value: 0.047318310936303544 and parameters: {'similarity': 'cosine', 'topK': 38, 'shrink': 559, 'feature_weighting': 'BM25', 'ICM_weight': 0.11115547348385228}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 621.01 column/sec. Elapsed time 1.02 min
EvaluatorHoldout: Processed 35595 (100.0%) in 40.19 sec. Users per second: 886


[I 2024-11-19 21:53:39,028] Trial 86 finished with value: 0.034583947049811735 and parameters: {'similarity': 'tversky', 'topK': 71, 'shrink': 476, 'feature_weighting': 'BM25', 'ICM_weight': 1.0525579049088518, 'tversky_alpha': 0.5262022545196658, 'tversky_beta': 1.4308045157336124}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 634.71 column/sec. Elapsed time 1.00 min
EvaluatorHoldout: Processed 35595 (100.0%) in 38.41 sec. Users per second: 927


[I 2024-11-19 21:55:18,338] Trial 87 finished with value: 0.039089048533210616 and parameters: {'similarity': 'asymmetric', 'topK': 15, 'shrink': 532, 'feature_weighting': 'none', 'ICM_weight': 0.19756274569271262, 'asymmetric_alpha': 0.6067628729749083}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 887.61 column/sec. Elapsed time 42.95 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 28.04 sec. Users per second: 1269


[I 2024-11-19 21:56:30,381] Trial 88 finished with value: 0.00028209999175022335 and parameters: {'similarity': 'cosine', 'topK': 0, 'shrink': 592, 'feature_weighting': 'BM25', 'ICM_weight': 0.12424697193133706}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 28300 (74.2%), 94.08 column/sec. Elapsed time 5.01 min
Similarity column 38121 (100.0%), 94.31 column/sec. Elapsed time 6.74 min
EvaluatorHoldout: Processed 35595 (100.0%) in 42.67 sec. Users per second: 834


[I 2024-11-19 22:03:58,199] Trial 89 finished with value: 0.029064993745776604 and parameters: {'similarity': 'euclidean', 'topK': 124, 'shrink': 901, 'feature_weighting': 'BM25', 'ICM_weight': 0.10223925167575872, 'normalize_avg_row': False, 'similarity_from_distance_mode': 'lin', 'normalize': True}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 631.48 column/sec. Elapsed time 1.01 min
EvaluatorHoldout: Processed 35595 (100.0%) in 47.82 sec. Users per second: 744


[I 2024-11-19 22:05:47,916] Trial 90 finished with value: 0.04559605638387585 and parameters: {'similarity': 'cosine', 'topK': 290, 'shrink': 578, 'feature_weighting': 'BM25', 'ICM_weight': 0.23616276401067748}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 637.92 column/sec. Elapsed time 59.76 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 38.83 sec. Users per second: 917


[I 2024-11-19 22:07:27,712] Trial 91 finished with value: 0.048173044635748286 and parameters: {'similarity': 'cosine', 'topK': 38, 'shrink': 490, 'feature_weighting': 'BM25', 'ICM_weight': 0.3463576322163512}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 638.41 column/sec. Elapsed time 59.71 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 40.05 sec. Users per second: 889


[I 2024-11-19 22:09:08,680] Trial 92 finished with value: 0.04707620229343596 and parameters: {'similarity': 'cosine', 'topK': 59, 'shrink': 521, 'feature_weighting': 'BM25', 'ICM_weight': 0.15855389086752403}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 626.27 column/sec. Elapsed time 1.01 min
EvaluatorHoldout: Processed 35595 (100.0%) in 51.58 sec. Users per second: 690


[I 2024-11-19 22:11:03,093] Trial 93 finished with value: 0.045099175691251346 and parameters: {'similarity': 'cosine', 'topK': 454, 'shrink': 441, 'feature_weighting': 'BM25', 'ICM_weight': 0.5558419175613543}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 636.75 column/sec. Elapsed time 59.87 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.36 sec. Users per second: 953


[I 2024-11-19 22:12:41,242] Trial 94 finished with value: 0.04826264724178606 and parameters: {'similarity': 'cosine', 'topK': 23, 'shrink': 638, 'feature_weighting': 'BM25', 'ICM_weight': 0.13382898984987318}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 640.22 column/sec. Elapsed time 59.54 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 36.21 sec. Users per second: 983


[I 2024-11-19 22:14:18,595] Trial 95 finished with value: 0.03554115635110035 and parameters: {'similarity': 'dice', 'topK': 12, 'shrink': 547, 'feature_weighting': 'BM25', 'ICM_weight': 0.24273599955716293}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 633.74 column/sec. Elapsed time 1.00 min
EvaluatorHoldout: Processed 35595 (100.0%) in 41.34 sec. Users per second: 861


[I 2024-11-19 22:16:01,086] Trial 96 finished with value: 0.04690386780736369 and parameters: {'similarity': 'cosine', 'topK': 79, 'shrink': 608, 'feature_weighting': 'BM25', 'ICM_weight': 0.20007390114610338}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 631.32 column/sec. Elapsed time 1.01 min
EvaluatorHoldout: Processed 35595 (100.0%) in 38.87 sec. Users per second: 916


[I 2024-11-19 22:17:41,484] Trial 97 finished with value: 0.04759566284724133 and parameters: {'similarity': 'cosine', 'topK': 41, 'shrink': 398, 'feature_weighting': 'BM25', 'ICM_weight': 0.17044288498786464}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 634.61 column/sec. Elapsed time 1.00 min
EvaluatorHoldout: Processed 35595 (100.0%) in 40.08 sec. Users per second: 888


[I 2024-11-19 22:19:23,393] Trial 98 finished with value: 0.03498101659542734 and parameters: {'similarity': 'jaccard', 'topK': 57, 'shrink': 456, 'feature_weighting': 'BM25', 'ICM_weight': 0.14133022098751763}. Best is trial 28 with value: 0.051176340087000304.


Similarity column 38121 (100.0%), 627.35 column/sec. Elapsed time 1.01 min
EvaluatorHoldout: Processed 35595 (100.0%) in 48.16 sec. Users per second: 739


[I 2024-11-19 22:21:13,368] Trial 99 finished with value: 0.03611153140377611 and parameters: {'similarity': 'cosine', 'topK': 152, 'shrink': 493, 'feature_weighting': 'none', 'ICM_weight': 0.32870558303849406}. Best is trial 28 with value: 0.051176340087000304.


## Some optuna visualizations on recommender parameters

In [16]:
if not config['tune_parameters']:
    optuna_study = optuna.load_study(study_name=f'hyperparameters_tuning_{config["model"]}', storage=f'sqlite:///{config["database_path"]}')
    
fig = optuna.visualization.plot_slice(optuna_study)
fig.show()

In [17]:
if not config['tune_parameters']:
    optuna_study = optuna.load_study(study_name=f'hyperparameters_tuning_{config["model"]}', storage=f'sqlite:///{config["database_path"]}')
    
fig = optuna.visualization.plot_param_importances(optuna_study)
fig.show()

## Let's train the recommender with best parameter values

In [None]:
if config['tune_best_params']:

    if config['tune_parameters']:
        best_params = optuna_study.best_trial.params
    else: 
        with open(f'{K_PATH + '/' + GH_PATH}/{config["model"]}Recommender/best_params_{config["model"]}.json', 'r') as best_params_json:
            best_params = json.load(best_params_json)

    recommender_instance = ItemKNN_CFCBF_Hybrid_Recommender(URM_train + URM_validation, ICM_all)
    recommender_instance.fit(**best_params)

Similarity column 38121 (100.0%), 632.02 column/sec. Elapsed time 1.01 min


# Testing

Create the recommendations for the submission. 

In [None]:
if config['tune_best_params']:

    data_target_users_test = pd.read_csv('/kaggle/input/recommender-system-2024-challenge-polimi/data_target_users_test.csv')
    create_submission(data_target_users_test, recommender_instance, f'/kaggle/working/submission_{config["model"]}.csv')

Submission file saved as /kaggle/working/submission_ItemKNN_CFCBF_Hybrid.csv


# Save Version on GitHub 

Write or import a json file where best hyperparameters are saved. 

In [None]:
if config['tune_parameters']:
    with open(f'/kaggle/working/best_params_{config["model"]}.json', 'w') as params_file:
        json.dump(optuna_study.best_params, params_file)
        
    if config['save_github']:
        upload_file(
            f'/kaggle/working/best_params_{config["model"]}.json', 
            f'{GH_PATH}/{config["model"]}Recommender/best_params_{config["model"]}.json', 
            f'{config["model"]} tuning results (from kaggle notebook)',
            repo
        )
elif config['copy_prev_best_params']:
    shutil.copyfile(
        f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/'\
        f'best_params_{config["model"]}.json', 
        f'/kaggle/working/best_params_{config["model"]}.json'
    )

File 'TrainedModels/ItemKNN_CFCBF_HybridRecommender/best_params_ItemKNN_CFCBF_Hybrid.json' created successfully.


Save the history of the tuned model.

In [None]:
if config['save_github'] and config['tune_parameters']:
    upload_file(
        config['database_path'], 
        f'{GH_PATH}/{config["model"]}Recommender/history_{config["model"]}.db',
        f'Tuning {config["model"]} db updated results (from kaggle notebook)',
        repo
    )

File 'TrainedModels/ItemKNN_CFCBF_HybridRecommender/history_ItemKNN_CFCBF_Hybrid.db' created successfully.


Save the best trained model and its submission.

In [None]:
if config['save_github'] and config['tune_best_params']: 
    upload_file(
                f'/kaggle/working/submission_{config["model"]}.csv', 
                f'{GH_PATH}/{config["model"]}Recommender/Submission/submission_{config["model"]}.csv', 
                f'New {config["model"]} submission (from kaggle notebook)',
                repo
            )

File 'TrainedModels/ItemKNN_CFCBF_HybridRecommender/Submission/submission_ItemKNN_CFCBF_Hybrid.csv' created successfully.
ItemKNN_CFCBF_HybridRecommender: Saving model in file '/kaggle/working/best_ItemKNN_CFCBF_Hybrid_tuned'
ItemKNN_CFCBF_HybridRecommender: Saving complete
File 'TrainedModels/ItemKNN_CFCBF_HybridRecommender/best_ItemKNN_CFCBF_Hybrid_tuned.zip' created successfully.
