# Set connection with GitHub

In [1]:
# ! cd /kaggle/working && rm -rf RECsys_Challenge2024

In [2]:
from kaggle_secrets import UserSecretsClient

token = UserSecretsClient().get_secret("Token")

! git clone https://{token}@github.com/madratak/RECsys_Challenge2024.git

Cloning into 'RECsys_Challenge2024'...
remote: Enumerating objects: 4164, done.[K
remote: Counting objects: 100% (1637/1637), done.[K
remote: Compressing objects: 100% (580/580), done.[K
remote: Total 4164 (delta 883), reused 1510 (delta 824), pack-reused 2527 (from 1)[K
Receiving objects: 100% (4164/4164), 170.92 MiB | 16.17 MiB/s, done.
Resolving deltas: 100% (2402/2402), done.
Updating files: 100% (390/390), done.


In [3]:
! pip install PyGithub requests

Collecting PyGithub
  Downloading PyGithub-2.5.0-py3-none-any.whl.metadata (3.9 kB)
Collecting pynacl>=1.4.0 (from PyGithub)
  Downloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl.metadata (8.6 kB)
Downloading PyGithub-2.5.0-py3-none-any.whl (375 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m375.9/375.9 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (856 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m856.7/856.7 kB[0m [31m27.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pynacl, PyGithub
Successfully installed PyGithub-2.5.0 pynacl-1.5.0


In [4]:
%cd /kaggle/working/RECsys_Challenge2024 
! python run_compile_all_cython.py

/kaggle/working/RECsys_Challenge2024
run_compile_all_cython: Found 11 Cython files in 5 folders...
run_compile_all_cython: All files will be compiled using your current python environment: '/opt/conda/bin/python'
Compiling [1/11]: MatrixFactorization_Cython_Epoch.pyx... 
In file included from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/ndarraytypes.h:1929[m[K,
                 from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/ndarrayobject.h:12[m[K,
                 from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/arrayobject.h:5[m[K,
                 from [01m[KMatrixFactorization_Cython_Epoch.c:1252[m[K:
      |  [01;35m[K^~~~~~~[m[K
[01m[KMatrixFactorization_Cython_Epoch.c:[m[K In function '[01m[K__pyx_pf_32MatrixFactorization_Cython_Epoch_32MatrixFactorization_Cython_Epoch_10epochIteration_Cython_ASY_SVD_SGD[m[K':
26255 |         [01;35m[Kfor[m[K (__pyx_t_21 = __

In [5]:
from Utils.notebookFunctions import *
import numpy as np
import pandas as pd
import scipy.sparse as sps
import time
import shutil
import optuna
import json
import os
from Utils.seconds_to_biggest_unit import seconds_to_biggest_unit


K_PATH = '/kaggle/working/RECsys_Challenge2024'
GH_PATH = 'TrainedModels/WithoutKFCV/KNN'

np.random.seed(42)

## Import the repository **RECsys_Challenge2024**

In [6]:
repo = get_repo_from_github(token)

Repository 'RECsys_Challenge2024' found.


In [7]:
config = {
    'model': 'ItemKNNCF',
    'metric': 'Recall',
    'tune_parameters': True,
    'database_path': '/kaggle/working/history_ItemKNNCF_Recall.db',
    'copy_prev_best_params': False,
    'tune_best_params': True,
    'save_github': True
}

Import the database where previous tuning trials have been saved.

In [8]:
try:
    shutil.copyfile(
        f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/history_{config["model"]}_{config["metric"]}.db', 
        config['database_path']
    )
except FileNotFoundError:
    pass # if not present optuna will create it

# Construction of URM and ICM matrices

In [9]:
URM_all_dataframe = pd.read_csv("/kaggle/input/recommender-system-2024-challenge-polimi/data_train.csv")

n_users = len(URM_all_dataframe["user_id"].unique())
n_items = len(URM_all_dataframe["item_id"].unique())

URM_all = sps.csr_matrix((URM_all_dataframe["data"].values, 
                          (URM_all_dataframe["user_id"].values, URM_all_dataframe["item_id"].values)),
                        shape = (n_users, n_items))

URM_all

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 1764607 stored elements and shape (35736, 38121)>

In [10]:
ICM_dataframe = pd.read_csv("/kaggle/input/recommender-system-2024-challenge-polimi/data_ICM_metadata.csv")

n_items = len(ICM_dataframe["item_id"].unique())
n_features = len(ICM_dataframe["feature_id"].unique())

ICM_all = sps.csr_matrix((ICM_dataframe["data"].values, 
                          (ICM_dataframe["item_id"].values, ICM_dataframe["feature_id"].values)),
                        shape = (n_items, n_features))

ICM_all

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 2940040 stored elements and shape (38121, 94331)>

# Training

In [11]:
from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.80)

evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[50])

EvaluatorHoldout: Ignoring 141 ( 0.4%) Users that have less than 1 test interactions


In [12]:
from Recommenders.KNN.ItemKNNCFRecommender import ItemKNNCFRecommender

def objective_function_ItemKNNCF(optuna_trial):
    
    recommender_instance = ItemKNNCFRecommender(URM_train)
    similarity = optuna_trial.suggest_categorical("similarity", ['cosine', 'dice', 'jaccard', 'asymmetric', 'tversky', 'euclidean'])
    
    full_hyperp = {"similarity": similarity,
                   "topK": optuna_trial.suggest_int("topK", 0, 750),
                   "shrink": optuna_trial.suggest_int("shrink", 0, 1000),
                   'feature_weighting': optuna_trial.suggest_categorical('feature_weighting', ["BM25", "TF-IDF", "none"])
                  }
    
    if similarity == "asymmetric":
        full_hyperp["asymmetric_alpha"] = optuna_trial.suggest_float("asymmetric_alpha", 0, 2, log=False)
        full_hyperp["normalize"] = True     

    elif similarity == "tversky":
        full_hyperp["tversky_alpha"] = optuna_trial.suggest_float("tversky_alpha", 0, 2, log=False)
        full_hyperp["tversky_beta"] = optuna_trial.suggest_float("tversky_beta", 0, 2, log=False)
        full_hyperp["normalize"] = True 

    elif similarity == "euclidean":
        full_hyperp["normalize_avg_row"] = optuna_trial.suggest_categorical("normalize_avg_row", [True, False])
        full_hyperp["similarity_from_distance_mode"] = optuna_trial.suggest_categorical("similarity_from_distance_mode", ["lin", "log", "exp"])
        full_hyperp["normalize"] = optuna_trial.suggest_categorical("normalize", [True, False])
        
    
    recommender_instance.fit(**full_hyperp)
    
    result_df, _ = evaluator_validation.evaluateRecommender(recommender_instance)
    
    return result_df.loc[50, "RECALL"]

In [13]:
if config['tune_parameters']:
    
    optuna_study = optuna.create_study(direction='maximize', study_name=f'hyperparameters_tuning_{config["model"]}_{config["metric"]}',
                                storage=f'sqlite:///{config["database_path"]}', load_if_exists=True)

    optuna_study.optimize(objective_function_ItemKNNCF, n_trials=100)

[I 2024-12-16 10:40:38,493] Using an existing study with name 'hyperparameters_tuning_ItemKNNCF_Recall' instead of creating a new one.


Similarity column 38121 (100.0%), 2001.21 column/sec. Elapsed time 19.05 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 32.63 sec. Users per second: 1091


[I 2024-12-16 10:41:30,827] Trial 300 finished with value: 0.2360804498143627 and parameters: {'similarity': 'dice', 'topK': 3, 'shrink': 176, 'feature_weighting': 'BM25'}. Best is trial 287 with value: 0.26200885013113334.


Similarity column 38121 (100.0%), 2670.38 column/sec. Elapsed time 14.28 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 36.21 sec. Users per second: 983


[I 2024-12-16 10:42:21,883] Trial 301 finished with value: 0.26177580037568643 and parameters: {'similarity': 'tversky', 'topK': 21, 'shrink': 296, 'feature_weighting': 'BM25', 'tversky_alpha': 0.22027502726794285, 'tversky_beta': 1.2883012731193733}. Best is trial 287 with value: 0.26200885013113334.


Similarity column 38121 (100.0%), 3419.69 column/sec. Elapsed time 11.15 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 26.83 sec. Users per second: 1327


[I 2024-12-16 10:43:00,385] Trial 302 finished with value: 0.005972113065949917 and parameters: {'similarity': 'tversky', 'topK': 0, 'shrink': 297, 'feature_weighting': 'BM25', 'tversky_alpha': 0.23083388472478575, 'tversky_beta': 1.3288522302797774}. Best is trial 287 with value: 0.26200885013113334.


Similarity column 38121 (100.0%), 2685.06 column/sec. Elapsed time 14.20 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 36.09 sec. Users per second: 986


[I 2024-12-16 10:43:51,228] Trial 303 finished with value: 0.2622649958577045 and parameters: {'similarity': 'tversky', 'topK': 22, 'shrink': 256, 'feature_weighting': 'BM25', 'tversky_alpha': 0.18125339646592215, 'tversky_beta': 1.3144863060100058}. Best is trial 303 with value: 0.2622649958577045.


Similarity column 38121 (100.0%), 2696.22 column/sec. Elapsed time 14.14 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 35.73 sec. Users per second: 996


[I 2024-12-16 10:44:41,649] Trial 304 finished with value: 0.26234437599930654 and parameters: {'similarity': 'tversky', 'topK': 17, 'shrink': 275, 'feature_weighting': 'BM25', 'tversky_alpha': 0.1745587699794623, 'tversky_beta': 1.2379368730353641}. Best is trial 304 with value: 0.26234437599930654.


Similarity column 38121 (100.0%), 2660.80 column/sec. Elapsed time 14.33 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 35.80 sec. Users per second: 994


[I 2024-12-16 10:45:32,330] Trial 305 finished with value: 0.26241894023141876 and parameters: {'similarity': 'tversky', 'topK': 17, 'shrink': 244, 'feature_weighting': 'BM25', 'tversky_alpha': 0.15681665128332833, 'tversky_beta': 1.2662411382694367}. Best is trial 305 with value: 0.26241894023141876.


Similarity column 38121 (100.0%), 2679.87 column/sec. Elapsed time 14.22 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 35.99 sec. Users per second: 989


[I 2024-12-16 10:46:23,122] Trial 306 finished with value: 0.2623658495143866 and parameters: {'similarity': 'tversky', 'topK': 18, 'shrink': 241, 'feature_weighting': 'BM25', 'tversky_alpha': 0.17993488517277703, 'tversky_beta': 1.289241257340143}. Best is trial 305 with value: 0.26241894023141876.


Similarity column 38121 (100.0%), 3393.76 column/sec. Elapsed time 11.23 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 26.83 sec. Users per second: 1326


[I 2024-12-16 10:47:01,664] Trial 307 finished with value: 0.005972113065949917 and parameters: {'similarity': 'tversky', 'topK': 0, 'shrink': 264, 'feature_weighting': 'none', 'tversky_alpha': 0.18955308887398664, 'tversky_beta': 1.2705096027955607}. Best is trial 305 with value: 0.26241894023141876.


Similarity column 38121 (100.0%), 2693.63 column/sec. Elapsed time 14.15 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 35.37 sec. Users per second: 1006


[I 2024-12-16 10:47:51,751] Trial 308 finished with value: 0.2618409480924195 and parameters: {'similarity': 'tversky', 'topK': 14, 'shrink': 236, 'feature_weighting': 'BM25', 'tversky_alpha': 0.09378577806903492, 'tversky_beta': 1.2425017118938881}. Best is trial 305 with value: 0.26241894023141876.


Similarity column 38121 (100.0%), 2671.52 column/sec. Elapsed time 14.27 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 35.34 sec. Users per second: 1007


[I 2024-12-16 10:48:41,918] Trial 309 finished with value: 0.26198007329720024 and parameters: {'similarity': 'tversky', 'topK': 14, 'shrink': 246, 'feature_weighting': 'BM25', 'tversky_alpha': 0.06870215091938682, 'tversky_beta': 1.231051757993132}. Best is trial 305 with value: 0.26241894023141876.


Similarity column 38121 (100.0%), 2693.18 column/sec. Elapsed time 14.15 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 34.84 sec. Users per second: 1022


[I 2024-12-16 10:49:31,462] Trial 310 finished with value: 0.2605240505338723 and parameters: {'similarity': 'tversky', 'topK': 11, 'shrink': 243, 'feature_weighting': 'BM25', 'tversky_alpha': 0.09581875371835619, 'tversky_beta': 1.2651450714885122}. Best is trial 305 with value: 0.26241894023141876.


Similarity column 38121 (100.0%), 2745.91 column/sec. Elapsed time 13.88 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 49.04 sec. Users per second: 726


[I 2024-12-16 10:50:35,373] Trial 311 finished with value: 0.22946188011081317 and parameters: {'similarity': 'asymmetric', 'topK': 486, 'shrink': 219, 'feature_weighting': 'BM25', 'asymmetric_alpha': 0.026349949279738327}. Best is trial 305 with value: 0.26241894023141876.


Similarity column 38121 (100.0%), 2736.01 column/sec. Elapsed time 13.93 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 34.70 sec. Users per second: 1026


[I 2024-12-16 10:51:24,549] Trial 312 finished with value: 0.2627796224180537 and parameters: {'similarity': 'tversky', 'topK': 17, 'shrink': 235, 'feature_weighting': 'BM25', 'tversky_alpha': 0.08242179825800577, 'tversky_beta': 1.2780251065915171}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2767.19 column/sec. Elapsed time 13.78 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 34.31 sec. Users per second: 1038


[I 2024-12-16 10:52:13,148] Trial 313 finished with value: 0.24528589716673307 and parameters: {'similarity': 'tversky', 'topK': 12, 'shrink': 244, 'feature_weighting': 'TF-IDF', 'tversky_alpha': 0.001994688465429967, 'tversky_beta': 1.2307470068764848}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 3259.00 column/sec. Elapsed time 11.70 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 29.56 sec. Users per second: 1204


[I 2024-12-16 10:52:54,955] Trial 314 finished with value: 0.16496795144283805 and parameters: {'similarity': 'tversky', 'topK': 1, 'shrink': 253, 'feature_weighting': 'BM25', 'tversky_alpha': 0.054660491169505915, 'tversky_beta': 1.1791069534003968}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2764.58 column/sec. Elapsed time 13.79 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 43.15 sec. Users per second: 825


[I 2024-12-16 10:53:52,709] Trial 315 finished with value: 0.24301660951476778 and parameters: {'similarity': 'tversky', 'topK': 239, 'shrink': 230, 'feature_weighting': 'BM25', 'tversky_alpha': 0.14300669295672475, 'tversky_beta': 1.2702748802761712}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2869.68 column/sec. Elapsed time 13.28 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 34.15 sec. Users per second: 1042


[I 2024-12-16 10:54:40,664] Trial 316 finished with value: 0.2617957070453814 and parameters: {'similarity': 'tversky', 'topK': 16, 'shrink': 272, 'feature_weighting': 'BM25', 'tversky_alpha': 0.18762872942362535, 'tversky_beta': 1.1415652117416923}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2821.84 column/sec. Elapsed time 13.51 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 34.12 sec. Users per second: 1043


[I 2024-12-16 10:55:28,814] Trial 317 finished with value: 0.26140419412496596 and parameters: {'similarity': 'tversky', 'topK': 14, 'shrink': 267, 'feature_weighting': 'BM25', 'tversky_alpha': 0.16982556489576098, 'tversky_beta': 1.245350475649043}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 3635.49 column/sec. Elapsed time 10.49 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 25.65 sec. Users per second: 1388


[I 2024-12-16 10:56:05,456] Trial 318 finished with value: 0.005972113065949917 and parameters: {'similarity': 'tversky', 'topK': 0, 'shrink': 272, 'feature_weighting': 'BM25', 'tversky_alpha': 0.17248735893509665, 'tversky_beta': 1.1385217336167839}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2793.00 column/sec. Elapsed time 13.65 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 33.98 sec. Users per second: 1048


[I 2024-12-16 10:56:53,603] Trial 319 finished with value: 0.26121689661160397 and parameters: {'similarity': 'tversky', 'topK': 13, 'shrink': 202, 'feature_weighting': 'BM25', 'tversky_alpha': 0.12934141966802484, 'tversky_beta': 1.243834159043106}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2798.93 column/sec. Elapsed time 13.62 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 33.11 sec. Users per second: 1075


[I 2024-12-16 10:57:40,863] Trial 320 finished with value: 0.2596579946584156 and parameters: {'similarity': 'tversky', 'topK': 11, 'shrink': 145, 'feature_weighting': 'BM25', 'tversky_alpha': 0.12043855528059386, 'tversky_beta': 1.2113651812061705}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 3029.12 column/sec. Elapsed time 12.58 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 33.14 sec. Users per second: 1074


[I 2024-12-16 10:58:26,929] Trial 321 finished with value: 0.23019968480245404 and parameters: {'similarity': 'cosine', 'topK': 15, 'shrink': 194, 'feature_weighting': 'BM25'}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2828.29 column/sec. Elapsed time 13.48 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 33.73 sec. Users per second: 1055


[I 2024-12-16 10:59:14,656] Trial 322 finished with value: 0.2619870605083989 and parameters: {'similarity': 'tversky', 'topK': 14, 'shrink': 251, 'feature_weighting': 'BM25', 'tversky_alpha': 0.07130375937940239, 'tversky_beta': 1.2513465736068414}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2840.05 column/sec. Elapsed time 13.42 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 33.72 sec. Users per second: 1056


[I 2024-12-16 11:00:02,321] Trial 323 finished with value: 0.26189856161441094 and parameters: {'similarity': 'tversky', 'topK': 15, 'shrink': 206, 'feature_weighting': 'BM25', 'tversky_alpha': 0.06126060742882955, 'tversky_beta': 1.2339470673076065}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 3623.71 column/sec. Elapsed time 10.52 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 25.47 sec. Users per second: 1397


[I 2024-12-16 11:00:38,812] Trial 324 finished with value: 0.005972113065949917 and parameters: {'similarity': 'tversky', 'topK': 0, 'shrink': 212, 'feature_weighting': 'BM25', 'tversky_alpha': 0.05822140494206268, 'tversky_beta': 1.2462579121288462}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2846.37 column/sec. Elapsed time 13.39 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 33.57 sec. Users per second: 1060


[I 2024-12-16 11:01:26,308] Trial 325 finished with value: 0.2613025579610756 and parameters: {'similarity': 'tversky', 'topK': 14, 'shrink': 241, 'feature_weighting': 'BM25', 'tversky_alpha': 0.1972502206141482, 'tversky_beta': 1.1659787272203015}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 3646.12 column/sec. Elapsed time 10.46 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 25.46 sec. Users per second: 1398


[I 2024-12-16 11:02:02,727] Trial 326 finished with value: 0.005972113065949917 and parameters: {'similarity': 'tversky', 'topK': 0, 'shrink': 211, 'feature_weighting': 'BM25', 'tversky_alpha': 0.27349834262566985, 'tversky_beta': 1.1489524353094749}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 3090.55 column/sec. Elapsed time 12.33 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 34.99 sec. Users per second: 1017


[I 2024-12-16 11:02:50,546] Trial 327 finished with value: 0.2585827919466985 and parameters: {'similarity': 'jaccard', 'topK': 19, 'shrink': 260, 'feature_weighting': 'BM25'}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2782.10 column/sec. Elapsed time 13.70 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 34.57 sec. Users per second: 1030


[I 2024-12-16 11:03:39,369] Trial 328 finished with value: 0.2611065634950271 and parameters: {'similarity': 'tversky', 'topK': 14, 'shrink': 244, 'feature_weighting': 'BM25', 'tversky_alpha': 0.18068724041587791, 'tversky_beta': 1.0734578841190676}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2738.48 column/sec. Elapsed time 13.92 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 34.76 sec. Users per second: 1024


[I 2024-12-16 11:04:28,581] Trial 329 finished with value: 0.260488280866666 and parameters: {'similarity': 'tversky', 'topK': 13, 'shrink': 244, 'feature_weighting': 'BM25', 'tversky_alpha': 0.20896235895076773, 'tversky_beta': 1.083770597653328}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 469.83 column/sec. Elapsed time 1.35 min
EvaluatorHoldout: Processed 35595 (100.0%) in 45.01 sec. Users per second: 791


[I 2024-12-16 11:06:35,415] Trial 330 finished with value: 0.15619926230679576 and parameters: {'similarity': 'euclidean', 'topK': 334, 'shrink': 227, 'feature_weighting': 'BM25', 'normalize_avg_row': True, 'similarity_from_distance_mode': 'exp', 'normalize': True}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 3053.94 column/sec. Elapsed time 12.48 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 34.79 sec. Users per second: 1023


[I 2024-12-16 11:07:23,191] Trial 331 finished with value: 0.25570351713695544 and parameters: {'similarity': 'dice', 'topK': 15, 'shrink': 185, 'feature_weighting': 'BM25'}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2726.71 column/sec. Elapsed time 13.98 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 34.93 sec. Users per second: 1019


[I 2024-12-16 11:08:12,554] Trial 332 finished with value: 0.24206049990614126 and parameters: {'similarity': 'tversky', 'topK': 14, 'shrink': 272, 'feature_weighting': 'none', 'tversky_alpha': 0.16963631397617354, 'tversky_beta': 1.2009166760907324}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2737.65 column/sec. Elapsed time 13.92 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 35.09 sec. Users per second: 1014


[I 2024-12-16 11:09:02,115] Trial 333 finished with value: 0.2611617304174975 and parameters: {'similarity': 'tversky', 'topK': 13, 'shrink': 245, 'feature_weighting': 'BM25', 'tversky_alpha': 0.06982295755923247, 'tversky_beta': 1.0076863678414345}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 3177.53 column/sec. Elapsed time 12.00 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 30.62 sec. Users per second: 1162


[I 2024-12-16 11:09:45,255] Trial 334 finished with value: 0.16565932096685687 and parameters: {'similarity': 'tversky', 'topK': 1, 'shrink': 254, 'feature_weighting': 'BM25', 'tversky_alpha': 0.0836449674823405, 'tversky_beta': 1.0090320464916347}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2712.50 column/sec. Elapsed time 14.05 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 34.86 sec. Users per second: 1021


[I 2024-12-16 11:10:34,716] Trial 335 finished with value: 0.26154838744047815 and parameters: {'similarity': 'tversky', 'topK': 14, 'shrink': 209, 'feature_weighting': 'BM25', 'tversky_alpha': 0.13862552115331253, 'tversky_beta': 1.0496521833201045}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 3466.03 column/sec. Elapsed time 11.00 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 26.61 sec. Users per second: 1338


[I 2024-12-16 11:11:12,851] Trial 336 finished with value: 0.005972113065949917 and parameters: {'similarity': 'tversky', 'topK': 0, 'shrink': 206, 'feature_weighting': 'BM25', 'tversky_alpha': 0.1421262569733662, 'tversky_beta': 0.9429176741717534}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2718.15 column/sec. Elapsed time 14.02 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 35.36 sec. Users per second: 1007


[I 2024-12-16 11:12:02,787] Trial 337 finished with value: 0.26185756242325087 and parameters: {'similarity': 'tversky', 'topK': 15, 'shrink': 238, 'feature_weighting': 'BM25', 'tversky_alpha': 0.11788437141807355, 'tversky_beta': 1.0741746882491583}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2728.54 column/sec. Elapsed time 13.97 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 35.28 sec. Users per second: 1009


[I 2024-12-16 11:12:52,581] Trial 338 finished with value: 0.26206091681910443 and parameters: {'similarity': 'tversky', 'topK': 15, 'shrink': 227, 'feature_weighting': 'BM25', 'tversky_alpha': 0.05357454245514742, 'tversky_beta': 1.1037517231863931}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2737.97 column/sec. Elapsed time 13.92 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 34.84 sec. Users per second: 1022


[I 2024-12-16 11:13:41,861] Trial 339 finished with value: 0.24459245354208642 and parameters: {'similarity': 'tversky', 'topK': 14, 'shrink': 223, 'feature_weighting': 'TF-IDF', 'tversky_alpha': 0.028365195266393986, 'tversky_beta': 1.1023249693309227}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2713.38 column/sec. Elapsed time 14.05 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 34.70 sec. Users per second: 1026


[I 2024-12-16 11:14:31,142] Trial 340 finished with value: 0.259927499183619 and parameters: {'similarity': 'tversky', 'topK': 11, 'shrink': 172, 'feature_weighting': 'BM25', 'tversky_alpha': 0.10871049410671933, 'tversky_beta': 1.0526540993971987}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2909.85 column/sec. Elapsed time 13.10 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 34.40 sec. Users per second: 1035


[I 2024-12-16 11:15:19,000] Trial 341 finished with value: 0.23031091030329767 and parameters: {'similarity': 'asymmetric', 'topK': 16, 'shrink': 205, 'feature_weighting': 'BM25', 'asymmetric_alpha': 0.4084382239398847}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2372.91 column/sec. Elapsed time 16.07 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 31.90 sec. Users per second: 1116


[I 2024-12-16 11:16:07,493] Trial 342 finished with value: 0.21819001467345903 and parameters: {'similarity': 'tversky', 'topK': 2, 'shrink': 247, 'feature_weighting': 'BM25', 'tversky_alpha': 0.05594383736061559, 'tversky_beta': 1.1304891947679883}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2694.77 column/sec. Elapsed time 14.15 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 35.29 sec. Users per second: 1009


[I 2024-12-16 11:16:57,525] Trial 343 finished with value: 0.2619461961971327 and parameters: {'similarity': 'tversky', 'topK': 17, 'shrink': 151, 'feature_weighting': 'BM25', 'tversky_alpha': 0.15232916665437055, 'tversky_beta': 1.0410518891997254}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2713.28 column/sec. Elapsed time 14.05 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 35.75 sec. Users per second: 996


[I 2024-12-16 11:17:47,918] Trial 344 finished with value: 0.2617431871681224 and parameters: {'similarity': 'tversky', 'topK': 19, 'shrink': 229, 'feature_weighting': 'BM25', 'tversky_alpha': 0.1478984920463898, 'tversky_beta': 0.9588427147055112}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 3480.95 column/sec. Elapsed time 10.95 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 26.16 sec. Users per second: 1361


[I 2024-12-16 11:18:25,545] Trial 345 finished with value: 0.005972113065949917 and parameters: {'similarity': 'tversky', 'topK': 0, 'shrink': 147, 'feature_weighting': 'BM25', 'tversky_alpha': 0.15066875721016987, 'tversky_beta': 0.9722241507917945}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2820.25 column/sec. Elapsed time 13.52 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 34.73 sec. Users per second: 1025


[I 2024-12-16 11:19:14,330] Trial 346 finished with value: 0.26211499901317664 and parameters: {'similarity': 'tversky', 'topK': 22, 'shrink': 189, 'feature_weighting': 'BM25', 'tversky_alpha': 0.0966279464739558, 'tversky_beta': 1.0245650533994344}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2836.14 column/sec. Elapsed time 13.44 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 36.66 sec. Users per second: 971


[I 2024-12-16 11:20:05,009] Trial 347 finished with value: 0.25940425471348716 and parameters: {'similarity': 'tversky', 'topK': 49, 'shrink': 187, 'feature_weighting': 'BM25', 'tversky_alpha': 0.12362536683956249, 'tversky_beta': 1.1806939938618988}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2853.63 column/sec. Elapsed time 13.36 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 33.86 sec. Users per second: 1051


[I 2024-12-16 11:20:52,755] Trial 348 finished with value: 0.2607860604297772 and parameters: {'similarity': 'tversky', 'topK': 22, 'shrink': 132, 'feature_weighting': 'BM25', 'tversky_alpha': 0.2376483940079333, 'tversky_beta': 1.2422020959080897}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2836.24 column/sec. Elapsed time 13.44 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 34.27 sec. Users per second: 1039


[I 2024-12-16 11:21:41,008] Trial 349 finished with value: 0.26190194392799054 and parameters: {'similarity': 'tversky', 'topK': 22, 'shrink': 167, 'feature_weighting': 'BM25', 'tversky_alpha': 0.0029454888143112534, 'tversky_beta': 1.283452280285982}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2795.43 column/sec. Elapsed time 13.64 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 35.62 sec. Users per second: 999


[I 2024-12-16 11:22:30,822] Trial 350 finished with value: 0.2583276443838678 and parameters: {'similarity': 'tversky', 'topK': 45, 'shrink': 109, 'feature_weighting': 'BM25', 'tversky_alpha': 0.015150062253300067, 'tversky_beta': 1.1682483513043116}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2694.60 column/sec. Elapsed time 14.15 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 50.44 sec. Users per second: 706


[I 2024-12-16 11:23:36,649] Trial 351 finished with value: 0.22986300765272757 and parameters: {'similarity': 'tversky', 'topK': 564, 'shrink': 160, 'feature_weighting': 'BM25', 'tversky_alpha': 0.09924613196643603, 'tversky_beta': 1.0392913558401036}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 3080.02 column/sec. Elapsed time 12.38 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 34.49 sec. Users per second: 1032


[I 2024-12-16 11:24:23,864] Trial 352 finished with value: 0.23071494455539357 and parameters: {'similarity': 'cosine', 'topK': 24, 'shrink': 191, 'feature_weighting': 'BM25'}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2776.22 column/sec. Elapsed time 13.73 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 36.87 sec. Users per second: 965


[I 2024-12-16 11:25:15,038] Trial 353 finished with value: 0.2595319306341642 and parameters: {'similarity': 'tversky', 'topK': 43, 'shrink': 167, 'feature_weighting': 'BM25', 'tversky_alpha': 0.04596501706219149, 'tversky_beta': 0.9071846980409546}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2723.03 column/sec. Elapsed time 14.00 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 34.74 sec. Users per second: 1025


[I 2024-12-16 11:26:04,325] Trial 354 finished with value: 0.26200932039471336 and parameters: {'similarity': 'tversky', 'topK': 21, 'shrink': 213, 'feature_weighting': 'BM25', 'tversky_alpha': 0.14104621604090153, 'tversky_beta': 1.284355174667943}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2803.72 column/sec. Elapsed time 13.60 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 35.08 sec. Users per second: 1015


[I 2024-12-16 11:26:53,574] Trial 355 finished with value: 0.26208415315370015 and parameters: {'similarity': 'tversky', 'topK': 25, 'shrink': 222, 'feature_weighting': 'BM25', 'tversky_alpha': 0.15280660981716535, 'tversky_beta': 1.2984034451420106}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2811.42 column/sec. Elapsed time 13.56 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 33.89 sec. Users per second: 1050


[I 2024-12-16 11:27:41,479] Trial 356 finished with value: 0.26037428001947727 and parameters: {'similarity': 'tversky', 'topK': 25, 'shrink': 71, 'feature_weighting': 'none', 'tversky_alpha': 0.1467689946299328, 'tversky_beta': 1.2931016226460488}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 587.89 column/sec. Elapsed time 1.08 min
EvaluatorHoldout: Processed 35595 (100.0%) in 37.94 sec. Users per second: 938


[I 2024-12-16 11:29:24,596] Trial 357 finished with value: 0.1425574827841472 and parameters: {'similarity': 'euclidean', 'topK': 41, 'shrink': 218, 'feature_weighting': 'BM25', 'normalize_avg_row': False, 'similarity_from_distance_mode': 'log', 'normalize': False}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 3601.34 column/sec. Elapsed time 10.59 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 26.35 sec. Users per second: 1351


[I 2024-12-16 11:30:02,041] Trial 358 finished with value: 0.005972113065949917 and parameters: {'similarity': 'tversky', 'topK': 0, 'shrink': 215, 'feature_weighting': 'BM25', 'tversky_alpha': 0.09074118977772906, 'tversky_beta': 1.2893325911421232}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2982.54 column/sec. Elapsed time 12.78 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 38.43 sec. Users per second: 926


[I 2024-12-16 11:30:53,820] Trial 359 finished with value: 0.2566664114399765 and parameters: {'similarity': 'jaccard', 'topK': 52, 'shrink': 267, 'feature_weighting': 'BM25'}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2772.19 column/sec. Elapsed time 13.75 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 35.47 sec. Users per second: 1004


[I 2024-12-16 11:31:43,596] Trial 360 finished with value: 0.2614662659965757 and parameters: {'similarity': 'tversky', 'topK': 25, 'shrink': 183, 'feature_weighting': 'BM25', 'tversky_alpha': 0.2669191427234491, 'tversky_beta': 1.121786873087915}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2613.51 column/sec. Elapsed time 14.59 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 52.72 sec. Users per second: 675


[I 2024-12-16 11:32:52,221] Trial 361 finished with value: 0.23176500048246382 and parameters: {'similarity': 'tversky', 'topK': 606, 'shrink': 183, 'feature_weighting': 'BM25', 'tversky_alpha': 0.2786212059702733, 'tversky_beta': 1.0880495137364834}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2713.56 column/sec. Elapsed time 14.05 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 35.97 sec. Users per second: 990


[I 2024-12-16 11:33:42,778] Trial 362 finished with value: 0.248492414904789 and parameters: {'similarity': 'tversky', 'topK': 28, 'shrink': 159, 'feature_weighting': 'TF-IDF', 'tversky_alpha': 0.24119272943828457, 'tversky_beta': 1.225419490745482}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2994.59 column/sec. Elapsed time 12.73 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.38 sec. Users per second: 952


[I 2024-12-16 11:34:33,448] Trial 363 finished with value: 0.2561535950047537 and parameters: {'similarity': 'dice', 'topK': 41, 'shrink': 209, 'feature_weighting': 'BM25'}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2776.74 column/sec. Elapsed time 13.73 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 35.54 sec. Users per second: 1002


[I 2024-12-16 11:35:23,274] Trial 364 finished with value: 0.261337407314701 and parameters: {'similarity': 'tversky', 'topK': 26, 'shrink': 138, 'feature_weighting': 'BM25', 'tversky_alpha': 0.03263161848897054, 'tversky_beta': 0.9768162265087075}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2676.36 column/sec. Elapsed time 14.24 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 45.50 sec. Users per second: 782


[I 2024-12-16 11:36:23,888] Trial 365 finished with value: 0.241013664610795 and parameters: {'similarity': 'tversky', 'topK': 277, 'shrink': 179, 'feature_weighting': 'BM25', 'tversky_alpha': 0.10383367090856473, 'tversky_beta': 1.1262262477168365}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2751.03 column/sec. Elapsed time 13.86 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 38.13 sec. Users per second: 933


[I 2024-12-16 11:37:16,472] Trial 366 finished with value: 0.25753951094245026 and parameters: {'similarity': 'tversky', 'topK': 60, 'shrink': 224, 'feature_weighting': 'BM25', 'tversky_alpha': 0.14801097384269118, 'tversky_beta': 1.2794425165820047}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2677.83 column/sec. Elapsed time 14.24 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 36.12 sec. Users per second: 986


[I 2024-12-16 11:38:07,392] Trial 367 finished with value: 0.2583843836511783 and parameters: {'similarity': 'tversky', 'topK': 40, 'shrink': 98, 'feature_weighting': 'BM25', 'tversky_alpha': 0.00016918859039505296, 'tversky_beta': 1.2165494721383372}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2748.10 column/sec. Elapsed time 13.87 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 35.74 sec. Users per second: 996


[I 2024-12-16 11:38:57,570] Trial 368 finished with value: 0.26187887557017775 and parameters: {'similarity': 'tversky', 'topK': 24, 'shrink': 276, 'feature_weighting': 'BM25', 'tversky_alpha': 0.07343440655048947, 'tversky_beta': 1.2939782057989033}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2669.53 column/sec. Elapsed time 14.28 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.56 sec. Users per second: 948


[I 2024-12-16 11:39:50,033] Trial 369 finished with value: 0.25950314171976885 and parameters: {'similarity': 'tversky', 'topK': 50, 'shrink': 196, 'feature_weighting': 'BM25', 'tversky_alpha': 0.06077500829050925, 'tversky_beta': 1.2957570501622677}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2764.65 column/sec. Elapsed time 13.79 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 36.37 sec. Users per second: 979


[I 2024-12-16 11:40:40,774] Trial 370 finished with value: 0.2608800925546993 and parameters: {'similarity': 'tversky', 'topK': 27, 'shrink': 284, 'feature_weighting': 'BM25', 'tversky_alpha': 0.08432672244090507, 'tversky_beta': 1.0313190965149195}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2942.99 column/sec. Elapsed time 12.95 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 35.01 sec. Users per second: 1017


[I 2024-12-16 11:41:29,119] Trial 371 finished with value: 0.22925125436279556 and parameters: {'similarity': 'asymmetric', 'topK': 26, 'shrink': 227, 'feature_weighting': 'BM25', 'asymmetric_alpha': 0.9223880046158849}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2750.09 column/sec. Elapsed time 13.86 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.34 sec. Users per second: 953


[I 2024-12-16 11:42:20,899] Trial 372 finished with value: 0.2601201944932387 and parameters: {'similarity': 'tversky', 'topK': 42, 'shrink': 264, 'feature_weighting': 'BM25', 'tversky_alpha': 0.2202380709645025, 'tversky_beta': 1.29739116101582}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2760.47 column/sec. Elapsed time 13.81 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 35.43 sec. Users per second: 1005


[I 2024-12-16 11:43:10,677] Trial 373 finished with value: 0.2616923374433855 and parameters: {'similarity': 'tversky', 'topK': 24, 'shrink': 168, 'feature_weighting': 'BM25', 'tversky_alpha': 0.12889654734401929, 'tversky_beta': 1.109266103564759}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2768.57 column/sec. Elapsed time 13.77 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.45 sec. Users per second: 950


[I 2024-12-16 11:44:02,484] Trial 374 finished with value: 0.25734495342101293 and parameters: {'similarity': 'tversky', 'topK': 58, 'shrink': 164, 'feature_weighting': 'BM25', 'tversky_alpha': 0.12446521753208428, 'tversky_beta': 1.0566288091657323}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 3258.56 column/sec. Elapsed time 11.70 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 30.21 sec. Users per second: 1178


[I 2024-12-16 11:44:44,918] Trial 375 finished with value: 0.16619497847955061 and parameters: {'similarity': 'tversky', 'topK': 1, 'shrink': 281, 'feature_weighting': 'BM25', 'tversky_alpha': 0.06810106051884313, 'tversky_beta': 0.9335971938507537}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2777.32 column/sec. Elapsed time 13.73 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 36.59 sec. Users per second: 973


[I 2024-12-16 11:45:35,805] Trial 376 finished with value: 0.2605539494235269 and parameters: {'similarity': 'tversky', 'topK': 37, 'shrink': 202, 'feature_weighting': 'BM25', 'tversky_alpha': 0.11467627929489568, 'tversky_beta': 0.8570350587130717}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2765.33 column/sec. Elapsed time 13.79 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 35.10 sec. Users per second: 1014


[I 2024-12-16 11:46:25,228] Trial 377 finished with value: 0.2620211292017209 and parameters: {'similarity': 'tversky', 'topK': 20, 'shrink': 225, 'feature_weighting': 'BM25', 'tversky_alpha': 0.16578701830060888, 'tversky_beta': 1.1975081587359298}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2766.60 column/sec. Elapsed time 13.78 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 38.64 sec. Users per second: 921


[I 2024-12-16 11:47:18,234] Trial 378 finished with value: 0.255722868962822 and parameters: {'similarity': 'tversky', 'topK': 72, 'shrink': 230, 'feature_weighting': 'BM25', 'tversky_alpha': 0.1698582185103789, 'tversky_beta': 1.2100283521980393}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2801.28 column/sec. Elapsed time 13.61 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 35.22 sec. Users per second: 1011


[I 2024-12-16 11:48:07,531] Trial 379 finished with value: 0.24176057236583215 and parameters: {'similarity': 'tversky', 'topK': 24, 'shrink': 257, 'feature_weighting': 'none', 'tversky_alpha': 0.03275752154471945, 'tversky_beta': 1.2669574609838248}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2806.48 column/sec. Elapsed time 13.58 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 36.57 sec. Users per second: 973


[I 2024-12-16 11:48:58,276] Trial 380 finished with value: 0.25945351373093617 and parameters: {'similarity': 'tversky', 'topK': 46, 'shrink': 226, 'feature_weighting': 'BM25', 'tversky_alpha': 0.09644244795218757, 'tversky_beta': 1.1867111960506378}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 3041.06 column/sec. Elapsed time 12.54 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 34.58 sec. Users per second: 1029


[I 2024-12-16 11:49:45,752] Trial 381 finished with value: 0.23083437105317808 and parameters: {'similarity': 'cosine', 'topK': 26, 'shrink': 262, 'feature_weighting': 'BM25'}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2768.46 column/sec. Elapsed time 13.77 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 36.59 sec. Users per second: 973


[I 2024-12-16 11:50:36,658] Trial 382 finished with value: 0.2609576866495493 and parameters: {'similarity': 'tversky', 'topK': 38, 'shrink': 288, 'feature_weighting': 'BM25', 'tversky_alpha': 0.2091258829313033, 'tversky_beta': 1.3038738579154663}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2763.53 column/sec. Elapsed time 13.79 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 34.28 sec. Users per second: 1038


[I 2024-12-16 11:51:25,294] Trial 383 finished with value: 0.26204726967681846 and parameters: {'similarity': 'tversky', 'topK': 15, 'shrink': 232, 'feature_weighting': 'BM25', 'tversky_alpha': 0.06380016077198145, 'tversky_beta': 1.2531861510011284}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2762.96 column/sec. Elapsed time 13.80 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 33.55 sec. Users per second: 1061


[I 2024-12-16 11:52:13,178] Trial 384 finished with value: 0.2598886779118495 and parameters: {'similarity': 'tversky', 'topK': 10, 'shrink': 234, 'feature_weighting': 'BM25', 'tversky_alpha': 0.060107771690505823, 'tversky_beta': 1.2515466705684324}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 3090.94 column/sec. Elapsed time 12.33 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 34.03 sec. Users per second: 1046


[I 2024-12-16 11:53:00,037] Trial 385 finished with value: 0.24736653998451158 and parameters: {'similarity': 'jaccard', 'topK': 15, 'shrink': 149, 'feature_weighting': 'TF-IDF'}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 3585.17 column/sec. Elapsed time 10.63 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 25.81 sec. Users per second: 1379


[I 2024-12-16 11:53:36,980] Trial 386 finished with value: 0.005972113065949917 and parameters: {'similarity': 'tversky', 'topK': 0, 'shrink': 197, 'feature_weighting': 'BM25', 'tversky_alpha': 0.025194044739224875, 'tversky_beta': 1.1539335110812012}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 638.94 column/sec. Elapsed time 59.66 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 39.49 sec. Users per second: 901


[I 2024-12-16 11:55:16,466] Trial 387 finished with value: 0.149331055861729 and parameters: {'similarity': 'euclidean', 'topK': 54, 'shrink': 126, 'feature_weighting': 'BM25', 'normalize_avg_row': False, 'similarity_from_distance_mode': 'lin', 'normalize': False}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2801.06 column/sec. Elapsed time 13.61 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 35.22 sec. Users per second: 1011


[I 2024-12-16 11:56:05,838] Trial 388 finished with value: 0.2622023586560702 and parameters: {'similarity': 'tversky', 'topK': 22, 'shrink': 252, 'feature_weighting': 'BM25', 'tversky_alpha': 0.1459198544992233, 'tversky_beta': 1.2215133872778121}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2775.33 column/sec. Elapsed time 13.74 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 34.07 sec. Users per second: 1045


[I 2024-12-16 11:56:54,178] Trial 389 finished with value: 0.26195762016907914 and parameters: {'similarity': 'tversky', 'topK': 14, 'shrink': 245, 'feature_weighting': 'BM25', 'tversky_alpha': 0.08183194095460321, 'tversky_beta': 1.214901089275174}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 3726.12 column/sec. Elapsed time 10.23 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 29.94 sec. Users per second: 1189


[I 2024-12-16 11:57:34,833] Trial 390 finished with value: 0.16632948242858206 and parameters: {'similarity': 'dice', 'topK': 1, 'shrink': 255, 'feature_weighting': 'BM25'}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 3594.57 column/sec. Elapsed time 10.61 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 25.75 sec. Users per second: 1382


[I 2024-12-16 11:58:11,694] Trial 391 finished with value: 0.005972113065949917 and parameters: {'similarity': 'tversky', 'topK': 0, 'shrink': 276, 'feature_weighting': 'BM25', 'tversky_alpha': 0.08204351642696436, 'tversky_beta': 1.209391318783885}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2797.22 column/sec. Elapsed time 13.63 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 36.35 sec. Users per second: 979


[I 2024-12-16 11:59:02,220] Trial 392 finished with value: 0.26073662704929107 and parameters: {'similarity': 'tversky', 'topK': 39, 'shrink': 257, 'feature_weighting': 'BM25', 'tversky_alpha': 0.037827216059941826, 'tversky_beta': 1.2491009310987133}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2803.26 column/sec. Elapsed time 13.60 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 43.85 sec. Users per second: 812


[I 2024-12-16 12:00:00,343] Trial 393 finished with value: 0.24173388058052656 and parameters: {'similarity': 'tversky', 'topK': 149, 'shrink': 243, 'feature_weighting': 'BM25', 'tversky_alpha': 0.1836681425544008, 'tversky_beta': 0.015865129054078997}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2836.55 column/sec. Elapsed time 13.44 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 34.40 sec. Users per second: 1035


[I 2024-12-16 12:00:48,702] Trial 394 finished with value: 0.26245476199333995 and parameters: {'similarity': 'tversky', 'topK': 20, 'shrink': 215, 'feature_weighting': 'BM25', 'tversky_alpha': 0.07715795658043681, 'tversky_beta': 1.2757568279290168}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2807.60 column/sec. Elapsed time 13.58 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 35.95 sec. Users per second: 990


[I 2024-12-16 12:01:38,762] Trial 395 finished with value: 0.26149658053590624 and parameters: {'similarity': 'tversky', 'topK': 36, 'shrink': 214, 'feature_weighting': 'BM25', 'tversky_alpha': 0.09108193427261459, 'tversky_beta': 1.218748243032509}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2815.00 column/sec. Elapsed time 13.54 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 33.91 sec. Users per second: 1050


[I 2024-12-16 12:02:26,727] Trial 396 finished with value: 0.2618994003963394 and parameters: {'similarity': 'tversky', 'topK': 15, 'shrink': 216, 'feature_weighting': 'BM25', 'tversky_alpha': 0.012517725796138107, 'tversky_beta': 1.3198493210899478}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2793.17 column/sec. Elapsed time 13.65 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.22 sec. Users per second: 956


[I 2024-12-16 12:03:18,170] Trial 397 finished with value: 0.257902181253462 and parameters: {'similarity': 'tversky', 'topK': 56, 'shrink': 211, 'feature_weighting': 'BM25', 'tversky_alpha': 0.009108623971742384, 'tversky_beta': 1.3090305360592505}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 3621.41 column/sec. Elapsed time 10.53 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 25.59 sec. Users per second: 1391


[I 2024-12-16 12:03:54,788] Trial 398 finished with value: 0.005972113065949917 and parameters: {'similarity': 'tversky', 'topK': 0, 'shrink': 194, 'feature_weighting': 'BM25', 'tversky_alpha': 0.06079060250466663, 'tversky_beta': 1.2632038955356517}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2823.92 column/sec. Elapsed time 13.50 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 36.07 sec. Users per second: 987


[I 2024-12-16 12:04:44,916] Trial 399 finished with value: 0.26144707487538366 and parameters: {'similarity': 'tversky', 'topK': 34, 'shrink': 225, 'feature_weighting': 'BM25', 'tversky_alpha': 0.006039293699870875, 'tversky_beta': 1.3100059956561072}. Best is trial 312 with value: 0.2627796224180537.


## Some optuna visualizations on recommender parameters

In [14]:
if not config['tune_parameters']:
    optuna_study = optuna.load_study(study_name=f'hyperparameters_tuning_{config["model"]}_{config["metric"]}', storage=f'sqlite:///{config["database_path"]}')
    
fig = optuna.visualization.plot_slice(optuna_study)
fig.show()

In [15]:
if not config['tune_parameters']:
    optuna_study = optuna.load_study(study_name=f'hyperparameters_tuning_{config["model"]}_{config["metric"]}', storage=f'sqlite:///{config["database_path"]}')
    
fig = optuna.visualization.plot_param_importances(optuna_study)
fig.show()

## Let's train the recommender with best parameter values

In [16]:
if config['tune_best_params']:

    if config['tune_parameters']:
        best_params = optuna_study.best_trial.params
    else: 
        with open(f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/best_params_{config["model"]}_{config["metric"]}.json', 'r') as best_params_json:
            best_params = json.load(best_params_json)

    recommender_instance = ItemKNNCFRecommender(URM_train + URM_validation)
    recommender_instance.fit(**best_params)

Similarity column 38121 (100.0%), 2497.79 column/sec. Elapsed time 15.26 sec


# Testing

Create the recommendations for the submission. 

In [17]:
if config['tune_best_params']:

    data_target_users_test = pd.read_csv('/kaggle/input/recommender-system-2024-challenge-polimi/data_target_users_test.csv')
    create_submission(data_target_users_test, recommender_instance, f'/kaggle/working/submission_{config["model"]}_{config["metric"]}.csv')

Submission file saved as /kaggle/working/submission_ItemKNNCF_Recall.csv


# Save Version on GitHub 

Write or import a json file where best hyperparameters are saved. 

In [18]:
if config['tune_parameters']:
    with open(f'/kaggle/working/best_params_{config["model"]}_{config["metric"]}.json', 'w') as params_file:
        json.dump(optuna_study.best_params, params_file)
        
    if config['save_github']:
        upload_file(
            f'/kaggle/working/best_params_{config["model"]}_{config["metric"]}.json', 
            f'{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/best_params_{config["model"]}_{config["metric"]}.json', 
            f'{config["model"]}_{config["metric"]} tuning results (from kaggle notebook)',
            repo
        )
elif config['copy_prev_best_params']:
    shutil.copyfile(
        f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/'\
        f'best_params_{config["model"]}_{config["metric"]}.json', 
        f'/kaggle/working/best_params_{config["model"]}_{config["metric"]}.json'
    )

File 'TrainedModels/WithoutKFCV/KNN/ItemKNNCFRecommender/OptimizingRecall/best_params_ItemKNNCF_Recall.json' created successfully.


Save the history of the tuned model.

In [19]:
if config['save_github'] and config['tune_parameters']:
    upload_file(
        config['database_path'], 
        f'{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/history_{config["model"]}_{config["metric"]}.db',
        f'Tuning {config["model"]}_{config["metric"]} db updated results (from kaggle notebook)',
        repo
    )

File 'TrainedModels/WithoutKFCV/KNN/ItemKNNCFRecommender/OptimizingRecall/history_ItemKNNCF_Recall.db' updated successfully.


Save the best trained model and its submission.

In [20]:
if config['save_github'] and config['tune_best_params']: 
    upload_file(
                f'/kaggle/working/submission_{config["model"]}_{config["metric"]}.csv', 
                f'{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/Submission/submission_{config["model"]}_{config["metric"]}.csv', 
                f'New {config["model"]}_{config["metric"]} submission (from kaggle notebook)',
                repo
            )

File 'TrainedModels/WithoutKFCV/KNN/ItemKNNCFRecommender/OptimizingRecall/Submission/submission_ItemKNNCF_Recall.csv' created successfully.
