# Set connection with GitHub

In [1]:
# ! cd /kaggle/working && rm -rf RECsys_Challenge2024

In [2]:
from kaggle_secrets import UserSecretsClient

token = UserSecretsClient().get_secret("Token")

! git clone https://{token}@github.com/madratak/RECsys_Challenge2024.git

Cloning into 'RECsys_Challenge2024'...
remote: Enumerating objects: 4251, done.[K
remote: Counting objects: 100% (1724/1724), done.[K
remote: Compressing objects: 100% (637/637), done.[K
remote: Total 4251 (delta 918), reused 1560 (delta 841), pack-reused 2527 (from 1)[K
Receiving objects: 100% (4251/4251), 171.96 MiB | 18.76 MiB/s, done.
Resolving deltas: 100% (2437/2437), done.
Updating files: 100% (395/395), done.


In [3]:
! pip install PyGithub requests

Collecting PyGithub
  Downloading PyGithub-2.5.0-py3-none-any.whl.metadata (3.9 kB)
Collecting pynacl>=1.4.0 (from PyGithub)
  Downloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl.metadata (8.6 kB)
Downloading PyGithub-2.5.0-py3-none-any.whl (375 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m375.9/375.9 kB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (856 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m856.7/856.7 kB[0m [31m31.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pynacl, PyGithub
Successfully installed PyGithub-2.5.0 pynacl-1.5.0


In [4]:
%cd /kaggle/working/RECsys_Challenge2024 
! python run_compile_all_cython.py

/kaggle/working/RECsys_Challenge2024
run_compile_all_cython: Found 11 Cython files in 5 folders...
run_compile_all_cython: All files will be compiled using your current python environment: '/opt/conda/bin/python'
Compiling [1/11]: MatrixFactorization_Cython_Epoch.pyx... 
In file included from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/ndarraytypes.h:1929[m[K,
                 from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/ndarrayobject.h:12[m[K,
                 from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/arrayobject.h:5[m[K,
                 from [01m[KMatrixFactorization_Cython_Epoch.c:1252[m[K:
      |  [01;35m[K^~~~~~~[m[K
[01m[KMatrixFactorization_Cython_Epoch.c:[m[K In function '[01m[K__pyx_pf_32MatrixFactorization_Cython_Epoch_32MatrixFactorization_Cython_Epoch_10epochIteration_Cython_ASY_SVD_SGD[m[K':
26255 |         [01;35m[Kfor[m[K (__pyx_t_21 = __

In [5]:
from Utils.notebookFunctions import *
import numpy as np
import pandas as pd
import scipy.sparse as sps
import time
import shutil
import optuna
import json
import os
from Utils.seconds_to_biggest_unit import seconds_to_biggest_unit


K_PATH = '/kaggle/working/RECsys_Challenge2024'
GH_PATH = 'TrainedModels/WithoutKFCV/KNN'

np.random.seed(42)

## Import the repository **RECsys_Challenge2024**

In [6]:
repo = get_repo_from_github(token)

Repository 'RECsys_Challenge2024' found.


In [7]:
config = {
    'model': 'ItemKNNCF',
    'metric': 'Recall',
    'tune_parameters': True,
    'database_path': '/kaggle/working/history_ItemKNNCF_Recall.db',
    'copy_prev_best_params': False,
    'tune_best_params': True,
    'save_github': True
}

Import the database where previous tuning trials have been saved.

In [8]:
try:
    shutil.copyfile(
        f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/history_{config["model"]}_{config["metric"]}.db', 
        config['database_path']
    )
except FileNotFoundError:
    pass # if not present optuna will create it

# Construction of URM and ICM matrices

In [9]:
URM_all_dataframe = pd.read_csv("/kaggle/input/recommender-system-2024-challenge-polimi/data_train.csv")

n_users = len(URM_all_dataframe["user_id"].unique())
n_items = len(URM_all_dataframe["item_id"].unique())

URM_all = sps.csr_matrix((URM_all_dataframe["data"].values, 
                          (URM_all_dataframe["user_id"].values, URM_all_dataframe["item_id"].values)),
                        shape = (n_users, n_items))

URM_all

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 1764607 stored elements and shape (35736, 38121)>

In [10]:
ICM_dataframe = pd.read_csv("/kaggle/input/recommender-system-2024-challenge-polimi/data_ICM_metadata.csv")

n_items = len(ICM_dataframe["item_id"].unique())
n_features = len(ICM_dataframe["feature_id"].unique())

ICM_all = sps.csr_matrix((ICM_dataframe["data"].values, 
                          (ICM_dataframe["item_id"].values, ICM_dataframe["feature_id"].values)),
                        shape = (n_items, n_features))

ICM_all

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 2940040 stored elements and shape (38121, 94331)>

# Training

In [11]:
from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.80)

evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[50])

EvaluatorHoldout: Ignoring 141 ( 0.4%) Users that have less than 1 test interactions


In [12]:
from Recommenders.KNN.ItemKNNCFRecommender import ItemKNNCFRecommender

def objective_function_ItemKNNCF(optuna_trial):
    
    recommender_instance = ItemKNNCFRecommender(URM_train)
    similarity = optuna_trial.suggest_categorical("similarity", ['cosine', 'dice', 'jaccard', 'asymmetric', 'tversky', 'euclidean'])
    
    full_hyperp = {"similarity": similarity,
                   "topK": optuna_trial.suggest_int("topK", 0, 750),
                   "shrink": optuna_trial.suggest_int("shrink", 0, 1000),
                   'feature_weighting': optuna_trial.suggest_categorical('feature_weighting', ["BM25", "TF-IDF", "none"])
                  }
    
    if similarity == "asymmetric":
        full_hyperp["asymmetric_alpha"] = optuna_trial.suggest_float("asymmetric_alpha", 0, 2, log=False)
        full_hyperp["normalize"] = True     

    elif similarity == "tversky":
        full_hyperp["tversky_alpha"] = optuna_trial.suggest_float("tversky_alpha", 0, 2, log=False)
        full_hyperp["tversky_beta"] = optuna_trial.suggest_float("tversky_beta", 0, 2, log=False)
        full_hyperp["normalize"] = True 

    elif similarity == "euclidean":
        full_hyperp["normalize_avg_row"] = optuna_trial.suggest_categorical("normalize_avg_row", [True, False])
        full_hyperp["similarity_from_distance_mode"] = optuna_trial.suggest_categorical("similarity_from_distance_mode", ["lin", "log", "exp"])
        full_hyperp["normalize"] = optuna_trial.suggest_categorical("normalize", [True, False])
        
    
    recommender_instance.fit(**full_hyperp)
    
    result_df, _ = evaluator_validation.evaluateRecommender(recommender_instance)
    
    return result_df.loc[50, "RECALL"]

In [13]:
if config['tune_parameters']:
    
    optuna_study = optuna.create_study(direction='maximize', study_name=f'hyperparameters_tuning_{config["model"]}_{config["metric"]}',
                                storage=f'sqlite:///{config["database_path"]}', load_if_exists=True)

    optuna_study.optimize(objective_function_ItemKNNCF, n_trials=100)

[I 2024-12-16 12:18:32,182] Using an existing study with name 'hyperparameters_tuning_ItemKNNCF_Recall' instead of creating a new one.


Similarity column 38121 (100.0%), 2589.83 column/sec. Elapsed time 14.72 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 36.55 sec. Users per second: 974


[I 2024-12-16 12:19:24,276] Trial 400 finished with value: 0.26275610750067996 and parameters: {'similarity': 'tversky', 'topK': 17, 'shrink': 238, 'feature_weighting': 'BM25', 'tversky_alpha': 0.10714713809847944, 'tversky_beta': 1.2564735093468256}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2603.18 column/sec. Elapsed time 14.64 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.29 sec. Users per second: 955


[I 2024-12-16 12:20:16,862] Trial 401 finished with value: 0.2618714262326493 and parameters: {'similarity': 'tversky', 'topK': 22, 'shrink': 181, 'feature_weighting': 'BM25', 'tversky_alpha': 0.06095114384328598, 'tversky_beta': 1.320952515227453}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2759.41 column/sec. Elapsed time 13.81 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 43.69 sec. Users per second: 815


[I 2024-12-16 12:21:14,740] Trial 402 finished with value: 0.201124874042736 and parameters: {'similarity': 'asymmetric', 'topK': 45, 'shrink': 179, 'feature_weighting': 'none', 'asymmetric_alpha': 1.6207186392641413}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2561.26 column/sec. Elapsed time 14.88 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 40.09 sec. Users per second: 888


[I 2024-12-16 12:22:10,447] Trial 403 finished with value: 0.25710108514083974 and parameters: {'similarity': 'tversky', 'topK': 63, 'shrink': 197, 'feature_weighting': 'BM25', 'tversky_alpha': 0.04960264284597339, 'tversky_beta': 1.3160719728417039}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2614.79 column/sec. Elapsed time 14.58 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.25 sec. Users per second: 956


[I 2024-12-16 12:23:02,942] Trial 404 finished with value: 0.2619659757745489 and parameters: {'similarity': 'tversky', 'topK': 29, 'shrink': 212, 'feature_weighting': 'BM25', 'tversky_alpha': 0.05676141022040157, 'tversky_beta': 1.3333179227573766}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2627.89 column/sec. Elapsed time 14.51 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.60 sec. Users per second: 947


[I 2024-12-16 12:23:55,698] Trial 405 finished with value: 0.2615256473469241 and parameters: {'similarity': 'tversky', 'topK': 35, 'shrink': 218, 'feature_weighting': 'BM25', 'tversky_alpha': 0.10819222846203597, 'tversky_beta': 1.2680235923097876}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2639.24 column/sec. Elapsed time 14.44 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 38.78 sec. Users per second: 918


[I 2024-12-16 12:24:49,592] Trial 406 finished with value: 0.25904023120500175 and parameters: {'similarity': 'tversky', 'topK': 48, 'shrink': 252, 'feature_weighting': 'BM25', 'tversky_alpha': 0.15698457988635373, 'tversky_beta': 1.196564728249695}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2624.14 column/sec. Elapsed time 14.53 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.25 sec. Users per second: 956


[I 2024-12-16 12:25:42,011] Trial 407 finished with value: 0.2620269398225331 and parameters: {'similarity': 'tversky', 'topK': 29, 'shrink': 208, 'feature_weighting': 'BM25', 'tversky_alpha': 0.03857760533543807, 'tversky_beta': 1.2680569580273011}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2608.81 column/sec. Elapsed time 14.61 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 40.72 sec. Users per second: 874


[I 2024-12-16 12:26:38,052] Trial 408 finished with value: 0.23521677086590548 and parameters: {'similarity': 'tversky', 'topK': 69, 'shrink': 209, 'feature_weighting': 'TF-IDF', 'tversky_alpha': 0.0007051656162889424, 'tversky_beta': 1.2479980697651707}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2789.72 column/sec. Elapsed time 13.66 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.47 sec. Users per second: 950


[I 2024-12-16 12:27:29,671] Trial 409 finished with value: 0.23191764524426378 and parameters: {'similarity': 'cosine', 'topK': 35, 'shrink': 219, 'feature_weighting': 'BM25'}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2573.05 column/sec. Elapsed time 14.82 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 35.69 sec. Users per second: 997


[I 2024-12-16 12:28:20,830] Trial 410 finished with value: 0.261435742312911 and parameters: {'similarity': 'tversky', 'topK': 13, 'shrink': 196, 'feature_weighting': 'BM25', 'tversky_alpha': 0.044433510794712715, 'tversky_beta': 1.3384062942827955}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2631.62 column/sec. Elapsed time 14.49 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.82 sec. Users per second: 941


[I 2024-12-16 12:29:13,771] Trial 411 finished with value: 0.2612035622658871 and parameters: {'similarity': 'tversky', 'topK': 33, 'shrink': 233, 'feature_weighting': 'BM25', 'tversky_alpha': 0.11690560938330652, 'tversky_beta': 1.2093543803398463}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2622.85 column/sec. Elapsed time 14.53 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 38.81 sec. Users per second: 917


[I 2024-12-16 12:30:07,824] Trial 412 finished with value: 0.2586346810955781 and parameters: {'similarity': 'tversky', 'topK': 54, 'shrink': 172, 'feature_weighting': 'BM25', 'tversky_alpha': 0.15440474401217477, 'tversky_beta': 1.2703229031459669}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2480.61 column/sec. Elapsed time 15.37 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 53.40 sec. Users per second: 667


[I 2024-12-16 12:31:17,932] Trial 413 finished with value: 0.23010692602085073 and parameters: {'similarity': 'tversky', 'topK': 521, 'shrink': 209, 'feature_weighting': 'BM25', 'tversky_alpha': 0.08338553188402865, 'tversky_beta': 1.1709214175615712}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2466.48 column/sec. Elapsed time 15.46 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 57.03 sec. Users per second: 624


[I 2024-12-16 12:32:32,046] Trial 414 finished with value: 0.22331558794958836 and parameters: {'similarity': 'tversky', 'topK': 703, 'shrink': 246, 'feature_weighting': 'BM25', 'tversky_alpha': 0.03299381992953983, 'tversky_beta': 1.235951707725429}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 3696.73 column/sec. Elapsed time 10.31 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 27.60 sec. Users per second: 1290


[I 2024-12-16 12:33:10,599] Trial 415 finished with value: 0.005972113065949917 and parameters: {'similarity': 'jaccard', 'topK': 0, 'shrink': 226, 'feature_weighting': 'BM25'}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 502.20 column/sec. Elapsed time 1.27 min
EvaluatorHoldout: Processed 35595 (100.0%) in 35.18 sec. Users per second: 1012


[I 2024-12-16 12:35:02,081] Trial 416 finished with value: 0.18484457086032494 and parameters: {'similarity': 'euclidean', 'topK': 16, 'shrink': 183, 'feature_weighting': 'BM25', 'normalize_avg_row': True, 'similarity_from_distance_mode': 'lin', 'normalize': True}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2608.16 column/sec. Elapsed time 14.62 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 36.72 sec. Users per second: 969


[I 2024-12-16 12:35:54,072] Trial 417 finished with value: 0.2609533576868001 and parameters: {'similarity': 'tversky', 'topK': 28, 'shrink': 150, 'feature_weighting': 'BM25', 'tversky_alpha': 0.11737444990406282, 'tversky_beta': 1.3270009239172356}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2617.75 column/sec. Elapsed time 14.56 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 38.77 sec. Users per second: 918


[I 2024-12-16 12:36:48,059] Trial 418 finished with value: 0.2593520871754806 and parameters: {'similarity': 'tversky', 'topK': 46, 'shrink': 250, 'feature_weighting': 'BM25', 'tversky_alpha': 0.15613339595806425, 'tversky_beta': 1.2726219870919782}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2620.77 column/sec. Elapsed time 14.55 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 35.85 sec. Users per second: 993


[I 2024-12-16 12:37:39,093] Trial 419 finished with value: 0.26148037848223993 and parameters: {'similarity': 'tversky', 'topK': 14, 'shrink': 203, 'feature_weighting': 'BM25', 'tversky_alpha': 0.06752044807725029, 'tversky_beta': 1.1953675207975398}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 3287.13 column/sec. Elapsed time 11.60 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 27.52 sec. Users per second: 1294


[I 2024-12-16 12:38:18,812] Trial 420 finished with value: 0.005972113065949917 and parameters: {'similarity': 'tversky', 'topK': 0, 'shrink': 229, 'feature_weighting': 'BM25', 'tversky_alpha': 0.0014897153135256334, 'tversky_beta': 1.3496593299011388}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2630.92 column/sec. Elapsed time 14.49 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.16 sec. Users per second: 958


[I 2024-12-16 12:39:11,134] Trial 421 finished with value: 0.26179536715677937 and parameters: {'similarity': 'tversky', 'topK': 25, 'shrink': 256, 'feature_weighting': 'BM25', 'tversky_alpha': 0.10495231671952415, 'tversky_beta': 1.232366313891266}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2875.17 column/sec. Elapsed time 13.26 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 38.75 sec. Users per second: 919


[I 2024-12-16 12:40:03,757] Trial 422 finished with value: 0.25643058777782013 and parameters: {'similarity': 'dice', 'topK': 38, 'shrink': 198, 'feature_weighting': 'BM25'}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2592.66 column/sec. Elapsed time 14.70 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.05 sec. Users per second: 961


[I 2024-12-16 12:40:56,168] Trial 423 finished with value: 0.2624178864872243 and parameters: {'similarity': 'tversky', 'topK': 16, 'shrink': 223, 'feature_weighting': 'BM25', 'tversky_alpha': 0.05655304229505628, 'tversky_beta': 1.2791103471767402}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2572.75 column/sec. Elapsed time 14.82 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 39.81 sec. Users per second: 894


[I 2024-12-16 12:41:51,498] Trial 424 finished with value: 0.25819858518975547 and parameters: {'similarity': 'tversky', 'topK': 55, 'shrink': 239, 'feature_weighting': 'BM25', 'tversky_alpha': 0.16417131465973184, 'tversky_beta': 1.2926052654505793}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2572.76 column/sec. Elapsed time 14.82 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 38.16 sec. Users per second: 933


[I 2024-12-16 12:42:45,013] Trial 425 finished with value: 0.24394674287844897 and parameters: {'similarity': 'tversky', 'topK': 29, 'shrink': 221, 'feature_weighting': 'none', 'tversky_alpha': 0.037091748999623614, 'tversky_beta': 1.3473899572391463}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2549.96 column/sec. Elapsed time 14.95 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 36.60 sec. Users per second: 973


[I 2024-12-16 12:43:37,219] Trial 426 finished with value: 0.2607403491312897 and parameters: {'similarity': 'tversky', 'topK': 12, 'shrink': 264, 'feature_weighting': 'BM25', 'tversky_alpha': 0.12786965608603523, 'tversky_beta': 1.278810450893323}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2582.92 column/sec. Elapsed time 14.76 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 39.83 sec. Users per second: 894


[I 2024-12-16 12:44:32,521] Trial 427 finished with value: 0.256296079289691 and parameters: {'similarity': 'tversky', 'topK': 72, 'shrink': 165, 'feature_weighting': 'BM25', 'tversky_alpha': 0.19373350734308414, 'tversky_beta': 1.3151114160880166}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2781.29 column/sec. Elapsed time 13.71 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.45 sec. Users per second: 950


[I 2024-12-16 12:45:24,159] Trial 428 finished with value: 0.23253074512444968 and parameters: {'similarity': 'asymmetric', 'topK': 45, 'shrink': 239, 'feature_weighting': 'BM25', 'asymmetric_alpha': 0.5399471319809531}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2613.15 column/sec. Elapsed time 14.59 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 35.53 sec. Users per second: 1002


[I 2024-12-16 12:46:14,942] Trial 429 finished with value: 0.23997731336792064 and parameters: {'similarity': 'tversky', 'topK': 24, 'shrink': 13, 'feature_weighting': 'BM25', 'tversky_alpha': 0.3046145880352984, 'tversky_beta': 1.199960039479974}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 3321.09 column/sec. Elapsed time 11.48 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 27.72 sec. Users per second: 1284


[I 2024-12-16 12:46:54,812] Trial 430 finished with value: 0.005972113065949917 and parameters: {'similarity': 'tversky', 'topK': 0, 'shrink': 222, 'feature_weighting': 'BM25', 'tversky_alpha': 0.09372218755222089, 'tversky_beta': 1.269432874828072}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2614.15 column/sec. Elapsed time 14.58 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.99 sec. Users per second: 937


[I 2024-12-16 12:47:48,042] Trial 431 finished with value: 0.26077932752594174 and parameters: {'similarity': 'tversky', 'topK': 36, 'shrink': 260, 'feature_weighting': 'BM25', 'tversky_alpha': 0.033301636946555, 'tversky_beta': 1.1790110712246558}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2616.74 column/sec. Elapsed time 14.57 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 34.94 sec. Users per second: 1019


[I 2024-12-16 12:48:38,173] Trial 432 finished with value: 0.2611961105529899 and parameters: {'similarity': 'tversky', 'topK': 13, 'shrink': 74, 'feature_weighting': 'TF-IDF', 'tversky_alpha': 0.13627399237527998, 'tversky_beta': 1.3466862346331954}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2613.84 column/sec. Elapsed time 14.58 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 36.78 sec. Users per second: 968


[I 2024-12-16 12:49:30,201] Trial 433 finished with value: 0.2619109988511488 and parameters: {'similarity': 'tversky', 'topK': 24, 'shrink': 189, 'feature_weighting': 'BM25', 'tversky_alpha': 0.08367325970232574, 'tversky_beta': 1.2424093748534428}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2611.96 column/sec. Elapsed time 14.59 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 40.60 sec. Users per second: 877


[I 2024-12-16 12:50:26,120] Trial 434 finished with value: 0.2552650318883829 and parameters: {'similarity': 'tversky', 'topK': 84, 'shrink': 184, 'feature_weighting': 'BM25', 'tversky_alpha': 0.09396187732847526, 'tversky_beta': 1.23461136231767}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2599.26 column/sec. Elapsed time 14.67 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 38.95 sec. Users per second: 914


[I 2024-12-16 12:51:20,421] Trial 435 finished with value: 0.2575410461277898 and parameters: {'similarity': 'tversky', 'topK': 59, 'shrink': 137, 'feature_weighting': 'BM25', 'tversky_alpha': 0.19698091581927396, 'tversky_beta': 1.146246286211464}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2612.81 column/sec. Elapsed time 14.59 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.87 sec. Users per second: 940


[I 2024-12-16 12:52:13,565] Trial 436 finished with value: 0.26025225660889595 and parameters: {'similarity': 'tversky', 'topK': 42, 'shrink': 190, 'feature_weighting': 'BM25', 'tversky_alpha': 0.1311192806226537, 'tversky_beta': 1.250675659664746}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2638.05 column/sec. Elapsed time 14.45 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 36.97 sec. Users per second: 963


[I 2024-12-16 12:53:05,638] Trial 437 finished with value: 0.261417437711112 and parameters: {'similarity': 'tversky', 'topK': 27, 'shrink': 162, 'feature_weighting': 'BM25', 'tversky_alpha': 0.07197234778197922, 'tversky_beta': 1.293707155168567}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2835.06 column/sec. Elapsed time 13.45 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 36.42 sec. Users per second: 977


[I 2024-12-16 12:53:55,940] Trial 438 finished with value: 0.231323491617951 and parameters: {'similarity': 'cosine', 'topK': 30, 'shrink': 279, 'feature_weighting': 'BM25'}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2566.34 column/sec. Elapsed time 14.85 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 45.38 sec. Users per second: 784


[I 2024-12-16 12:54:57,014] Trial 439 finished with value: 0.24566620510400572 and parameters: {'similarity': 'tversky', 'topK': 191, 'shrink': 241, 'feature_weighting': 'BM25', 'tversky_alpha': 0.16975358908277222, 'tversky_beta': 1.2123011233987695}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2608.42 column/sec. Elapsed time 14.61 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 38.72 sec. Users per second: 919


[I 2024-12-16 12:55:51,058] Trial 440 finished with value: 0.2591601189717742 and parameters: {'similarity': 'tversky', 'topK': 49, 'shrink': 206, 'feature_weighting': 'BM25', 'tversky_alpha': 0.24729095906975596, 'tversky_beta': 1.2635765398173384}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2632.33 column/sec. Elapsed time 14.48 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 35.98 sec. Users per second: 989


[I 2024-12-16 12:56:42,151] Trial 441 finished with value: 0.2601505953459832 and parameters: {'similarity': 'tversky', 'topK': 11, 'shrink': 254, 'feature_weighting': 'BM25', 'tversky_alpha': 0.09465971370457511, 'tversky_beta': 1.2207585256754685}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 3690.40 column/sec. Elapsed time 10.33 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 27.65 sec. Users per second: 1287


[I 2024-12-16 12:57:20,726] Trial 442 finished with value: 0.005972113065949917 and parameters: {'similarity': 'jaccard', 'topK': 0, 'shrink': 227, 'feature_weighting': 'BM25'}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2617.15 column/sec. Elapsed time 14.57 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 36.95 sec. Users per second: 963


[I 2024-12-16 12:58:12,878] Trial 443 finished with value: 0.26159628987195144 and parameters: {'similarity': 'tversky', 'topK': 24, 'shrink': 173, 'feature_weighting': 'BM25', 'tversky_alpha': 0.14642532949548592, 'tversky_beta': 1.1598424450229217}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 448.46 column/sec. Elapsed time 1.42 min
EvaluatorHoldout: Processed 35595 (100.0%) in 41.06 sec. Users per second: 867


[I 2024-12-16 13:00:19,340] Trial 444 finished with value: 0.14200618697640893 and parameters: {'similarity': 'euclidean', 'topK': 40, 'shrink': 281, 'feature_weighting': 'BM25', 'normalize_avg_row': True, 'similarity_from_distance_mode': 'log', 'normalize': False}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2603.24 column/sec. Elapsed time 14.64 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.19 sec. Users per second: 957


[I 2024-12-16 13:01:11,822] Trial 445 finished with value: 0.2622579914501702 and parameters: {'similarity': 'tversky', 'topK': 22, 'shrink': 253, 'feature_weighting': 'BM25', 'tversky_alpha': 0.20651519881802316, 'tversky_beta': 1.2924756027946114}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2445.63 column/sec. Elapsed time 15.59 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 56.30 sec. Users per second: 632


[I 2024-12-16 13:02:25,363] Trial 446 finished with value: 0.2281920825430983 and parameters: {'similarity': 'tversky', 'topK': 649, 'shrink': 266, 'feature_weighting': 'BM25', 'tversky_alpha': 0.21329448984535143, 'tversky_beta': 1.3183413257068541}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2565.96 column/sec. Elapsed time 14.86 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 40.51 sec. Users per second: 879


[I 2024-12-16 13:03:21,335] Trial 447 finished with value: 0.235973537458903 and parameters: {'similarity': 'tversky', 'topK': 62, 'shrink': 241, 'feature_weighting': 'none', 'tversky_alpha': 0.17797027129415954, 'tversky_beta': 1.3590839710472093}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2856.05 column/sec. Elapsed time 13.35 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 36.54 sec. Users per second: 974


[I 2024-12-16 13:04:11,840] Trial 448 finished with value: 0.2564973796792391 and parameters: {'similarity': 'dice', 'topK': 14, 'shrink': 251, 'feature_weighting': 'BM25'}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2614.01 column/sec. Elapsed time 14.58 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 38.43 sec. Users per second: 926


[I 2024-12-16 13:05:05,540] Trial 449 finished with value: 0.26075827569843774 and parameters: {'similarity': 'tversky', 'topK': 37, 'shrink': 284, 'feature_weighting': 'BM25', 'tversky_alpha': 0.2250330284197139, 'tversky_beta': 1.1909368651505265}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2622.31 column/sec. Elapsed time 14.54 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 36.83 sec. Users per second: 966


[I 2024-12-16 13:05:57,543] Trial 450 finished with value: 0.26206422661931617 and parameters: {'similarity': 'tversky', 'topK': 24, 'shrink': 220, 'feature_weighting': 'BM25', 'tversky_alpha': 0.1328663154962869, 'tversky_beta': 1.2536716290872643}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2604.63 column/sec. Elapsed time 14.64 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 39.25 sec. Users per second: 907


[I 2024-12-16 13:06:52,112] Trial 451 finished with value: 0.2590396362787962 and parameters: {'similarity': 'tversky', 'topK': 52, 'shrink': 230, 'feature_weighting': 'BM25', 'tversky_alpha': 0.15737621289898449, 'tversky_beta': 1.2919397717272376}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2598.19 column/sec. Elapsed time 14.67 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 35.84 sec. Users per second: 993


[I 2024-12-16 13:07:43,247] Trial 452 finished with value: 0.2602717813829644 and parameters: {'similarity': 'tversky', 'topK': 12, 'shrink': 265, 'feature_weighting': 'BM25', 'tversky_alpha': 0.292267276115885, 'tversky_beta': 1.2584815910723168}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 3343.02 column/sec. Elapsed time 11.40 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 27.59 sec. Users per second: 1290


[I 2024-12-16 13:08:22,858] Trial 453 finished with value: 0.005972113065949917 and parameters: {'similarity': 'tversky', 'topK': 0, 'shrink': 214, 'feature_weighting': 'BM25', 'tversky_alpha': 0.1268213826489127, 'tversky_beta': 1.3218033177623962}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2623.52 column/sec. Elapsed time 14.53 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 38.04 sec. Users per second: 936


[I 2024-12-16 13:09:16,079] Trial 454 finished with value: 0.23938055666052888 and parameters: {'similarity': 'tversky', 'topK': 35, 'shrink': 239, 'feature_weighting': 'TF-IDF', 'tversky_alpha': 0.19651632198381155, 'tversky_beta': 1.18281795852487}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2620.62 column/sec. Elapsed time 14.55 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.08 sec. Users per second: 960


[I 2024-12-16 13:10:08,349] Trial 455 finished with value: 0.2620015051017264 and parameters: {'similarity': 'tversky', 'topK': 23, 'shrink': 257, 'feature_weighting': 'BM25', 'tversky_alpha': 0.13145104517351913, 'tversky_beta': 1.2235148029742646}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2516.67 column/sec. Elapsed time 15.15 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 49.73 sec. Users per second: 716


[I 2024-12-16 13:11:14,587] Trial 456 finished with value: 0.23589233908738283 and parameters: {'similarity': 'tversky', 'topK': 354, 'shrink': 260, 'feature_weighting': 'BM25', 'tversky_alpha': 0.11381686541483702, 'tversky_beta': 1.2299942481838901}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2773.94 column/sec. Elapsed time 13.74 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 39.07 sec. Users per second: 911


[I 2024-12-16 13:12:07,936] Trial 457 finished with value: 0.22942506482684802 and parameters: {'similarity': 'asymmetric', 'topK': 69, 'shrink': 295, 'feature_weighting': 'BM25', 'asymmetric_alpha': 1.0431683254034578}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2592.93 column/sec. Elapsed time 14.70 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 39.28 sec. Users per second: 906


[I 2024-12-16 13:13:02,653] Trial 458 finished with value: 0.25976238260094 and parameters: {'similarity': 'tversky', 'topK': 45, 'shrink': 247, 'feature_weighting': 'BM25', 'tversky_alpha': 0.24590625781850758, 'tversky_beta': 1.2838808527348826}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2629.77 column/sec. Elapsed time 14.50 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.14 sec. Users per second: 958


[I 2024-12-16 13:13:54,963] Trial 459 finished with value: 0.26192268658484064 and parameters: {'similarity': 'tversky', 'topK': 28, 'shrink': 273, 'feature_weighting': 'BM25', 'tversky_alpha': 0.061581765516474006, 'tversky_beta': 1.358495223447807}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2613.11 column/sec. Elapsed time 14.59 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 35.66 sec. Users per second: 998


[I 2024-12-16 13:14:45,839] Trial 460 finished with value: 0.26120495559864043 and parameters: {'similarity': 'tversky', 'topK': 13, 'shrink': 227, 'feature_weighting': 'BM25', 'tversky_alpha': 0.11849211663399413, 'tversky_beta': 1.1699428577471704}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2612.65 column/sec. Elapsed time 14.59 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 38.14 sec. Users per second: 933


[I 2024-12-16 13:15:39,229] Trial 461 finished with value: 0.26138868319338704 and parameters: {'similarity': 'tversky', 'topK': 34, 'shrink': 254, 'feature_weighting': 'BM25', 'tversky_alpha': 0.18590178908604849, 'tversky_beta': 1.224822689825164}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2528.25 column/sec. Elapsed time 15.08 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 50.59 sec. Users per second: 704


[I 2024-12-16 13:16:46,090] Trial 462 finished with value: 0.23369075413998988 and parameters: {'similarity': 'tversky', 'topK': 424, 'shrink': 210, 'feature_weighting': 'BM25', 'tversky_alpha': 0.06959158793912383, 'tversky_beta': 1.2524724008405423}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2613.88 column/sec. Elapsed time 14.58 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 39.16 sec. Users per second: 909


[I 2024-12-16 13:17:40,514] Trial 463 finished with value: 0.2591150621972057 and parameters: {'similarity': 'tversky', 'topK': 51, 'shrink': 234, 'feature_weighting': 'BM25', 'tversky_alpha': 0.1598794640708271, 'tversky_beta': 1.3125489559392327}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 3323.10 column/sec. Elapsed time 11.47 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 27.79 sec. Users per second: 1281


[I 2024-12-16 13:18:20,414] Trial 464 finished with value: 0.005972113065949917 and parameters: {'similarity': 'tversky', 'topK': 0, 'shrink': 285, 'feature_weighting': 'BM25', 'tversky_alpha': 0.11129679760309824, 'tversky_beta': 1.1310676560540733}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2615.81 column/sec. Elapsed time 14.57 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 36.96 sec. Users per second: 963


[I 2024-12-16 13:19:12,595] Trial 465 finished with value: 0.26162162490055335 and parameters: {'similarity': 'tversky', 'topK': 25, 'shrink': 213, 'feature_weighting': 'BM25', 'tversky_alpha': 0.2107981328485634, 'tversky_beta': 1.2037164278455317}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2618.00 column/sec. Elapsed time 14.56 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 36.29 sec. Users per second: 981


[I 2024-12-16 13:20:04,074] Trial 466 finished with value: 0.25639878049041165 and parameters: {'similarity': 'tversky', 'topK': 17, 'shrink': 267, 'feature_weighting': 'BM25', 'tversky_alpha': 1.6331614702166266, 'tversky_beta': 1.2777865070866998}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2853.31 column/sec. Elapsed time 13.36 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.13 sec. Users per second: 959


[I 2024-12-16 13:20:55,028] Trial 467 finished with value: 0.2318250996613241 and parameters: {'similarity': 'cosine', 'topK': 37, 'shrink': 243, 'feature_weighting': 'BM25'}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2594.86 column/sec. Elapsed time 14.69 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 42.49 sec. Users per second: 838


[I 2024-12-16 13:21:53,019] Trial 468 finished with value: 0.251119351176042 and parameters: {'similarity': 'tversky', 'topK': 127, 'shrink': 223, 'feature_weighting': 'BM25', 'tversky_alpha': 0.05244057836529534, 'tversky_beta': 1.3445997836299854}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2609.74 column/sec. Elapsed time 14.61 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 39.60 sec. Users per second: 899


[I 2024-12-16 13:22:47,916] Trial 469 finished with value: 0.25726421323228565 and parameters: {'similarity': 'tversky', 'topK': 58, 'shrink': 295, 'feature_weighting': 'BM25', 'tversky_alpha': 0.14250695080486275, 'tversky_beta': 1.2642038623391199}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2620.21 column/sec. Elapsed time 14.55 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 35.69 sec. Users per second: 997


[I 2024-12-16 13:23:38,792] Trial 470 finished with value: 0.2601657735814791 and parameters: {'similarity': 'tversky', 'topK': 11, 'shrink': 256, 'feature_weighting': 'BM25', 'tversky_alpha': 0.09207602148258831, 'tversky_beta': 1.2086465041255339}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 413.60 column/sec. Elapsed time 1.54 min
EvaluatorHoldout: Processed 35595 (100.0%) in 38.96 sec. Users per second: 914


[I 2024-12-16 13:25:50,304] Trial 471 finished with value: 0.19724815915990151 and parameters: {'similarity': 'euclidean', 'topK': 76, 'shrink': 874, 'feature_weighting': 'none', 'normalize_avg_row': False, 'similarity_from_distance_mode': 'exp', 'normalize': True}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2813.19 column/sec. Elapsed time 13.55 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.66 sec. Users per second: 945


[I 2024-12-16 13:26:42,158] Trial 472 finished with value: 0.25763737848553864 and parameters: {'similarity': 'jaccard', 'topK': 27, 'shrink': 201, 'feature_weighting': 'BM25'}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2594.81 column/sec. Elapsed time 14.69 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 40.19 sec. Users per second: 886


[I 2024-12-16 13:27:37,760] Trial 473 finished with value: 0.25470302483807755 and parameters: {'similarity': 'tversky', 'topK': 46, 'shrink': 952, 'feature_weighting': 'BM25', 'tversky_alpha': 0.03739982176487093, 'tversky_beta': 1.3021993140173347}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2601.01 column/sec. Elapsed time 14.66 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 35.79 sec. Users per second: 994


[I 2024-12-16 13:28:28,821] Trial 474 finished with value: 0.261358724853733 and parameters: {'similarity': 'tversky', 'topK': 14, 'shrink': 232, 'feature_weighting': 'BM25', 'tversky_alpha': 0.18101820507923225, 'tversky_beta': 1.243834554874462}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2597.39 column/sec. Elapsed time 14.68 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 42.99 sec. Users per second: 828


[I 2024-12-16 13:29:27,240] Trial 475 finished with value: 0.2211817146623289 and parameters: {'similarity': 'tversky', 'topK': 101, 'shrink': 277, 'feature_weighting': 'TF-IDF', 'tversky_alpha': 0.26078800870818397, 'tversky_beta': 0.7287979959794011}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2595.83 column/sec. Elapsed time 14.69 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 38.26 sec. Users per second: 930


[I 2024-12-16 13:30:20,859] Trial 476 finished with value: 0.2611715087209362 and parameters: {'similarity': 'tversky', 'topK': 34, 'shrink': 247, 'feature_weighting': 'BM25', 'tversky_alpha': 0.11207200758889033, 'tversky_beta': 1.1505690255529355}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 3795.92 column/sec. Elapsed time 10.04 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 27.39 sec. Users per second: 1299


[I 2024-12-16 13:30:58,858] Trial 477 finished with value: 0.005972113065949917 and parameters: {'similarity': 'dice', 'topK': 0, 'shrink': 209, 'feature_weighting': 'BM25'}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2617.07 column/sec. Elapsed time 14.57 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 36.84 sec. Users per second: 966


[I 2024-12-16 13:31:50,915] Trial 478 finished with value: 0.26228069274381777 and parameters: {'similarity': 'tversky', 'topK': 24, 'shrink': 228, 'feature_weighting': 'BM25', 'tversky_alpha': 0.06422404068969702, 'tversky_beta': 1.3546755384118243}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2619.65 column/sec. Elapsed time 14.55 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 38.53 sec. Users per second: 924


[I 2024-12-16 13:32:44,669] Trial 479 finished with value: 0.25959761560818345 and parameters: {'similarity': 'tversky', 'topK': 47, 'shrink': 196, 'feature_weighting': 'BM25', 'tversky_alpha': 0.046024659701066575, 'tversky_beta': 1.3596611600903148}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2619.79 column/sec. Elapsed time 14.55 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.15 sec. Users per second: 958


[I 2024-12-16 13:33:37,044] Trial 480 finished with value: 0.26174121494073405 and parameters: {'similarity': 'tversky', 'topK': 28, 'shrink': 222, 'feature_weighting': 'BM25', 'tversky_alpha': 0.16198646501582542, 'tversky_beta': 1.315731894777431}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2600.23 column/sec. Elapsed time 14.66 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 39.66 sec. Users per second: 897


[I 2024-12-16 13:34:32,061] Trial 481 finished with value: 0.2568850314209589 and parameters: {'similarity': 'tversky', 'topK': 65, 'shrink': 224, 'feature_weighting': 'BM25', 'tversky_alpha': 0.21090739589260657, 'tversky_beta': 1.3608015908859847}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2603.69 column/sec. Elapsed time 14.64 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 38.78 sec. Users per second: 918


[I 2024-12-16 13:35:26,147] Trial 482 finished with value: 0.2606561083115394 and parameters: {'similarity': 'tversky', 'topK': 41, 'shrink': 263, 'feature_weighting': 'BM25', 'tversky_alpha': 0.13310191857744125, 'tversky_beta': 1.290506281233777}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2584.84 column/sec. Elapsed time 14.75 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.23 sec. Users per second: 956


[I 2024-12-16 13:36:18,781] Trial 483 finished with value: 0.261909633169775 and parameters: {'similarity': 'tversky', 'topK': 26, 'shrink': 200, 'feature_weighting': 'BM25', 'tversky_alpha': 0.06831889260172201, 'tversky_beta': 1.3327170920785396}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2612.20 column/sec. Elapsed time 14.59 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 36.09 sec. Users per second: 986


[I 2024-12-16 13:37:10,103] Trial 484 finished with value: 0.26200501555339156 and parameters: {'similarity': 'tversky', 'topK': 15, 'shrink': 236, 'feature_weighting': 'BM25', 'tversky_alpha': 0.10592698675564688, 'tversky_beta': 1.2684154597714974}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2834.43 column/sec. Elapsed time 13.45 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 34.81 sec. Users per second: 1022


[I 2024-12-16 13:37:58,813] Trial 485 finished with value: 0.22032741954098328 and parameters: {'similarity': 'asymmetric', 'topK': 12, 'shrink': 273, 'feature_weighting': 'BM25', 'asymmetric_alpha': 1.7533305205363787}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2641.31 column/sec. Elapsed time 14.43 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 35.72 sec. Users per second: 997


[I 2024-12-16 13:38:49,641] Trial 486 finished with value: 0.2618240359072904 and parameters: {'similarity': 'tversky', 'topK': 14, 'shrink': 244, 'feature_weighting': 'BM25', 'tversky_alpha': 0.14083737237289184, 'tversky_beta': 1.2678373463853423}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2615.76 column/sec. Elapsed time 14.57 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 36.93 sec. Users per second: 964


[I 2024-12-16 13:39:41,780] Trial 487 finished with value: 0.26186373873496865 and parameters: {'similarity': 'tversky', 'topK': 22, 'shrink': 297, 'feature_weighting': 'BM25', 'tversky_alpha': 0.2307869459633782, 'tversky_beta': 1.2446531947152923}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 3100.97 column/sec. Elapsed time 12.29 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 31.58 sec. Users per second: 1127


[I 2024-12-16 13:40:26,305] Trial 488 finished with value: 0.16478101311059565 and parameters: {'similarity': 'tversky', 'topK': 1, 'shrink': 235, 'feature_weighting': 'BM25', 'tversky_alpha': 0.10634389848313451, 'tversky_beta': 1.1724999862836494}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2519.76 column/sec. Elapsed time 15.13 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 46.48 sec. Users per second: 766


[I 2024-12-16 13:41:28,849] Trial 489 finished with value: 0.24323374715218737 and parameters: {'similarity': 'tversky', 'topK': 230, 'shrink': 259, 'feature_weighting': 'BM25', 'tversky_alpha': 0.1725602280640209, 'tversky_beta': 1.2830322989733745}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 3330.76 column/sec. Elapsed time 11.45 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 27.74 sec. Users per second: 1283


[I 2024-12-16 13:42:08,674] Trial 490 finished with value: 0.005972113065949917 and parameters: {'similarity': 'tversky', 'topK': 0, 'shrink': 232, 'feature_weighting': 'BM25', 'tversky_alpha': 0.0996750506281258, 'tversky_beta': 1.2151360077259754}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2582.38 column/sec. Elapsed time 14.76 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 39.23 sec. Users per second: 907


[I 2024-12-16 13:43:03,376] Trial 491 finished with value: 0.2588232750999348 and parameters: {'similarity': 'tversky', 'topK': 48, 'shrink': 281, 'feature_weighting': 'BM25', 'tversky_alpha': 0.19823919219502503, 'tversky_beta': 1.107202019755325}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2587.86 column/sec. Elapsed time 14.73 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.99 sec. Users per second: 937


[I 2024-12-16 13:43:56,683] Trial 492 finished with value: 0.23951798475124816 and parameters: {'similarity': 'tversky', 'topK': 37, 'shrink': 254, 'feature_weighting': 'none', 'tversky_alpha': 0.1393377968105663, 'tversky_beta': 1.3007804640237715}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2574.47 column/sec. Elapsed time 14.81 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.01 sec. Users per second: 962


[I 2024-12-16 13:44:49,163] Trial 493 finished with value: 0.26189474873189905 and parameters: {'similarity': 'tversky', 'topK': 23, 'shrink': 213, 'feature_weighting': 'BM25', 'tversky_alpha': 0.03320200573709019, 'tversky_beta': 1.249745080289422}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2809.09 column/sec. Elapsed time 13.57 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 35.47 sec. Users per second: 1003


[I 2024-12-16 13:45:38,624] Trial 494 finished with value: 0.23047451652653117 and parameters: {'similarity': 'cosine', 'topK': 18, 'shrink': 187, 'feature_weighting': 'BM25'}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2631.15 column/sec. Elapsed time 14.49 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 39.43 sec. Users per second: 903


[I 2024-12-16 13:46:33,235] Trial 495 finished with value: 0.2577294600956793 and parameters: {'similarity': 'tversky', 'topK': 57, 'shrink': 269, 'feature_weighting': 'BM25', 'tversky_alpha': 0.26920004006178566, 'tversky_beta': 1.3657206931931891}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2600.03 column/sec. Elapsed time 14.66 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 38.11 sec. Users per second: 934


[I 2024-12-16 13:47:26,663] Trial 496 finished with value: 0.2611872693611949 and parameters: {'similarity': 'tversky', 'topK': 38, 'shrink': 231, 'feature_weighting': 'BM25', 'tversky_alpha': 0.08962629687207796, 'tversky_beta': 1.1855622952117981}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2536.89 column/sec. Elapsed time 15.03 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 47.01 sec. Users per second: 757


[I 2024-12-16 13:48:29,676] Trial 497 finished with value: 0.24156761386282768 and parameters: {'similarity': 'tversky', 'topK': 260, 'shrink': 250, 'feature_weighting': 'BM25', 'tversky_alpha': 0.1674990393210148, 'tversky_beta': 1.2618092643925902}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2501.57 column/sec. Elapsed time 15.24 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 53.41 sec. Users per second: 666


[I 2024-12-16 13:49:39,617] Trial 498 finished with value: 0.23008489341782692 and parameters: {'similarity': 'tversky', 'topK': 474, 'shrink': 222, 'feature_weighting': 'BM25', 'tversky_alpha': 0.13005954677090747, 'tversky_beta': 0.5848810867876356}. Best is trial 312 with value: 0.2627796224180537.


Similarity column 38121 (100.0%), 2770.61 column/sec. Elapsed time 13.76 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 36.24 sec. Users per second: 982


[I 2024-12-16 13:50:30,231] Trial 499 finished with value: 0.2571552224988509 and parameters: {'similarity': 'jaccard', 'topK': 12, 'shrink': 294, 'feature_weighting': 'BM25'}. Best is trial 312 with value: 0.2627796224180537.


## Some optuna visualizations on recommender parameters

In [14]:
if not config['tune_parameters']:
    optuna_study = optuna.load_study(study_name=f'hyperparameters_tuning_{config["model"]}_{config["metric"]}', storage=f'sqlite:///{config["database_path"]}')
    
fig = optuna.visualization.plot_slice(optuna_study)
fig.show()

In [15]:
if not config['tune_parameters']:
    optuna_study = optuna.load_study(study_name=f'hyperparameters_tuning_{config["model"]}_{config["metric"]}', storage=f'sqlite:///{config["database_path"]}')
    
fig = optuna.visualization.plot_param_importances(optuna_study)
fig.show()

## Let's train the recommender with best parameter values

In [16]:
if config['tune_best_params']:

    if config['tune_parameters']:
        best_params = optuna_study.best_trial.params
    else: 
        with open(f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/best_params_{config["model"]}_{config["metric"]}.json', 'r') as best_params_json:
            best_params = json.load(best_params_json)

    recommender_instance = ItemKNNCFRecommender(URM_train + URM_validation)
    recommender_instance.fit(**best_params)

Similarity column 38121 (100.0%), 2334.44 column/sec. Elapsed time 16.33 sec


# Testing

Create the recommendations for the submission. 

In [17]:
if config['tune_best_params']:

    data_target_users_test = pd.read_csv('/kaggle/input/recommender-system-2024-challenge-polimi/data_target_users_test.csv')
    create_submission(data_target_users_test, recommender_instance, f'/kaggle/working/submission_{config["model"]}_{config["metric"]}.csv')

Submission file saved as /kaggle/working/submission_ItemKNNCF_Recall.csv


# Save Version on GitHub 

Write or import a json file where best hyperparameters are saved. 

In [18]:
if config['tune_parameters']:
    with open(f'/kaggle/working/best_params_{config["model"]}_{config["metric"]}.json', 'w') as params_file:
        json.dump(optuna_study.best_params, params_file)
        
    if config['save_github']:
        upload_file(
            f'/kaggle/working/best_params_{config["model"]}_{config["metric"]}.json', 
            f'{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/best_params_{config["model"]}_{config["metric"]}.json', 
            f'{config["model"]}_{config["metric"]} tuning results (from kaggle notebook)',
            repo
        )
elif config['copy_prev_best_params']:
    shutil.copyfile(
        f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/'\
        f'best_params_{config["model"]}_{config["metric"]}.json', 
        f'/kaggle/working/best_params_{config["model"]}_{config["metric"]}.json'
    )

File 'TrainedModels/WithoutKFCV/KNN/ItemKNNCFRecommender/OptimizingRecall/best_params_ItemKNNCF_Recall.json' updated successfully.


Save the history of the tuned model.

In [19]:
if config['save_github'] and config['tune_parameters']:
    upload_file(
        config['database_path'], 
        f'{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/history_{config["model"]}_{config["metric"]}.db',
        f'Tuning {config["model"]}_{config["metric"]} db updated results (from kaggle notebook)',
        repo
    )

File 'TrainedModels/WithoutKFCV/KNN/ItemKNNCFRecommender/OptimizingRecall/history_ItemKNNCF_Recall.db' updated successfully.


Save the best trained model and its submission.

In [20]:
if config['save_github'] and config['tune_best_params']: 
    upload_file(
                f'/kaggle/working/submission_{config["model"]}_{config["metric"]}.csv', 
                f'{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/Submission/submission_{config["model"]}_{config["metric"]}.csv', 
                f'New {config["model"]}_{config["metric"]} submission (from kaggle notebook)',
                repo
            )

File 'TrainedModels/WithoutKFCV/KNN/ItemKNNCFRecommender/OptimizingRecall/Submission/submission_ItemKNNCF_Recall.csv' updated successfully.
