# Set up the connection with GitHub

In [1]:
# ! cd /kaggle/working && rm -rf RECsys_Challenge2024

In [2]:
from kaggle_secrets import UserSecretsClient

# Read the GitHub access token stored in Kaggle Secrets under the label "Token".
token = UserSecretsClient().get_secret("Token")

# Clone the project repository over HTTPS; IPython interpolates {token} into the
# URL so the clone is authenticated with the secret read above.
# NOTE(review): a token embedded in the clone URL is presumably persisted in the
# clone's .git/config as the remote URL — confirm this is acceptable for this environment.
# NOTE(review): re-running this cell while the directory already exists will make
# `git clone` fail; the commented-out cleanup cell above appears to exist for that case.
! git clone https://{token}@github.com/madratak/RECsys_Challenge2024.git

Cloning into 'RECsys_Challenge2024'...
remote: Enumerating objects: 6312, done.[K
remote: Counting objects: 100% (223/223), done.[K
remote: Compressing objects: 100% (188/188), done.[K
remote: Total 6312 (delta 83), reused 25 (delta 10), pack-reused 6089 (from 4)[K
Receiving objects: 100% (6312/6312), 401.63 MiB | 23.32 MiB/s, done.
Resolving deltas: 100% (3463/3463), done.
Updating files: 100% (490/490), done.


In [3]:
! pip install PyGithub requests

Collecting PyGithub
  Downloading PyGithub-2.5.0-py3-none-any.whl.metadata (3.9 kB)
Collecting pynacl>=1.4.0 (from PyGithub)
  Downloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl.metadata (8.6 kB)
Downloading PyGithub-2.5.0-py3-none-any.whl (375 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m375.9/375.9 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (856 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m856.7/856.7 kB[0m [31m21.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pynacl, PyGithub
Successfully installed PyGithub-2.5.0 pynacl-1.5.0


In [4]:
# Make the cloned repository the working directory so that its packages
# (Utils, Recommenders, Evaluation, Data_manager, ...) can be imported by the cells below.
%cd /kaggle/working/RECsys_Challenge2024 
# Run the repository's build script, which compiles the Cython (.pyx) sources
# with the current Python environment (see the log printed below).
! python run_compile_all_cython.py

/kaggle/working/RECsys_Challenge2024
run_compile_all_cython: Found 11 Cython files in 5 folders...
run_compile_all_cython: All files will be compiled using your current python environment: '/usr/bin/python3'
Compiling [1/11]: MatrixFactorizationImpressions_Cython_Epoch.pyx... 
In file included from [01m[K/usr/local/lib/python3.10/dist-packages/numpy/core/include/numpy/ndarraytypes.h:1929[m[K,
                 from [01m[K/usr/local/lib/python3.10/dist-packages/numpy/core/include/numpy/ndarrayobject.h:12[m[K,
                 from [01m[K/usr/local/lib/python3.10/dist-packages/numpy/core/include/numpy/arrayobject.h:5[m[K,
                 from [01m[KMatrixFactorizationImpressions_Cython_Epoch.c:1252[m[K:
      |  [01;35m[K^~~~~~~[m[K
[01m[KMatrixFactorizationImpressions_Cython_Epoch.c:[m[K In function ‘[01m[K__pyx_f_43MatrixFactorizationImpressions_Cython_Epoch_32MatrixFactorization_Cython_Epoch_sampleBPR_Cython[m[K’:
30353 |       [01;35m[K__pyx_t_4

In [5]:
from Utils.notebookFunctions import *
import numpy as np
import pandas as pd
import scipy.sparse as sps
import time
import shutil
import optuna
import json
import os
from Utils.seconds_to_biggest_unit import seconds_to_biggest_unit


# Root of the cloned repository inside the Kaggle working directory.
K_PATH = '/kaggle/working/RECsys_Challenge2024'
# Repository-relative folder under which this model family's artefacts are looked up
# (it is joined with K_PATH when locating the saved Optuna history database).
GH_PATH = 'TrainedModels/WithKFCV/KNN'

# Seed NumPy's global random number generator.
# NOTE(review): presumably intended to make the data splits reproducible —
# confirm that the split helpers draw from np.random.
np.random.seed(42)

## Import the repository **RECsys_Challenge2024**

In [6]:
# Obtain a handle to the GitHub repository, authenticating with the token read earlier.
# NOTE(review): get_repo_from_github is presumably provided by the star import
# from Utils.notebookFunctions — confirm.
repo = get_repo_from_github(token)

Repository 'RECsys_Challenge2024' found.


In [7]:
# Run settings shared by the cells below.
config = dict(
    # Recommender name; used to build file paths and the Optuna study name.
    model='ItemKNNCF',
    # Number of folds requested from the k-fold split.
    n_folds=5,
    # Metric being optimised; used to build file paths and the Optuna study name.
    metric='Recall',
    # When True, the Optuna study is created/resumed and optimised.
    tune_parameters=True,
    # SQLite file backing the Optuna study (also the destination of the history copy).
    database_path='/kaggle/working/history_ItemKNNCF_Recall.db',
    # NOTE(review): the three flags below are not read by the cells visible here;
    # presumably they are consumed further down the notebook — confirm.
    copy_prev_best_params=False,
    tune_best_params=True,
    save_github=True,
)

Import the database where previous tuning trials have been saved.

In [8]:
# Location, inside the cloned repository, of the Optuna history saved by earlier runs.
previous_history_db = f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/history_{config["model"]}_{config["metric"]}.db'

try:
    # Bring the saved history next to the notebook so the study can resume from it.
    shutil.copyfile(previous_history_db, config['database_path'])
except FileNotFoundError:
    # No history has been saved yet: Optuna will create the database from scratch.
    pass

# Construction of URM and ICM matrices

In [9]:
# Load the user-item interactions (columns used below: user_id, item_id, data).
URM_all_dataframe = pd.read_csv("/kaggle/input/recommender-system-2024-challenge-polimi/data_train.csv")

# The IDs are used directly as row/column indices, so the matrix must be sized
# from the largest ID, not from the number of distinct IDs: with the latter,
# any gap in the ID sequence makes csr_matrix fail with an out-of-range index.
# When the IDs are contiguous and start at 0 both computations give the same shape.
n_users = int(URM_all_dataframe["user_id"].max()) + 1
n_items = int(URM_all_dataframe["item_id"].max()) + 1

# User Rating Matrix: one row per user, one column per item.
URM_all = sps.csr_matrix(
    (
        URM_all_dataframe["data"].values,
        (URM_all_dataframe["user_id"].values, URM_all_dataframe["item_id"].values),
    ),
    shape=(n_users, n_items),
)

URM_all

<35736x38121 sparse matrix of type '<class 'numpy.float64'>'
	with 1764607 stored elements in Compressed Sparse Row format>

In [10]:
# Load the item metadata (columns used below: item_id, feature_id, data).
ICM_dataframe = pd.read_csv("/kaggle/input/recommender-system-2024-challenge-polimi/data_ICM_metadata.csv")

# The IDs are used directly as row/column indices, so the matrix must be sized
# from the largest ID, not from the number of distinct IDs: with the latter,
# any gap in the ID sequence makes csr_matrix fail with an out-of-range index.
# When the IDs are contiguous and start at 0 both computations give the same shape.
n_items = int(ICM_dataframe["item_id"].max()) + 1
n_features = int(ICM_dataframe["feature_id"].max()) + 1

# Item Content Matrix: one row per item, one column per feature.
ICM_all = sps.csr_matrix(
    (
        ICM_dataframe["data"].values,
        (ICM_dataframe["item_id"].values, ICM_dataframe["feature_id"].values),
    ),
    shape=(n_items, n_features),
)

ICM_all

<38121x94331 sparse matrix of type '<class 'numpy.float64'>'
	with 2940040 stored elements in Compressed Sparse Row format>

# Training

In [11]:
from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Data_manager.split_functions.split_train_k_folds import split_train_k_folds

# Hold-out split of URM_all, with 80% of the data assigned to the training part.
# NOTE(review): URM_validation is not used by the cells visible here, and the
# hyperparameters are scored on `folds` — confirm this hold-out split is still needed.
URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.80)

# K-fold split of the same matrix; the tuning objective iterates over `folds`
# as (train matrix, validation matrix) pairs.
folds = split_train_k_folds(URM_all, k=config['n_folds'])



In [12]:
from Recommenders.KNN.ItemKNNCFRecommender import ItemKNNCFRecommender

def objective_function_ItemKNNCF(optuna_trial):
    """Optuna objective: mean Recall@50 of ItemKNNCF over the cross-validation folds.

    Samples one hyperparameter configuration from the trial, then for every
    (train, validation) pair in the module-level ``folds`` fits a fresh
    ``ItemKNNCFRecommender`` on the training matrix and evaluates it on the
    validation matrix with ``EvaluatorHoldout`` at cutoff 50.

    Parameters
    ----------
    optuna_trial
        Trial object providing ``suggest_categorical``, ``suggest_int`` and
        ``suggest_float``.

    Returns
    -------
    The arithmetic mean (``np.mean``) of the per-fold "RECALL" values at cutoff 50.
    """
    similarity = optuna_trial.suggest_categorical(
        "similarity", ['cosine', 'dice', 'jaccard', 'asymmetric', 'tversky', 'euclidean']
    )

    # Hyperparameters common to every similarity type.
    # NOTE(review): topK=0 lies inside the search range; confirm that a model
    # with zero neighbours is an intended candidate.
    full_hyperp = {"similarity": similarity,
                   "topK": optuna_trial.suggest_int("topK", 0, 750),
                   "shrink": optuna_trial.suggest_int("shrink", 0, 1000),
                   'feature_weighting': optuna_trial.suggest_categorical('feature_weighting', ["BM25", "TF-IDF", "none"])
                  }

    # Similarity-specific hyperparameters are only sampled for the similarity
    # that was actually drawn, so each trial records just the values it uses.
    if similarity == "asymmetric":
        full_hyperp["asymmetric_alpha"] = optuna_trial.suggest_float("asymmetric_alpha", 0, 2, log=False)
        full_hyperp["normalize"] = True

    elif similarity == "tversky":
        full_hyperp["tversky_alpha"] = optuna_trial.suggest_float("tversky_alpha", 0, 2, log=False)
        full_hyperp["tversky_beta"] = optuna_trial.suggest_float("tversky_beta", 0, 2, log=False)
        full_hyperp["normalize"] = True

    elif similarity == "euclidean":
        full_hyperp["normalize_avg_row"] = optuna_trial.suggest_categorical("normalize_avg_row", [True, False])
        full_hyperp["similarity_from_distance_mode"] = optuna_trial.suggest_categorical("similarity_from_distance_mode", ["lin", "log", "exp"])
        full_hyperp["normalize"] = optuna_trial.suggest_categorical("normalize", [True, False])

    cutoff = 50  # recommendation list length at which the metric is read
    validation_results = []

    # A new recommender is built for every fold; no model is instantiated
    # outside this loop, so the objective depends only on `folds`.
    for URM_train_fold, URM_validation_fold in folds:

        recommender_instance = ItemKNNCFRecommender(URM_train_fold)
        recommender_instance.fit(**full_hyperp)

        evaluator = EvaluatorHoldout(URM_validation_fold, cutoff_list=[cutoff])
        result_df, _ = evaluator.evaluateRecommender(recommender_instance)

        validation_results.append(result_df.loc[cutoff]["RECALL"])

    return np.mean(validation_results)

In [13]:
if config['tune_parameters']:

    # The study is identified by model and metric, and persisted in the SQLite
    # file named in the config so that tuning can resume across sessions.
    tuning_study_name = f'hyperparameters_tuning_{config["model"]}_{config["metric"]}'
    tuning_storage_url = f'sqlite:///{config["database_path"]}'

    # load_if_exists=True resumes the stored study instead of failing when a
    # study with this name is already present in the database.
    optuna_study = optuna.create_study(
        study_name=tuning_study_name,
        storage=tuning_storage_url,
        direction='maximize',
        load_if_exists=True,
    )

    # Run 50 additional trials of the k-fold objective.
    optuna_study.optimize(objective_function_ItemKNNCF, n_trials=50)

[I 2025-01-05 17:39:37,451] Using an existing study with name 'hyperparameters_tuning_ItemKNNCF_Recall' instead of creating a new one.


Similarity column 38121 (100.0%), 2881.20 column/sec. Elapsed time 13.23 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 37.88 sec. Users per second: 939
Similarity column 38121 (100.0%), 2898.62 column/sec. Elapsed time 13.15 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 37.78 sec. Users per second: 942
Similarity column 38121 (100.0%), 2940.06 column/sec. Elapsed time 12.97 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 37.80 sec. Users per second: 942
Similarity column 38121 (100.0%), 2890.17 column/sec. Elapsed time 13.19 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 37.58 sec. Users per second: 947
Similarity column 38121 (100.0%), 29

[I 2025-01-05 17:43:53,673] Trial 300 finished with value: 0.2594651705326456 and parameters: {'similarity': 'cosine', 'topK': 38, 'shrink': 426, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2910.93 column/sec. Elapsed time 13.10 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 35.14 sec. Users per second: 1012
Similarity column 38121 (100.0%), 2928.12 column/sec. Elapsed time 13.02 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 35.29 sec. Users per second: 1008
Similarity column 38121 (100.0%), 2913.64 column/sec. Elapsed time 13.08 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 35.07 sec. Users per second: 1015
Similarity column 38121 (100.0%), 2936.59 column/sec. Elapsed time 12.98 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 35.31 sec. Users per second: 1007
Similarity column 38121 (100.0%)

[I 2025-01-05 17:47:56,724] Trial 301 finished with value: 0.22958111317103339 and parameters: {'similarity': 'cosine', 'topK': 15, 'shrink': 462, 'feature_weighting': 'BM25'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2742.08 column/sec. Elapsed time 13.90 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 53.59 sec. Users per second: 664
Similarity column 38121 (100.0%), 2770.29 column/sec. Elapsed time 13.76 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 53.44 sec. Users per second: 666
Similarity column 38121 (100.0%), 2764.16 column/sec. Elapsed time 13.79 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 53.25 sec. Users per second: 668
Similarity column 38121 (100.0%), 2772.47 column/sec. Elapsed time 13.75 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 53.40 sec. Users per second: 666
Similarity column 38121 (100.0%), 27

[I 2025-01-05 17:53:38,011] Trial 302 finished with value: 0.24941654283595271 and parameters: {'similarity': 'cosine', 'topK': 558, 'shrink': 336, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2919.30 column/sec. Elapsed time 13.06 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 39.22 sec. Users per second: 907
Similarity column 38121 (100.0%), 2914.41 column/sec. Elapsed time 13.08 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 39.17 sec. Users per second: 908
Similarity column 38121 (100.0%), 2934.83 column/sec. Elapsed time 12.99 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 39.37 sec. Users per second: 904
Similarity column 38121 (100.0%), 2909.85 column/sec. Elapsed time 13.10 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 39.60 sec. Users per second: 898
Similarity column 38121 (100.0%), 28

[I 2025-01-05 17:58:02,015] Trial 303 finished with value: 0.2579482547122322 and parameters: {'similarity': 'cosine', 'topK': 65, 'shrink': 374, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2944.69 column/sec. Elapsed time 12.95 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 36.70 sec. Users per second: 969
Similarity column 38121 (100.0%), 2946.77 column/sec. Elapsed time 12.94 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 36.67 sec. Users per second: 970
Similarity column 38121 (100.0%), 2930.60 column/sec. Elapsed time 13.01 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 36.89 sec. Users per second: 965
Similarity column 38121 (100.0%), 2941.48 column/sec. Elapsed time 12.96 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 36.72 sec. Users per second: 969
Similarity column 38121 (100.0%), 29

[I 2025-01-05 18:02:12,240] Trial 304 finished with value: 0.2601834638208213 and parameters: {'similarity': 'cosine', 'topK': 26, 'shrink': 408, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2949.57 column/sec. Elapsed time 12.92 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 37.97 sec. Users per second: 937
Similarity column 38121 (100.0%), 2898.21 column/sec. Elapsed time 13.15 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 38.22 sec. Users per second: 931
Similarity column 38121 (100.0%), 2899.61 column/sec. Elapsed time 13.15 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 38.24 sec. Users per second: 931
Similarity column 38121 (100.0%), 2916.69 column/sec. Elapsed time 13.07 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 37.97 sec. Users per second: 937
Similarity column 38121 (100.0%), 28

[I 2025-01-05 18:06:29,671] Trial 305 finished with value: 0.2590896987297058 and parameters: {'similarity': 'cosine', 'topK': 43, 'shrink': 486, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2950.41 column/sec. Elapsed time 12.92 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 35.90 sec. Users per second: 991
Similarity column 38121 (100.0%), 2912.41 column/sec. Elapsed time 13.09 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 35.73 sec. Users per second: 996
Similarity column 38121 (100.0%), 2922.47 column/sec. Elapsed time 13.04 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 35.78 sec. Users per second: 995
Similarity column 38121 (100.0%), 2912.09 column/sec. Elapsed time 13.09 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 35.64 sec. Users per second: 998
Similarity column 38121 (100.0%), 29

[I 2025-01-05 18:10:34,944] Trial 306 finished with value: 0.25994874879872176 and parameters: {'similarity': 'cosine', 'topK': 16, 'shrink': 438, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2921.80 column/sec. Elapsed time 13.05 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 39.77 sec. Users per second: 894
Similarity column 38121 (100.0%), 2881.75 column/sec. Elapsed time 13.23 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 40.03 sec. Users per second: 889
Similarity column 38121 (100.0%), 2911.75 column/sec. Elapsed time 13.09 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 40.05 sec. Users per second: 888
Similarity column 38121 (100.0%), 2899.70 column/sec. Elapsed time 13.15 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 39.95 sec. Users per second: 890
Similarity column 38121 (100.0%), 28

[I 2025-01-05 18:15:02,346] Trial 307 finished with value: 0.2566554050110399 and parameters: {'similarity': 'cosine', 'topK': 86, 'shrink': 267, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2817.95 column/sec. Elapsed time 13.53 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 50.91 sec. Users per second: 699
Similarity column 38121 (100.0%), 2786.14 column/sec. Elapsed time 13.68 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 51.06 sec. Users per second: 697
Similarity column 38121 (100.0%), 2812.40 column/sec. Elapsed time 13.55 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 51.14 sec. Users per second: 696
Similarity column 38121 (100.0%), 2795.97 column/sec. Elapsed time 13.63 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 50.91 sec. Users per second: 699
Similarity column 38121 (100.0%), 27

[I 2025-01-05 18:20:29,993] Trial 308 finished with value: 0.25015859489277437 and parameters: {'similarity': 'cosine', 'topK': 465, 'shrink': 300, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2928.33 column/sec. Elapsed time 13.02 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 36.93 sec. Users per second: 963
Similarity column 38121 (100.0%), 2940.89 column/sec. Elapsed time 12.96 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 37.01 sec. Users per second: 961
Similarity column 38121 (100.0%), 2938.67 column/sec. Elapsed time 12.97 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 36.87 sec. Users per second: 965
Similarity column 38121 (100.0%), 2948.87 column/sec. Elapsed time 12.93 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 36.86 sec. Users per second: 965
Similarity column 38121 (100.0%), 29

[I 2025-01-05 18:24:41,157] Trial 309 finished with value: 0.2600386104686468 and parameters: {'similarity': 'cosine', 'topK': 29, 'shrink': 384, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2946.36 column/sec. Elapsed time 12.94 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 34.89 sec. Users per second: 1019
Similarity column 38121 (100.0%), 2933.64 column/sec. Elapsed time 12.99 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 35.08 sec. Users per second: 1014
Similarity column 38121 (100.0%), 2943.60 column/sec. Elapsed time 12.95 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 34.99 sec. Users per second: 1017
Similarity column 38121 (100.0%), 2949.03 column/sec. Elapsed time 12.93 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 35.18 sec. Users per second: 1011
Similarity column 38121 (100.0%)

[I 2025-01-05 18:28:42,728] Trial 310 finished with value: 0.2592319521116402 and parameters: {'similarity': 'cosine', 'topK': 11, 'shrink': 364, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2943.20 column/sec. Elapsed time 12.95 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 38.47 sec. Users per second: 924
Similarity column 38121 (100.0%), 2904.33 column/sec. Elapsed time 13.13 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 38.69 sec. Users per second: 919
Similarity column 38121 (100.0%), 2919.69 column/sec. Elapsed time 13.06 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 38.60 sec. Users per second: 922
Similarity column 38121 (100.0%), 2901.67 column/sec. Elapsed time 13.14 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 38.76 sec. Users per second: 918
Similarity column 38121 (100.0%), 29

[I 2025-01-05 18:33:03,075] Trial 311 finished with value: 0.2585951255947024 and parameters: {'similarity': 'cosine', 'topK': 53, 'shrink': 455, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2941.69 column/sec. Elapsed time 12.96 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 37.02 sec. Users per second: 961
Similarity column 38121 (100.0%), 2906.27 column/sec. Elapsed time 13.12 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 37.01 sec. Users per second: 961
Similarity column 38121 (100.0%), 2944.17 column/sec. Elapsed time 12.95 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 37.13 sec. Users per second: 958
Similarity column 38121 (100.0%), 2907.38 column/sec. Elapsed time 13.11 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 37.16 sec. Users per second: 957
Similarity column 38121 (100.0%), 29

[I 2025-01-05 18:37:15,282] Trial 312 finished with value: 0.26008564469795814 and parameters: {'similarity': 'cosine', 'topK': 30, 'shrink': 404, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2754.48 column/sec. Elapsed time 13.84 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 36.89 sec. Users per second: 964
Similarity column 38121 (100.0%), 2733.39 column/sec. Elapsed time 13.95 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 36.97 sec. Users per second: 962
Similarity column 38121 (100.0%), 2694.72 column/sec. Elapsed time 14.15 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 37.01 sec. Users per second: 961
Similarity column 38121 (100.0%), 2713.80 column/sec. Elapsed time 14.05 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 37.37 sec. Users per second: 952
Similarity column 38121 (100.0%), 27

[I 2025-01-05 18:41:33,005] Trial 313 finished with value: 0.21741802503013158 and parameters: {'similarity': 'tversky', 'topK': 14, 'shrink': 355, 'feature_weighting': 'TF-IDF', 'tversky_alpha': 0.46900337037410833, 'tversky_beta': 0.29323403065756626}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2896.13 column/sec. Elapsed time 13.16 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 43.65 sec. Users per second: 815
Similarity column 38121 (100.0%), 2867.09 column/sec. Elapsed time 13.30 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 43.69 sec. Users per second: 814
Similarity column 38121 (100.0%), 2836.89 column/sec. Elapsed time 13.44 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 44.00 sec. Users per second: 809
Similarity column 38121 (100.0%), 2864.16 column/sec. Elapsed time 13.31 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 43.71 sec. Users per second: 814
Similarity column 38121 (100.0%), 28

[I 2025-01-05 18:46:21,022] Trial 314 finished with value: 0.2539877203172722 and parameters: {'similarity': 'cosine', 'topK': 182, 'shrink': 332, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 3732.63 column/sec. Elapsed time 10.21 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 27.31 sec. Users per second: 1302
Similarity column 38121 (100.0%), 3700.08 column/sec. Elapsed time 10.30 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 27.23 sec. Users per second: 1306
Similarity column 38121 (100.0%), 3704.04 column/sec. Elapsed time 10.29 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 27.54 sec. Users per second: 1292
Similarity column 38121 (100.0%), 3679.40 column/sec. Elapsed time 10.36 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 27.14 sec. Users per second: 1310
Similarity column 38121 (100.0%)

[I 2025-01-05 18:49:30,570] Trial 315 finished with value: 0.0055265346604531345 and parameters: {'similarity': 'asymmetric', 'topK': 0, 'shrink': 517, 'feature_weighting': 'TF-IDF', 'asymmetric_alpha': 1.6630971112588246}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2943.16 column/sec. Elapsed time 12.95 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 38.09 sec. Users per second: 934
Similarity column 38121 (100.0%), 2904.70 column/sec. Elapsed time 13.12 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 38.19 sec. Users per second: 931
Similarity column 38121 (100.0%), 2915.49 column/sec. Elapsed time 13.08 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 38.30 sec. Users per second: 929
Similarity column 38121 (100.0%), 2873.01 column/sec. Elapsed time 13.27 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 38.13 sec. Users per second: 933
Similarity column 38121 (100.0%), 29

[I 2025-01-05 18:53:48,594] Trial 316 finished with value: 0.2589570531637072 and parameters: {'similarity': 'cosine', 'topK': 44, 'shrink': 491, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2920.42 column/sec. Elapsed time 13.05 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 39.33 sec. Users per second: 904
Similarity column 38121 (100.0%), 2901.72 column/sec. Elapsed time 13.14 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 39.40 sec. Users per second: 903
Similarity column 38121 (100.0%), 2923.30 column/sec. Elapsed time 13.04 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 39.96 sec. Users per second: 890
Similarity column 38121 (100.0%), 2914.24 column/sec. Elapsed time 13.08 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 39.52 sec. Users per second: 900
Similarity column 38121 (100.0%), 29

[I 2025-01-05 18:58:13,542] Trial 317 finished with value: 0.25770568201608424 and parameters: {'similarity': 'cosine', 'topK': 67, 'shrink': 425, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 533.08 column/sec. Elapsed time 1.19 min
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 29.71 sec. Users per second: 1197
Similarity column 38121 (100.0%), 459.24 column/sec. Elapsed time 1.38 min
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 29.89 sec. Users per second: 1190
Similarity column 38121 (100.0%), 525.19 column/sec. Elapsed time 1.21 min
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 29.57 sec. Users per second: 1203
Similarity column 38121 (100.0%), 485.26 column/sec. Elapsed time 1.31 min
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 29.78 sec. Users per second: 1195
Similarity column 38121 (100.0%), 488.97

[I 2025-01-05 19:07:06,942] Trial 318 finished with value: 0.0880585795268939 and parameters: {'similarity': 'euclidean', 'topK': 24, 'shrink': 388, 'feature_weighting': 'TF-IDF', 'normalize_avg_row': False, 'similarity_from_distance_mode': 'log', 'normalize': False}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2952.13 column/sec. Elapsed time 12.91 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 37.82 sec. Users per second: 940
Similarity column 38121 (100.0%), 2895.25 column/sec. Elapsed time 13.17 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 37.88 sec. Users per second: 939
Similarity column 38121 (100.0%), 2930.08 column/sec. Elapsed time 13.01 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 37.89 sec. Users per second: 939
Similarity column 38121 (100.0%), 2915.62 column/sec. Elapsed time 13.07 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 37.83 sec. Users per second: 940
Similarity column 38121 (100.0%), 29

[I 2025-01-05 19:11:23,065] Trial 319 finished with value: 0.25938500137216963 and parameters: {'similarity': 'cosine', 'topK': 39, 'shrink': 474, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2962.52 column/sec. Elapsed time 12.87 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 35.46 sec. Users per second: 1003
Similarity column 38121 (100.0%), 2921.55 column/sec. Elapsed time 13.05 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 35.69 sec. Users per second: 997
Similarity column 38121 (100.0%), 2912.15 column/sec. Elapsed time 13.09 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 35.72 sec. Users per second: 996
Similarity column 38121 (100.0%), 2913.65 column/sec. Elapsed time 13.08 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 35.55 sec. Users per second: 1001
Similarity column 38121 (100.0%), 

[I 2025-01-05 19:15:27,540] Trial 320 finished with value: 0.2597812568469251 and parameters: {'similarity': 'cosine', 'topK': 14, 'shrink': 411, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2913.46 column/sec. Elapsed time 13.08 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 36.64 sec. Users per second: 971
Similarity column 38121 (100.0%), 2924.21 column/sec. Elapsed time 13.04 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 36.84 sec. Users per second: 966
Similarity column 38121 (100.0%), 2868.66 column/sec. Elapsed time 13.29 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 36.76 sec. Users per second: 968
Similarity column 38121 (100.0%), 2890.63 column/sec. Elapsed time 13.19 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 37.14 sec. Users per second: 958
Similarity column 38121 (100.0%), 29

[I 2025-01-05 19:19:39,076] Trial 321 finished with value: 0.23116128426210455 and parameters: {'similarity': 'cosine', 'topK': 31, 'shrink': 347, 'feature_weighting': 'BM25'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 3024.33 column/sec. Elapsed time 12.60 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 40.14 sec. Users per second: 886
Similarity column 38121 (100.0%), 3034.65 column/sec. Elapsed time 12.56 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 40.03 sec. Users per second: 889
Similarity column 38121 (100.0%), 3009.97 column/sec. Elapsed time 12.66 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 40.06 sec. Users per second: 888
Similarity column 38121 (100.0%), 3004.27 column/sec. Elapsed time 12.69 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 39.91 sec. Users per second: 891
Similarity column 38121 (100.0%), 30

[I 2025-01-05 19:24:05,126] Trial 322 finished with value: 0.2291441194798381 and parameters: {'similarity': 'dice', 'topK': 52, 'shrink': 373, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2781.33 column/sec. Elapsed time 13.71 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 52.33 sec. Users per second: 680
Similarity column 38121 (100.0%), 2796.44 column/sec. Elapsed time 13.63 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 52.54 sec. Users per second: 677
Similarity column 38121 (100.0%), 2791.11 column/sec. Elapsed time 13.66 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 52.45 sec. Users per second: 678
Similarity column 38121 (100.0%), 2795.82 column/sec. Elapsed time 13.63 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 52.61 sec. Users per second: 676
Similarity column 38121 (100.0%), 27

[I 2025-01-05 19:29:40,680] Trial 323 finished with value: 0.24985205544205963 and parameters: {'similarity': 'cosine', 'topK': 504, 'shrink': 452, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 3459.42 column/sec. Elapsed time 11.02 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 30.91 sec. Users per second: 1150
Similarity column 38121 (100.0%), 3430.75 column/sec. Elapsed time 11.11 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 31.02 sec. Users per second: 1147
Similarity column 38121 (100.0%), 3439.15 column/sec. Elapsed time 11.08 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 30.98 sec. Users per second: 1149
Similarity column 38121 (100.0%), 3439.82 column/sec. Elapsed time 11.08 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 31.14 sec. Users per second: 1142
Similarity column 38121 (100.0%)

[I 2025-01-05 19:33:12,563] Trial 324 finished with value: 0.15850607801390945 and parameters: {'similarity': 'cosine', 'topK': 1, 'shrink': 387, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2947.45 column/sec. Elapsed time 12.93 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 36.60 sec. Users per second: 972
Similarity column 38121 (100.0%), 2892.63 column/sec. Elapsed time 13.18 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 36.53 sec. Users per second: 974
Similarity column 38121 (100.0%), 2961.77 column/sec. Elapsed time 12.87 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 36.42 sec. Users per second: 977
Similarity column 38121 (100.0%), 2925.03 column/sec. Elapsed time 13.03 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 36.19 sec. Users per second: 983
Similarity column 38121 (100.0%), 29

[I 2025-01-05 19:37:21,199] Trial 325 finished with value: 0.26016129018239126 and parameters: {'similarity': 'cosine', 'topK': 23, 'shrink': 436, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2937.73 column/sec. Elapsed time 12.98 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 37.31 sec. Users per second: 953
Similarity column 38121 (100.0%), 2939.50 column/sec. Elapsed time 12.97 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 37.62 sec. Users per second: 946
Similarity column 38121 (100.0%), 2939.37 column/sec. Elapsed time 12.97 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 37.74 sec. Users per second: 943
Similarity column 38121 (100.0%), 2912.79 column/sec. Elapsed time 13.09 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 37.61 sec. Users per second: 946
Similarity column 38121 (100.0%), 29

[I 2025-01-05 19:41:35,460] Trial 326 finished with value: 0.2597370506135189 and parameters: {'similarity': 'cosine', 'topK': 36, 'shrink': 324, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2929.48 column/sec. Elapsed time 13.01 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 35.78 sec. Users per second: 994
Similarity column 38121 (100.0%), 2948.02 column/sec. Elapsed time 12.93 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 35.71 sec. Users per second: 996
Similarity column 38121 (100.0%), 2934.36 column/sec. Elapsed time 12.99 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 35.74 sec. Users per second: 996
Similarity column 38121 (100.0%), 2945.88 column/sec. Elapsed time 12.94 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 35.69 sec. Users per second: 997
Similarity column 38121 (100.0%), 29

[I 2025-01-05 19:45:40,379] Trial 327 finished with value: 0.2600367201176465 and parameters: {'similarity': 'cosine', 'topK': 16, 'shrink': 502, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2940.93 column/sec. Elapsed time 12.96 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 39.69 sec. Users per second: 896
Similarity column 38121 (100.0%), 2927.86 column/sec. Elapsed time 13.02 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 39.69 sec. Users per second: 896
Similarity column 38121 (100.0%), 2859.77 column/sec. Elapsed time 13.33 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 39.89 sec. Users per second: 892
Similarity column 38121 (100.0%), 2919.42 column/sec. Elapsed time 13.06 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 39.94 sec. Users per second: 891
Similarity column 38121 (100.0%), 28

[I 2025-01-05 19:50:06,822] Trial 328 finished with value: 0.25717057614970573 and parameters: {'similarity': 'cosine', 'topK': 76, 'shrink': 409, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2737.49 column/sec. Elapsed time 13.93 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 57.08 sec. Users per second: 623
Similarity column 38121 (100.0%), 2761.85 column/sec. Elapsed time 13.80 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 57.22 sec. Users per second: 622
Similarity column 38121 (100.0%), 2736.37 column/sec. Elapsed time 13.93 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 56.96 sec. Users per second: 625
Similarity column 38121 (100.0%), 2764.25 column/sec. Elapsed time 13.79 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 56.97 sec. Users per second: 624
Similarity column 38121 (100.0%), 27

[I 2025-01-05 19:56:07,932] Trial 329 finished with value: 0.24831692697656357 and parameters: {'similarity': 'cosine', 'topK': 741, 'shrink': 360, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2925.59 column/sec. Elapsed time 13.03 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 38.71 sec. Users per second: 919
Similarity column 38121 (100.0%), 2911.98 column/sec. Elapsed time 13.09 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 38.76 sec. Users per second: 918
Similarity column 38121 (100.0%), 2925.08 column/sec. Elapsed time 13.03 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 38.75 sec. Users per second: 918
Similarity column 38121 (100.0%), 2924.18 column/sec. Elapsed time 13.04 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 38.85 sec. Users per second: 915
Similarity column 38121 (100.0%), 29

[I 2025-01-05 20:00:29,602] Trial 330 finished with value: 0.2582856987402793 and parameters: {'similarity': 'cosine', 'topK': 55, 'shrink': 539, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2900.29 column/sec. Elapsed time 13.14 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 38.33 sec. Users per second: 928
Similarity column 38121 (100.0%), 2878.25 column/sec. Elapsed time 13.24 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 38.08 sec. Users per second: 934
Similarity column 38121 (100.0%), 2907.00 column/sec. Elapsed time 13.11 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 37.52 sec. Users per second: 948
Similarity column 38121 (100.0%), 2941.49 column/sec. Elapsed time 12.96 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 37.50 sec. Users per second: 948
Similarity column 38121 (100.0%), 29

[I 2025-01-05 20:04:45,823] Trial 331 finished with value: 0.25941580123030095 and parameters: {'similarity': 'cosine', 'topK': 38, 'shrink': 395, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2957.05 column/sec. Elapsed time 12.89 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 34.71 sec. Users per second: 1025
Similarity column 38121 (100.0%), 2881.70 column/sec. Elapsed time 13.23 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 34.83 sec. Users per second: 1021
Similarity column 38121 (100.0%), 2932.68 column/sec. Elapsed time 13.00 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 34.73 sec. Users per second: 1025
Similarity column 38121 (100.0%), 2932.06 column/sec. Elapsed time 13.00 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 34.72 sec. Users per second: 1024
Similarity column 38121 (100.0%)

[I 2025-01-05 20:08:46,236] Trial 332 finished with value: 0.25902385793599475 and parameters: {'similarity': 'cosine', 'topK': 11, 'shrink': 302, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2939.79 column/sec. Elapsed time 12.97 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 36.21 sec. Users per second: 982
Similarity column 38121 (100.0%), 2949.19 column/sec. Elapsed time 12.93 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 36.60 sec. Users per second: 972
Similarity column 38121 (100.0%), 2947.09 column/sec. Elapsed time 12.94 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 36.45 sec. Users per second: 976
Similarity column 38121 (100.0%), 2957.54 column/sec. Elapsed time 12.89 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 36.39 sec. Users per second: 977
Similarity column 38121 (100.0%), 29

[I 2025-01-05 20:12:54,488] Trial 333 finished with value: 0.2602165828964806 and parameters: {'similarity': 'cosine', 'topK': 25, 'shrink': 341, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 3781.74 column/sec. Elapsed time 10.08 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 27.11 sec. Users per second: 1312
Similarity column 38121 (100.0%), 3754.90 column/sec. Elapsed time 10.15 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 27.36 sec. Users per second: 1300
Similarity column 38121 (100.0%), 3748.69 column/sec. Elapsed time 10.17 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 27.19 sec. Users per second: 1309
Similarity column 38121 (100.0%), 3789.37 column/sec. Elapsed time 10.06 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 27.17 sec. Users per second: 1309
Similarity column 38121 (100.0%)

[I 2025-01-05 20:16:02,350] Trial 334 finished with value: 0.0055265346604531345 and parameters: {'similarity': 'cosine', 'topK': 0, 'shrink': 334, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2944.24 column/sec. Elapsed time 12.95 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 38.06 sec. Users per second: 934
Similarity column 38121 (100.0%), 2918.75 column/sec. Elapsed time 13.06 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 38.00 sec. Users per second: 936
Similarity column 38121 (100.0%), 2908.65 column/sec. Elapsed time 13.11 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 37.98 sec. Users per second: 937
Similarity column 38121 (100.0%), 2917.49 column/sec. Elapsed time 13.07 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 38.11 sec. Users per second: 933
Similarity column 38121 (100.0%), 28

[I 2025-01-05 20:20:19,490] Trial 335 finished with value: 0.259047320965904 and parameters: {'similarity': 'cosine', 'topK': 46, 'shrink': 347, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2936.39 column/sec. Elapsed time 12.98 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 36.67 sec. Users per second: 970
Similarity column 38121 (100.0%), 2931.22 column/sec. Elapsed time 13.01 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 36.64 sec. Users per second: 971
Similarity column 38121 (100.0%), 2933.88 column/sec. Elapsed time 12.99 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 36.59 sec. Users per second: 972
Similarity column 38121 (100.0%), 2935.09 column/sec. Elapsed time 12.99 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 36.65 sec. Users per second: 970
Similarity column 38121 (100.0%), 29

[I 2025-01-05 20:24:29,251] Trial 336 finished with value: 0.2602042368707474 and parameters: {'similarity': 'cosine', 'topK': 26, 'shrink': 372, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2937.87 column/sec. Elapsed time 12.98 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 38.62 sec. Users per second: 921
Similarity column 38121 (100.0%), 2905.00 column/sec. Elapsed time 13.12 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 38.86 sec. Users per second: 916
Similarity column 38121 (100.0%), 2913.01 column/sec. Elapsed time 13.09 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 38.68 sec. Users per second: 920
Similarity column 38121 (100.0%), 2925.01 column/sec. Elapsed time 13.03 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 38.92 sec. Users per second: 914
Similarity column 38121 (100.0%), 28

[I 2025-01-05 20:28:50,616] Trial 337 finished with value: 0.2582206446586085 and parameters: {'similarity': 'cosine', 'topK': 60, 'shrink': 321, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2957.16 column/sec. Elapsed time 12.89 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 36.75 sec. Users per second: 968
Similarity column 38121 (100.0%), 2940.81 column/sec. Elapsed time 12.96 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 36.88 sec. Users per second: 965
Similarity column 38121 (100.0%), 2946.92 column/sec. Elapsed time 12.94 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 36.70 sec. Users per second: 970
Similarity column 38121 (100.0%), 2947.62 column/sec. Elapsed time 12.93 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 36.96 sec. Users per second: 962
Similarity column 38121 (100.0%), 29

[I 2025-01-05 20:33:00,886] Trial 338 finished with value: 0.26008694878407146 and parameters: {'similarity': 'cosine', 'topK': 30, 'shrink': 348, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2959.72 column/sec. Elapsed time 12.88 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 35.10 sec. Users per second: 1013
Similarity column 38121 (100.0%), 2932.19 column/sec. Elapsed time 13.00 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 35.17 sec. Users per second: 1012
Similarity column 38121 (100.0%), 2953.65 column/sec. Elapsed time 12.91 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 35.14 sec. Users per second: 1013
Similarity column 38121 (100.0%), 2939.17 column/sec. Elapsed time 12.97 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 35.29 sec. Users per second: 1008
Similarity column 38121 (100.0%)

[I 2025-01-05 20:37:02,718] Trial 339 finished with value: 0.25971224492230577 and parameters: {'similarity': 'cosine', 'topK': 13, 'shrink': 369, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2670.97 column/sec. Elapsed time 14.27 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 48.45 sec. Users per second: 734
Similarity column 38121 (100.0%), 2650.42 column/sec. Elapsed time 14.38 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 48.67 sec. Users per second: 731
Similarity column 38121 (100.0%), 2661.22 column/sec. Elapsed time 14.32 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 48.53 sec. Users per second: 733
Similarity column 38121 (100.0%), 2654.72 column/sec. Elapsed time 14.36 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 48.60 sec. Users per second: 732
Similarity column 38121 (100.0%), 26

[I 2025-01-05 20:42:21,364] Trial 340 finished with value: 0.2218098485066912 and parameters: {'similarity': 'tversky', 'topK': 269, 'shrink': 388, 'feature_weighting': 'TF-IDF', 'tversky_alpha': 1.6097015695752648, 'tversky_beta': 1.0232320873041325}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2851.36 column/sec. Elapsed time 13.37 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 46.17 sec. Users per second: 770
Similarity column 38121 (100.0%), 2827.83 column/sec. Elapsed time 13.48 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 46.41 sec. Users per second: 767
Similarity column 38121 (100.0%), 2850.21 column/sec. Elapsed time 13.37 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 46.24 sec. Users per second: 770
Similarity column 38121 (100.0%), 2868.77 column/sec. Elapsed time 13.29 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 46.29 sec. Users per second: 768
Similarity column 38121 (100.0%), 28

[I 2025-01-05 20:47:22,709] Trial 341 finished with value: 0.25197504054199743 and parameters: {'similarity': 'cosine', 'topK': 286, 'shrink': 283, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2877.53 column/sec. Elapsed time 13.25 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 44.75 sec. Users per second: 795
Similarity column 38121 (100.0%), 2868.97 column/sec. Elapsed time 13.29 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 44.75 sec. Users per second: 795
Similarity column 38121 (100.0%), 2870.88 column/sec. Elapsed time 13.28 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 44.72 sec. Users per second: 796
Similarity column 38121 (100.0%), 2847.68 column/sec. Elapsed time 13.39 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 44.60 sec. Users per second: 797
Similarity column 38121 (100.0%), 28

[I 2025-01-05 20:52:15,510] Trial 342 finished with value: 0.25329318927483335 and parameters: {'similarity': 'cosine', 'topK': 210, 'shrink': 417, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2903.31 column/sec. Elapsed time 13.13 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 37.52 sec. Users per second: 948
Similarity column 38121 (100.0%), 2895.25 column/sec. Elapsed time 13.17 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 37.50 sec. Users per second: 949
Similarity column 38121 (100.0%), 2864.54 column/sec. Elapsed time 13.31 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 37.51 sec. Users per second: 949
Similarity column 38121 (100.0%), 2897.05 column/sec. Elapsed time 13.16 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 37.58 sec. Users per second: 946
Similarity column 38121 (100.0%), 28

[I 2025-01-05 20:56:30,972] Trial 343 finished with value: 0.22890498543350976 and parameters: {'similarity': 'asymmetric', 'topK': 46, 'shrink': 308, 'feature_weighting': 'BM25', 'asymmetric_alpha': 1.093984249246522}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2942.29 column/sec. Elapsed time 12.96 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 36.53 sec. Users per second: 974
Similarity column 38121 (100.0%), 2938.43 column/sec. Elapsed time 12.97 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 36.54 sec. Users per second: 974
Similarity column 38121 (100.0%), 2950.97 column/sec. Elapsed time 12.92 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 36.48 sec. Users per second: 975
Similarity column 38121 (100.0%), 2949.92 column/sec. Elapsed time 12.92 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 36.73 sec. Users per second: 968
Similarity column 38121 (100.0%), 29

[I 2025-01-05 21:00:40,139] Trial 344 finished with value: 0.26016787043909445 and parameters: {'similarity': 'cosine', 'topK': 26, 'shrink': 344, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 537.55 column/sec. Elapsed time 1.18 min
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 34.64 sec. Users per second: 1027
Similarity column 38121 (100.0%), 545.67 column/sec. Elapsed time 1.16 min
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 34.87 sec. Users per second: 1020
Similarity column 38121 (100.0%), 550.57 column/sec. Elapsed time 1.15 min
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 34.67 sec. Users per second: 1026
Similarity column 38121 (100.0%), 551.62 column/sec. Elapsed time 1.15 min
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 34.65 sec. Users per second: 1026
Similarity column 38121 (100.0%), 535.62

[I 2025-01-05 21:09:25,074] Trial 345 finished with value: 0.24097187263705983 and parameters: {'similarity': 'euclidean', 'topK': 12, 'shrink': 364, 'feature_weighting': 'TF-IDF', 'normalize_avg_row': True, 'similarity_from_distance_mode': 'lin', 'normalize': True}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2927.45 column/sec. Elapsed time 13.02 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 37.49 sec. Users per second: 949
Similarity column 38121 (100.0%), 2943.93 column/sec. Elapsed time 12.95 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 37.56 sec. Users per second: 947
Similarity column 38121 (100.0%), 2926.28 column/sec. Elapsed time 13.03 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 37.58 sec. Users per second: 947
Similarity column 38121 (100.0%), 2909.74 column/sec. Elapsed time 13.10 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 37.45 sec. Users per second: 950
Similarity column 38121 (100.0%), 29

[I 2025-01-05 21:13:39,090] Trial 346 finished with value: 0.2596410261874563 and parameters: {'similarity': 'cosine', 'topK': 37, 'shrink': 398, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 3780.42 column/sec. Elapsed time 10.08 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 27.50 sec. Users per second: 1293
Similarity column 38121 (100.0%), 3757.48 column/sec. Elapsed time 10.15 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 27.14 sec. Users per second: 1311
Similarity column 38121 (100.0%), 3777.38 column/sec. Elapsed time 10.09 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 27.36 sec. Users per second: 1301
Similarity column 38121 (100.0%), 3792.54 column/sec. Elapsed time 10.05 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 27.32 sec. Users per second: 1302
Similarity column 38121 (100.0%)

[I 2025-01-05 21:16:47,434] Trial 347 finished with value: 0.0055265346604531345 and parameters: {'similarity': 'cosine', 'topK': 0, 'shrink': 519, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 3041.23 column/sec. Elapsed time 12.53 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 40.75 sec. Users per second: 873
Similarity column 38121 (100.0%), 3050.38 column/sec. Elapsed time 12.50 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 40.81 sec. Users per second: 872
Similarity column 38121 (100.0%), 3038.56 column/sec. Elapsed time 12.55 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 40.75 sec. Users per second: 873
Similarity column 38121 (100.0%), 3035.29 column/sec. Elapsed time 12.56 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 41.06 sec. Users per second: 866
Similarity column 38121 (100.0%), 30

[I 2025-01-05 21:21:17,102] Trial 348 finished with value: 0.2263192919508843 and parameters: {'similarity': 'dice', 'topK': 64, 'shrink': 435, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


Similarity column 38121 (100.0%), 2948.95 column/sec. Elapsed time 12.93 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 36.41 sec. Users per second: 977
Similarity column 38121 (100.0%), 2936.64 column/sec. Elapsed time 12.98 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 36.79 sec. Users per second: 967
Similarity column 38121 (100.0%), 2949.69 column/sec. Elapsed time 12.92 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 36.47 sec. Users per second: 976
Similarity column 38121 (100.0%), 2929.87 column/sec. Elapsed time 13.01 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 36.44 sec. Users per second: 976
Similarity column 38121 (100.0%), 29

[I 2025-01-05 21:25:25,974] Trial 349 finished with value: 0.2602107262461635 and parameters: {'similarity': 'cosine', 'topK': 24, 'shrink': 472, 'feature_weighting': 'TF-IDF'}. Best is trial 172 with value: 0.26030777685753526.


## Some optuna visualizations on recommender parameters

In [14]:
# If no tuning ran in this session, fetch the stored study from the SQLite
# database so there is something to plot.
if not config['tune_parameters']:
    optuna_study = optuna.load_study(
        study_name=f'hyperparameters_tuning_{config["model"]}_{config["metric"]}',
        storage=f'sqlite:///{config["database_path"]}',
    )

# Build Optuna's slice plot for the study and display it.
fig = optuna.visualization.plot_slice(optuna_study)
fig.show()

In [15]:
# Without a tuning run in this session, the study has to be loaded from the
# SQLite storage referenced in the configuration.
if not config['tune_parameters']:
    optuna_study = optuna.load_study(
        study_name=f'hyperparameters_tuning_{config["model"]}_{config["metric"]}',
        storage=f'sqlite:///{config["database_path"]}',
    )

# Build Optuna's parameter-importance plot for the study and display it.
fig = optuna.visualization.plot_param_importances(optuna_study)
fig.show()

## Let's train the recommender with best parameter values

In [16]:
# Build and fit the final recommender only when requested by the configuration.
if config['tune_best_params']:

    if not config['tune_parameters']:
        # Tuning was skipped: read the best parameters from the stored JSON file.
        with open(
            f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/'
            f'best_params_{config["model"]}_{config["metric"]}.json',
            'r',
        ) as best_params_json:
            best_params = json.load(best_params_json)
    else:
        # Tuning ran in this session: use the parameters of the study's best trial.
        best_params = optuna_study.best_trial.params

    # The recommender is built on the sum of the train and validation matrices.
    recommender_instance = ItemKNNCFRecommender(URM_train + URM_validation)
    recommender_instance.fit(**best_params)

Similarity column 38121 (100.0%), 2617.76 column/sec. Elapsed time 14.56 sec


# Testing

Create the recommendations for the submission. 

In [17]:
# A submission can only be produced when the final recommender was fitted.
if config['tune_best_params']:

    # Target users listed in the competition input CSV.
    data_target_users_test = pd.read_csv(
        '/kaggle/input/recommender-system-2024-challenge-polimi/data_target_users_test.csv'
    )

    # Pass the target users, the fitted recommender and the output path to create_submission.
    create_submission(
        data_target_users_test,
        recommender_instance,
        f'/kaggle/working/submission_{config["model"]}_{config["metric"]}.csv',
    )

Submission file saved as /kaggle/working/submission_ItemKNNCF_Recall.csv


# Save Version on GitHub 

Write the best hyperparameters to a JSON file (and upload it to GitHub if enabled), or copy the previously saved JSON file into the working directory.

In [18]:
if config['tune_parameters']:
    # A study was tuned in this session: dump its best parameters to the working directory.
    with open(
        f'/kaggle/working/best_params_{config["model"]}_{config["metric"]}.json',
        'w',
    ) as params_file:
        json.dump(optuna_study.best_params, params_file)

    # Optionally send the freshly written JSON through upload_file.
    if config['save_github']:
        upload_file(
            f'/kaggle/working/best_params_{config["model"]}_{config["metric"]}.json',
            (
                f'{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/'
                f'best_params_{config["model"]}_{config["metric"]}.json'
            ),
            f'{config["model"]}_{config["metric"]} tuning results (from kaggle notebook)',
            repo,
        )
elif config['copy_prev_best_params']:
    # No tuning: copy the previously stored JSON into the working directory instead.
    shutil.copyfile(
        (
            f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/'
            f'best_params_{config["model"]}_{config["metric"]}.json'
        ),
        f'/kaggle/working/best_params_{config["model"]}_{config["metric"]}.json',
    )

File 'TrainedModels/WithKFCV/KNN/ItemKNNCFRecommender/OptimizingRecall/best_params_ItemKNNCF_Recall.json' updated successfully.


Save the history of the tuned model.

In [19]:
# The study database is uploaded only when GitHub saving is enabled and a
# tuning run actually took place in this session.
if config['save_github'] and config['tune_parameters']:
    upload_file(
        config['database_path'],
        (
            f'{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/'
            f'history_{config["model"]}_{config["metric"]}.db'
        ),
        f'Tuning {config["model"]}_{config["metric"]} db updated results (from kaggle notebook)',
        repo,
    )

File 'TrainedModels/WithKFCV/KNN/ItemKNNCFRecommender/OptimizingRecall/history_ItemKNNCF_Recall.db' updated successfully.


Save the submission produced by the best trained model.

In [20]:
# Upload the submission CSV via upload_file when both GitHub saving and
# final-model training are enabled.
if config['save_github'] and config['tune_best_params']:
    upload_file(
        f'/kaggle/working/submission_{config["model"]}_{config["metric"]}.csv',
        (
            f'{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/'
            f'Submission/submission_{config["model"]}_{config["metric"]}.csv'
        ),
        f'New {config["model"]}_{config["metric"]} submission (from kaggle notebook)',
        repo,
    )

File 'TrainedModels/WithKFCV/KNN/ItemKNNCFRecommender/OptimizingRecall/Submission/submission_ItemKNNCF_Recall.csv' updated successfully.
