# Set connection with GitHub

In [1]:
# ! cd /kaggle/working && rm -rf RECsys_Challenge2024

In [2]:
from kaggle_secrets import UserSecretsClient

token = UserSecretsClient().get_secret("Token")

! git clone https://{token}@github.com/madratak/RECsys_Challenge2024.git

Cloning into 'RECsys_Challenge2024'...
remote: Enumerating objects: 6760, done.[K
remote: Counting objects: 100% (310/310), done.[K
remote: Compressing objects: 100% (268/268), done.[K
remote: Total 6760 (delta 118), reused 29 (delta 6), pack-reused 6450 (from 2)[K
Receiving objects: 100% (6760/6760), 409.66 MiB | 38.24 MiB/s, done.
Resolving deltas: 100% (3693/3693), done.
Updating files: 100% (506/506), done.


In [3]:
! pip install PyGithub requests

Collecting PyGithub
  Downloading PyGithub-2.5.0-py3-none-any.whl.metadata (3.9 kB)
Collecting pynacl>=1.4.0 (from PyGithub)
  Downloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl.metadata (8.6 kB)
Downloading PyGithub-2.5.0-py3-none-any.whl (375 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m375.9/375.9 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (856 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m856.7/856.7 kB[0m [31m32.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pynacl, PyGithub
Successfully installed PyGithub-2.5.0 pynacl-1.5.0


In [4]:
%cd /kaggle/working/RECsys_Challenge2024 
! python run_compile_all_cython.py

/kaggle/working/RECsys_Challenge2024
run_compile_all_cython: Found 11 Cython files in 5 folders...
run_compile_all_cython: All files will be compiled using your current python environment: '/usr/bin/python3'
Compiling [1/11]: MatrixFactorization_Cython_Epoch.pyx... 
In file included from [01m[K/usr/local/lib/python3.10/dist-packages/numpy/core/include/numpy/ndarraytypes.h:1929[m[K,
                 from [01m[K/usr/local/lib/python3.10/dist-packages/numpy/core/include/numpy/ndarrayobject.h:12[m[K,
                 from [01m[K/usr/local/lib/python3.10/dist-packages/numpy/core/include/numpy/arrayobject.h:5[m[K,
                 from [01m[KMatrixFactorization_Cython_Epoch.c:1252[m[K:
      |  [01;35m[K^~~~~~~[m[K
[01m[KMatrixFactorization_Cython_Epoch.c:[m[K In function ‘[01m[K__pyx_pf_32MatrixFactorization_Cython_Epoch_32MatrixFactorization_Cython_Epoch_10epochIteration_Cython_ASY_SVD_SGD[m[K’:
26256 |         for (__pyx_t_22 = __pyx_v_start_pos_seen_i

In [5]:
from Utils.notebookFunctions import *
import numpy as np
import pandas as pd
import scipy.sparse as sps
import time
import shutil
import optuna
import json
import os
from Utils.seconds_to_biggest_unit import seconds_to_biggest_unit


K_PATH = '/kaggle/working/RECsys_Challenge2024'
GH_PATH = 'TrainedModels/WithKFCV/KNN'

np.random.seed(42)

## Import the repository **RECsys_Challenge2024**

In [6]:
repo = get_repo_from_github(token)

Repository 'RECsys_Challenge2024' found.


In [7]:
config = {
    'model': 'ItemKNNCBF',
    'n_folds': 5,
    'metric': 'MAP',
    'tune_parameters': True,
    'database_path': '/kaggle/working/history_ItemKNNCBF_MAP.db',
    'copy_prev_best_params': False,
    'tune_best_params': True,
    'save_github': True
}

Import the database where previous tuning trials have been saved.

In [8]:
try:
    shutil.copyfile(
        f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/history_{config["model"]}_{config["metric"]}.db', 
        config['database_path']
    )
except FileNotFoundError:
    pass # if not present optuna will create it

# Construction of URM and ICM matrices

In [9]:
URM_all_dataframe = pd.read_csv("/kaggle/working/RECsys_Challenge2024/Dataset/data_train.csv")

n_users = len(URM_all_dataframe["user_id"].unique())
n_items = len(URM_all_dataframe["item_id"].unique())

URM_all = sps.csr_matrix((URM_all_dataframe["data"].values, 
                          (URM_all_dataframe["user_id"].values, URM_all_dataframe["item_id"].values)),
                        shape = (n_users, n_items))

URM_all

<35736x38121 sparse matrix of type '<class 'numpy.float64'>'
	with 1764607 stored elements in Compressed Sparse Row format>

In [10]:
ICM_dataframe = pd.read_csv("/kaggle/working/RECsys_Challenge2024/Dataset/data_ICM_metadata.csv")

n_items = len(ICM_dataframe["item_id"].unique())
n_features = len(ICM_dataframe["feature_id"].unique())

ICM_all = sps.csr_matrix((ICM_dataframe["data"].values, 
                          (ICM_dataframe["item_id"].values, ICM_dataframe["feature_id"].values)),
                        shape = (n_items, n_features))

ICM_all

<38121x94331 sparse matrix of type '<class 'numpy.float64'>'
	with 2940040 stored elements in Compressed Sparse Row format>

# Training

In [11]:
from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Data_manager.split_functions.split_train_k_folds import split_train_k_folds

URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.80)

folds = split_train_k_folds(URM_all, k=config['n_folds'])



In [12]:
from Recommenders.KNN.ItemKNNCBFRecommender import ItemKNNCBFRecommender

def objective_function_ItemKNNCBF(optuna_trial):
    
    similarity = optuna_trial.suggest_categorical("similarity", ['cosine', 'dice', 'jaccard', 'asymmetric', 'tversky', 'euclidean'])
    
    full_hyperp = {"similarity": similarity,
                   "topK": optuna_trial.suggest_int("topK", 0, 750),
                   "shrink": optuna_trial.suggest_int("shrink", 0, 1000),
                   'feature_weighting': optuna_trial.suggest_categorical('feature_weighting', ["BM25", "TF-IDF", "none"])
                  }
    
    if similarity == "asymmetric":
        full_hyperp["asymmetric_alpha"] = optuna_trial.suggest_float("asymmetric_alpha", 0, 2, log=False)
        full_hyperp["normalize"] = True     

    elif similarity == "tversky":
        full_hyperp["tversky_alpha"] = optuna_trial.suggest_float("tversky_alpha", 0, 2, log=False)
        full_hyperp["tversky_beta"] = optuna_trial.suggest_float("tversky_beta", 0, 2, log=False)
        full_hyperp["normalize"] = True 

    elif similarity == "euclidean":
        full_hyperp["normalize_avg_row"] = optuna_trial.suggest_categorical("normalize_avg_row", [True, False])
        full_hyperp["similarity_from_distance_mode"] = optuna_trial.suggest_categorical("similarity_from_distance_mode", ["lin", "log", "exp"])
        full_hyperp["normalize"] = optuna_trial.suggest_categorical("normalize", [True, False])
        
    
    validation_results = []
    
    for fold_idx, (URM_train_fold, URM_validation_fold) in enumerate(folds):
    
        recommender_instance = ItemKNNCBFRecommender(URM_train_fold, ICM_all)
        recommender_instance.fit(**full_hyperp)
        
        evaluator = EvaluatorHoldout(URM_validation_fold, cutoff_list=[10])
        result_df, _ = evaluator.evaluateRecommender(recommender_instance)
        
        validation_results.append(result_df.loc[10]["MAP"])
    
    return np.mean(validation_results)

In [13]:
if config['tune_parameters']:
    
    optuna_study = optuna.create_study(direction='maximize', study_name=f'hyperparameters_tuning_{config["model"]}_{config["metric"]}',
                                storage=f'sqlite:///{config["database_path"]}', load_if_exists=True)

    optuna_study.optimize(objective_function_ItemKNNCBF, n_trials=50)

[I 2025-01-06 10:11:35,643] Using an existing study with name 'hyperparameters_tuning_ItemKNNCBF_MAP' instead of creating a new one.


Similarity column 38121 (100.0%), 738.80 column/sec. Elapsed time 51.60 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 31.63 sec. Users per second: 1124
Similarity column 38121 (100.0%), 738.90 column/sec. Elapsed time 51.59 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 31.08 sec. Users per second: 1145
Similarity column 38121 (100.0%), 749.35 column/sec. Elapsed time 50.87 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 31.38 sec. Users per second: 1134
Similarity column 38121 (100.0%), 743.03 column/sec. Elapsed time 51.30 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 31.27 sec. Users per second: 1138
Similarity column 38121 (100.0%), 75

[I 2025-01-06 10:18:29,480] Trial 159 finished with value: 0.01862958573382582 and parameters: {'similarity': 'cosine', 'topK': 13, 'shrink': 603, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 747.19 column/sec. Elapsed time 51.02 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 32.30 sec. Users per second: 1101
Similarity column 38121 (100.0%), 748.13 column/sec. Elapsed time 50.96 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 32.53 sec. Users per second: 1094
Similarity column 38121 (100.0%), 749.14 column/sec. Elapsed time 50.89 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 32.51 sec. Users per second: 1095
Similarity column 38121 (100.0%), 748.69 column/sec. Elapsed time 50.92 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 32.65 sec. Users per second: 1089
Similarity column 38121 (100.0%), 75

[I 2025-01-06 10:25:28,968] Trial 160 finished with value: 0.018460568943672925 and parameters: {'similarity': 'cosine', 'topK': 30, 'shrink': 603, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 726.37 column/sec. Elapsed time 52.48 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 47.42 sec. Users per second: 750
Similarity column 38121 (100.0%), 718.61 column/sec. Elapsed time 53.05 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 46.92 sec. Users per second: 758
Similarity column 38121 (100.0%), 718.58 column/sec. Elapsed time 53.05 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 46.57 sec. Users per second: 764
Similarity column 38121 (100.0%), 721.49 column/sec. Elapsed time 52.84 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 46.77 sec. Users per second: 761
Similarity column 38121 (100.0%), 717.50

[I 2025-01-06 10:33:51,895] Trial 161 finished with value: 0.016812941008761705 and parameters: {'similarity': 'cosine', 'topK': 403, 'shrink': 524, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 726.87 column/sec. Elapsed time 52.45 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 32.74 sec. Users per second: 1086
Similarity column 38121 (100.0%), 701.75 column/sec. Elapsed time 54.32 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 32.44 sec. Users per second: 1096
Similarity column 38121 (100.0%), 703.50 column/sec. Elapsed time 54.19 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 33.47 sec. Users per second: 1063
Similarity column 38121 (100.0%), 675.53 column/sec. Elapsed time 56.43 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 33.96 sec. Users per second: 1047
Similarity column 38121 (100.0%), 67

[I 2025-01-06 10:41:13,756] Trial 162 finished with value: 0.018620665383481574 and parameters: {'similarity': 'cosine', 'topK': 10, 'shrink': 541, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 694.40 column/sec. Elapsed time 54.90 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 32.51 sec. Users per second: 1094
Similarity column 38121 (100.0%), 714.48 column/sec. Elapsed time 53.36 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 33.15 sec. Users per second: 1073
Similarity column 38121 (100.0%), 723.80 column/sec. Elapsed time 52.67 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 32.46 sec. Users per second: 1096
Similarity column 38121 (100.0%), 722.52 column/sec. Elapsed time 52.76 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 32.40 sec. Users per second: 1098
Similarity column 38121 (100.0%), 72

[I 2025-01-06 10:48:24,684] Trial 163 finished with value: 0.018569054714278783 and parameters: {'similarity': 'cosine', 'topK': 16, 'shrink': 580, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 724.63 column/sec. Elapsed time 52.61 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 34.03 sec. Users per second: 1045
Similarity column 38121 (100.0%), 725.39 column/sec. Elapsed time 52.55 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 33.90 sec. Users per second: 1049
Similarity column 38121 (100.0%), 729.26 column/sec. Elapsed time 52.27 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 34.23 sec. Users per second: 1040
Similarity column 38121 (100.0%), 721.28 column/sec. Elapsed time 52.85 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 33.92 sec. Users per second: 1049
Similarity column 38121 (100.0%), 73

[I 2025-01-06 10:55:39,191] Trial 164 finished with value: 0.018298051446181608 and parameters: {'similarity': 'cosine', 'topK': 42, 'shrink': 626, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 732.42 column/sec. Elapsed time 52.05 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 32.95 sec. Users per second: 1079
Similarity column 38121 (100.0%), 731.05 column/sec. Elapsed time 52.15 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 32.97 sec. Users per second: 1079
Similarity column 38121 (100.0%), 730.64 column/sec. Elapsed time 52.17 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 32.39 sec. Users per second: 1099
Similarity column 38121 (100.0%), 743.70 column/sec. Elapsed time 51.26 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 32.52 sec. Users per second: 1094
Similarity column 38121 (100.0%), 72

[I 2025-01-06 11:02:45,343] Trial 165 finished with value: 0.018554747185112755 and parameters: {'similarity': 'cosine', 'topK': 23, 'shrink': 557, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 963.55 column/sec. Elapsed time 39.56 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 27.74 sec. Users per second: 1282
Similarity column 38121 (100.0%), 950.95 column/sec. Elapsed time 40.09 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 27.89 sec. Users per second: 1275
Similarity column 38121 (100.0%), 951.79 column/sec. Elapsed time 40.05 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 27.70 sec. Users per second: 1285
Similarity column 38121 (100.0%), 956.57 column/sec. Elapsed time 39.85 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 27.67 sec. Users per second: 1286
Similarity column 38121 (100.0%), 96

[I 2025-01-06 11:08:25,018] Trial 166 finished with value: 0.01584246989308005 and parameters: {'similarity': 'cosine', 'topK': 1, 'shrink': 510, 'feature_weighting': 'BM25'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 738.95 column/sec. Elapsed time 51.59 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 34.93 sec. Users per second: 1018
Similarity column 38121 (100.0%), 731.00 column/sec. Elapsed time 52.15 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 34.76 sec. Users per second: 1024
Similarity column 38121 (100.0%), 732.28 column/sec. Elapsed time 52.06 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 35.06 sec. Users per second: 1015
Similarity column 38121 (100.0%), 738.14 column/sec. Elapsed time 51.64 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 34.71 sec. Users per second: 1025
Similarity column 38121 (100.0%), 73

[I 2025-01-06 11:15:40,918] Trial 167 finished with value: 0.018051962442254356 and parameters: {'similarity': 'cosine', 'topK': 58, 'shrink': 984, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 732.71 column/sec. Elapsed time 52.03 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 33.54 sec. Users per second: 1060
Similarity column 38121 (100.0%), 723.07 column/sec. Elapsed time 52.72 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 33.95 sec. Users per second: 1048
Similarity column 38121 (100.0%), 721.19 column/sec. Elapsed time 52.86 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 33.46 sec. Users per second: 1064
Similarity column 38121 (100.0%), 723.62 column/sec. Elapsed time 52.68 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 33.66 sec. Users per second: 1057
Similarity column 38121 (100.0%), 72

[I 2025-01-06 11:22:54,136] Trial 168 finished with value: 0.01841003517076179 and parameters: {'similarity': 'cosine', 'topK': 33, 'shrink': 592, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 725.89 column/sec. Elapsed time 52.52 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 32.01 sec. Users per second: 1111
Similarity column 38121 (100.0%), 727.53 column/sec. Elapsed time 52.40 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 31.82 sec. Users per second: 1118
Similarity column 38121 (100.0%), 729.36 column/sec. Elapsed time 52.27 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 31.80 sec. Users per second: 1119
Similarity column 38121 (100.0%), 733.58 column/sec. Elapsed time 51.97 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 31.69 sec. Users per second: 1123
Similarity column 38121 (100.0%), 73

[I 2025-01-06 11:29:55,741] Trial 169 finished with value: 0.01864159067934202 and parameters: {'similarity': 'cosine', 'topK': 12, 'shrink': 497, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 741.31 column/sec. Elapsed time 51.42 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 34.38 sec. Users per second: 1034
Similarity column 38121 (100.0%), 744.05 column/sec. Elapsed time 51.23 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 34.43 sec. Users per second: 1033
Similarity column 38121 (100.0%), 740.45 column/sec. Elapsed time 51.48 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 34.42 sec. Users per second: 1034
Similarity column 38121 (100.0%), 744.24 column/sec. Elapsed time 51.22 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 34.80 sec. Users per second: 1022
Similarity column 38121 (100.0%), 73

[I 2025-01-06 11:37:09,326] Trial 170 finished with value: 0.013065215786960478 and parameters: {'similarity': 'dice', 'topK': 50, 'shrink': 438, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 735.45 column/sec. Elapsed time 51.83 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 31.52 sec. Users per second: 1128
Similarity column 38121 (100.0%), 743.72 column/sec. Elapsed time 51.26 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 31.72 sec. Users per second: 1121
Similarity column 38121 (100.0%), 739.78 column/sec. Elapsed time 51.53 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 31.37 sec. Users per second: 1134
Similarity column 38121 (100.0%), 744.86 column/sec. Elapsed time 51.18 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 31.58 sec. Users per second: 1126
Similarity column 38121 (100.0%), 72

[I 2025-01-06 11:44:07,713] Trial 171 finished with value: 0.018643973530609002 and parameters: {'similarity': 'cosine', 'topK': 13, 'shrink': 491, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 710.33 column/sec. Elapsed time 53.67 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 48.74 sec. Users per second: 730
Similarity column 38121 (100.0%), 709.34 column/sec. Elapsed time 53.74 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 49.21 sec. Users per second: 723
Similarity column 38121 (100.0%), 713.46 column/sec. Elapsed time 53.43 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 49.48 sec. Users per second: 719
Similarity column 38121 (100.0%), 708.56 column/sec. Elapsed time 53.80 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 48.85 sec. Users per second: 728
Similarity column 38121 (100.0%), 708.97

[I 2025-01-06 11:52:46,149] Trial 172 finished with value: 0.01663238488574889 and parameters: {'similarity': 'cosine', 'topK': 472, 'shrink': 468, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 1038.71 column/sec. Elapsed time 36.70 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 24.39 sec. Users per second: 1458
Similarity column 38121 (100.0%), 1036.16 column/sec. Elapsed time 36.79 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 24.48 sec. Users per second: 1453
Similarity column 38121 (100.0%), 1040.11 column/sec. Elapsed time 36.65 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 24.04 sec. Users per second: 1480
Similarity column 38121 (100.0%), 1041.52 column/sec. Elapsed time 36.60 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 24.36 sec. Users per second: 1460
Similarity column 38121 (100.0%)

[I 2025-01-06 11:57:52,652] Trial 173 finished with value: 0.0002664826418719655 and parameters: {'similarity': 'cosine', 'topK': 0, 'shrink': 486, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 722.02 column/sec. Elapsed time 52.80 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 33.64 sec. Users per second: 1057
Similarity column 38121 (100.0%), 715.39 column/sec. Elapsed time 53.29 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 33.60 sec. Users per second: 1059
Similarity column 38121 (100.0%), 719.35 column/sec. Elapsed time 52.99 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 33.24 sec. Users per second: 1070
Similarity column 38121 (100.0%), 725.00 column/sec. Elapsed time 52.58 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 33.34 sec. Users per second: 1067
Similarity column 38121 (100.0%), 71

[I 2025-01-06 12:05:06,399] Trial 174 finished with value: 0.018502745546591214 and parameters: {'similarity': 'cosine', 'topK': 25, 'shrink': 500, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 723.15 column/sec. Elapsed time 52.72 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 32.23 sec. Users per second: 1103
Similarity column 38121 (100.0%), 718.92 column/sec. Elapsed time 53.03 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 31.98 sec. Users per second: 1112
Similarity column 38121 (100.0%), 720.63 column/sec. Elapsed time 52.90 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 31.73 sec. Users per second: 1121
Similarity column 38121 (100.0%), 729.85 column/sec. Elapsed time 52.23 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 31.69 sec. Users per second: 1123
Similarity column 38121 (100.0%), 72

[I 2025-01-06 12:12:11,068] Trial 175 finished with value: 0.018658230191451727 and parameters: {'similarity': 'cosine', 'topK': 12, 'shrink': 540, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 722.01 column/sec. Elapsed time 52.80 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 32.32 sec. Users per second: 1100
Similarity column 38121 (100.0%), 729.42 column/sec. Elapsed time 52.26 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 32.59 sec. Users per second: 1091
Similarity column 38121 (100.0%), 729.56 column/sec. Elapsed time 52.25 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 32.22 sec. Users per second: 1104
Similarity column 38121 (100.0%), 734.35 column/sec. Elapsed time 51.91 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 32.58 sec. Users per second: 1092
Similarity column 38121 (100.0%), 72

[I 2025-01-06 12:19:16,441] Trial 176 finished with value: 0.01857017123579111 and parameters: {'similarity': 'cosine', 'topK': 16, 'shrink': 536, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 732.91 column/sec. Elapsed time 52.01 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 34.18 sec. Users per second: 1040
Similarity column 38121 (100.0%), 722.02 column/sec. Elapsed time 52.80 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 33.98 sec. Users per second: 1047
Similarity column 38121 (100.0%), 717.96 column/sec. Elapsed time 53.10 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 34.47 sec. Users per second: 1032
Similarity column 38121 (100.0%), 720.84 column/sec. Elapsed time 52.88 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 34.06 sec. Users per second: 1044
Similarity column 38121 (100.0%), 72

[I 2025-01-06 12:26:32,699] Trial 177 finished with value: 0.01832033927065363 and parameters: {'similarity': 'cosine', 'topK': 38, 'shrink': 517, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 724.21 column/sec. Elapsed time 52.64 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 32.19 sec. Users per second: 1105
Similarity column 38121 (100.0%), 720.91 column/sec. Elapsed time 52.88 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 32.70 sec. Users per second: 1088
Similarity column 38121 (100.0%), 725.44 column/sec. Elapsed time 52.55 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 32.43 sec. Users per second: 1097
Similarity column 38121 (100.0%), 722.22 column/sec. Elapsed time 52.78 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 32.45 sec. Users per second: 1096
Similarity column 38121 (100.0%), 71

[I 2025-01-06 12:33:40,808] Trial 178 finished with value: 0.018553408834015343 and parameters: {'similarity': 'cosine', 'topK': 16, 'shrink': 553, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 717.63 column/sec. Elapsed time 53.12 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 33.70 sec. Users per second: 1055
Similarity column 38121 (100.0%), 719.82 column/sec. Elapsed time 52.96 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 34.13 sec. Users per second: 1042
Similarity column 38121 (100.0%), 718.70 column/sec. Elapsed time 53.04 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 33.51 sec. Users per second: 1062
Similarity column 38121 (100.0%), 719.85 column/sec. Elapsed time 52.96 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 33.51 sec. Users per second: 1062
Similarity column 38121 (100.0%), 72

[I 2025-01-06 12:40:55,588] Trial 179 finished with value: 0.018363634941642244 and parameters: {'similarity': 'cosine', 'topK': 31, 'shrink': 496, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 251.39 column/sec. Elapsed time 2.53 min
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 30.25 sec. Users per second: 1176
Similarity column 38121 (100.0%), 250.80 column/sec. Elapsed time 2.53 min
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 30.47 sec. Users per second: 1167
Similarity column 38121 (100.0%), 243.28 column/sec. Elapsed time 2.61 min
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 30.49 sec. Users per second: 1167
Similarity column 38121 (100.0%), 243.26 column/sec. Elapsed time 2.61 min
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 30.82 sec. Users per second: 1154
Similarity column 38121 (100.0%), 251.04

[I 2025-01-06 12:56:18,644] Trial 180 finished with value: 0.00976952647803173 and parameters: {'similarity': 'euclidean', 'topK': 12, 'shrink': 477, 'feature_weighting': 'TF-IDF', 'normalize_avg_row': True, 'similarity_from_distance_mode': 'lin', 'normalize': False}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 728.11 column/sec. Elapsed time 52.36 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 31.70 sec. Users per second: 1122
Similarity column 38121 (100.0%), 728.13 column/sec. Elapsed time 52.35 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 31.37 sec. Users per second: 1134
Similarity column 38121 (100.0%), 740.02 column/sec. Elapsed time 51.51 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 31.47 sec. Users per second: 1131
Similarity column 38121 (100.0%), 735.77 column/sec. Elapsed time 51.81 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 31.39 sec. Users per second: 1133
Similarity column 38121 (100.0%), 73

[I 2025-01-06 13:03:17,851] Trial 181 finished with value: 0.018625513080220805 and parameters: {'similarity': 'cosine', 'topK': 10, 'shrink': 566, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 1063.57 column/sec. Elapsed time 35.84 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 23.86 sec. Users per second: 1491
Similarity column 38121 (100.0%), 1057.04 column/sec. Elapsed time 36.06 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 24.04 sec. Users per second: 1479
Similarity column 38121 (100.0%), 1059.67 column/sec. Elapsed time 35.97 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 23.91 sec. Users per second: 1488
Similarity column 38121 (100.0%), 1062.07 column/sec. Elapsed time 35.89 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 23.92 sec. Users per second: 1487
Similarity column 38121 (100.0%)

[I 2025-01-06 13:08:18,812] Trial 182 finished with value: 0.0002664826418719655 and parameters: {'similarity': 'cosine', 'topK': 0, 'shrink': 570, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 748.55 column/sec. Elapsed time 50.93 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 32.44 sec. Users per second: 1096
Similarity column 38121 (100.0%), 734.36 column/sec. Elapsed time 51.91 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 32.74 sec. Users per second: 1086
Similarity column 38121 (100.0%), 738.58 column/sec. Elapsed time 51.61 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 32.58 sec. Users per second: 1092
Similarity column 38121 (100.0%), 741.02 column/sec. Elapsed time 51.44 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 32.86 sec. Users per second: 1082
Similarity column 38121 (100.0%), 73

[I 2025-01-06 13:15:21,713] Trial 183 finished with value: 0.01849641499360475 and parameters: {'similarity': 'cosine', 'topK': 26, 'shrink': 537, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 738.03 column/sec. Elapsed time 51.65 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 32.89 sec. Users per second: 1081
Similarity column 38121 (100.0%), 738.10 column/sec. Elapsed time 51.65 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 33.36 sec. Users per second: 1066
Similarity column 38121 (100.0%), 737.78 column/sec. Elapsed time 51.67 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 32.83 sec. Users per second: 1084
Similarity column 38121 (100.0%), 737.19 column/sec. Elapsed time 51.71 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 32.52 sec. Users per second: 1094
Similarity column 38121 (100.0%), 74

[I 2025-01-06 13:22:26,230] Trial 184 finished with value: 0.015524765347209164 and parameters: {'similarity': 'asymmetric', 'topK': 40, 'shrink': 584, 'feature_weighting': 'TF-IDF', 'asymmetric_alpha': 1.1687371953163728}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 977.66 column/sec. Elapsed time 38.99 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 27.20 sec. Users per second: 1308
Similarity column 38121 (100.0%), 983.96 column/sec. Elapsed time 38.74 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 27.43 sec. Users per second: 1297
Similarity column 38121 (100.0%), 981.46 column/sec. Elapsed time 38.84 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 27.90 sec. Users per second: 1275
Similarity column 38121 (100.0%), 980.70 column/sec. Elapsed time 38.87 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 27.27 sec. Users per second: 1304
Similarity column 38121 (100.0%), 97

[I 2025-01-06 13:27:59,768] Trial 185 finished with value: 0.016272298484203633 and parameters: {'similarity': 'cosine', 'topK': 1, 'shrink': 515, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 742.81 column/sec. Elapsed time 51.32 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 31.69 sec. Users per second: 1122
Similarity column 38121 (100.0%), 740.84 column/sec. Elapsed time 51.46 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 31.62 sec. Users per second: 1125
Similarity column 38121 (100.0%), 733.08 column/sec. Elapsed time 52.00 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 31.86 sec. Users per second: 1117
Similarity column 38121 (100.0%), 740.48 column/sec. Elapsed time 51.48 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 31.51 sec. Users per second: 1129
Similarity column 38121 (100.0%), 73

[I 2025-01-06 13:34:58,932] Trial 186 finished with value: 0.018644810181348614 and parameters: {'similarity': 'cosine', 'topK': 14, 'shrink': 554, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 720.12 column/sec. Elapsed time 52.94 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 51.71 sec. Users per second: 688
Similarity column 38121 (100.0%), 714.43 column/sec. Elapsed time 53.36 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 51.84 sec. Users per second: 686
Similarity column 38121 (100.0%), 704.24 column/sec. Elapsed time 54.13 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 52.36 sec. Users per second: 680
Similarity column 38121 (100.0%), 706.72 column/sec. Elapsed time 53.94 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 52.16 sec. Users per second: 682
Similarity column 38121 (100.0%), 708.50

[I 2025-01-06 13:43:53,469] Trial 187 finished with value: 0.016448116511932138 and parameters: {'similarity': 'cosine', 'topK': 608, 'shrink': 551, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 718.18 column/sec. Elapsed time 53.08 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 32.25 sec. Users per second: 1103
Similarity column 38121 (100.0%), 719.15 column/sec. Elapsed time 53.01 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 32.12 sec. Users per second: 1108
Similarity column 38121 (100.0%), 731.71 column/sec. Elapsed time 52.10 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 31.93 sec. Users per second: 1114
Similarity column 38121 (100.0%), 735.24 column/sec. Elapsed time 51.85 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 31.91 sec. Users per second: 1115
Similarity column 38121 (100.0%), 73

[I 2025-01-06 13:50:56,743] Trial 188 finished with value: 0.018628302856076712 and parameters: {'similarity': 'cosine', 'topK': 14, 'shrink': 532, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 735.95 column/sec. Elapsed time 51.80 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 34.66 sec. Users per second: 1026
Similarity column 38121 (100.0%), 736.51 column/sec. Elapsed time 51.76 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 34.42 sec. Users per second: 1033
Similarity column 38121 (100.0%), 737.49 column/sec. Elapsed time 51.69 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 34.40 sec. Users per second: 1034
Similarity column 38121 (100.0%), 739.75 column/sec. Elapsed time 51.53 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 34.72 sec. Users per second: 1025
Similarity column 38121 (100.0%), 73

[I 2025-01-06 13:58:10,392] Trial 189 finished with value: 0.018168088719730847 and parameters: {'similarity': 'cosine', 'topK': 53, 'shrink': 521, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 739.78 column/sec. Elapsed time 51.53 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 33.03 sec. Users per second: 1077
Similarity column 38121 (100.0%), 737.57 column/sec. Elapsed time 51.68 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 32.76 sec. Users per second: 1086
Similarity column 38121 (100.0%), 740.41 column/sec. Elapsed time 51.49 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 33.19 sec. Users per second: 1072
Similarity column 38121 (100.0%), 740.92 column/sec. Elapsed time 51.45 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 32.83 sec. Users per second: 1084
Similarity column 38121 (100.0%), 74

[I 2025-01-06 14:05:14,395] Trial 190 finished with value: 0.01847639156554346 and parameters: {'similarity': 'cosine', 'topK': 26, 'shrink': 559, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 744.46 column/sec. Elapsed time 51.21 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 31.32 sec. Users per second: 1135
Similarity column 38121 (100.0%), 741.02 column/sec. Elapsed time 51.44 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 31.47 sec. Users per second: 1130
Similarity column 38121 (100.0%), 745.18 column/sec. Elapsed time 51.16 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 31.58 sec. Users per second: 1127
Similarity column 38121 (100.0%), 744.33 column/sec. Elapsed time 51.22 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 31.20 sec. Users per second: 1140
Similarity column 38121 (100.0%), 74

[I 2025-01-06 14:12:09,424] Trial 191 finished with value: 0.018661888216538573 and parameters: {'similarity': 'cosine', 'topK': 12, 'shrink': 536, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 744.46 column/sec. Elapsed time 51.21 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 31.38 sec. Users per second: 1133
Similarity column 38121 (100.0%), 738.83 column/sec. Elapsed time 51.60 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 31.37 sec. Users per second: 1134
Similarity column 38121 (100.0%), 743.61 column/sec. Elapsed time 51.27 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 31.54 sec. Users per second: 1128
Similarity column 38121 (100.0%), 742.19 column/sec. Elapsed time 51.36 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 31.45 sec. Users per second: 1131
Similarity column 38121 (100.0%), 74

[I 2025-01-06 14:19:05,036] Trial 192 finished with value: 0.018648169415590592 and parameters: {'similarity': 'cosine', 'topK': 12, 'shrink': 533, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 745.41 column/sec. Elapsed time 51.14 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 32.55 sec. Users per second: 1093
Similarity column 38121 (100.0%), 741.25 column/sec. Elapsed time 51.43 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 32.71 sec. Users per second: 1088
Similarity column 38121 (100.0%), 745.83 column/sec. Elapsed time 51.11 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 32.30 sec. Users per second: 1102
Similarity column 38121 (100.0%), 743.15 column/sec. Elapsed time 51.30 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 32.51 sec. Users per second: 1094
Similarity column 38121 (100.0%), 74

[I 2025-01-06 14:26:05,292] Trial 193 finished with value: 0.01856491396186851 and parameters: {'similarity': 'cosine', 'topK': 23, 'shrink': 531, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 747.01 column/sec. Elapsed time 51.03 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 33.55 sec. Users per second: 1060
Similarity column 38121 (100.0%), 745.58 column/sec. Elapsed time 51.13 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 33.29 sec. Users per second: 1068
Similarity column 38121 (100.0%), 745.20 column/sec. Elapsed time 51.16 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 33.50 sec. Users per second: 1062
Similarity column 38121 (100.0%), 734.69 column/sec. Elapsed time 51.89 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 33.65 sec. Users per second: 1057
Similarity column 38121 (100.0%), 73

[I 2025-01-06 14:33:11,885] Trial 194 finished with value: 0.01829322539791657 and parameters: {'similarity': 'cosine', 'topK': 39, 'shrink': 500, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 1043.78 column/sec. Elapsed time 36.52 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 23.82 sec. Users per second: 1493
Similarity column 38121 (100.0%), 1048.63 column/sec. Elapsed time 36.35 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 24.03 sec. Users per second: 1481
Similarity column 38121 (100.0%), 1042.63 column/sec. Elapsed time 36.56 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 24.15 sec. Users per second: 1474
Similarity column 38121 (100.0%), 1047.47 column/sec. Elapsed time 36.39 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 23.95 sec. Users per second: 1485
Similarity column 38121 (100.0%)

[I 2025-01-06 14:38:17,446] Trial 195 finished with value: 0.0002664826418719655 and parameters: {'similarity': 'tversky', 'topK': 0, 'shrink': 542, 'feature_weighting': 'TF-IDF', 'tversky_alpha': 1.5077809381435272, 'tversky_beta': 0.5322346277112638}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 741.60 column/sec. Elapsed time 51.40 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 32.10 sec. Users per second: 1108
Similarity column 38121 (100.0%), 740.34 column/sec. Elapsed time 51.49 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 31.76 sec. Users per second: 1120
Similarity column 38121 (100.0%), 742.57 column/sec. Elapsed time 51.34 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 31.66 sec. Users per second: 1124
Similarity column 38121 (100.0%), 744.64 column/sec. Elapsed time 51.19 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 32.19 sec. Users per second: 1105
Similarity column 38121 (100.0%), 74

[I 2025-01-06 14:45:15,611] Trial 196 finished with value: 0.018488894001407984 and parameters: {'similarity': 'cosine', 'topK': 18, 'shrink': 454, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 743.80 column/sec. Elapsed time 51.25 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 33.09 sec. Users per second: 1075
Similarity column 38121 (100.0%), 742.05 column/sec. Elapsed time 51.37 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 33.28 sec. Users per second: 1069
Similarity column 38121 (100.0%), 726.09 column/sec. Elapsed time 52.50 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 33.82 sec. Users per second: 1052
Similarity column 38121 (100.0%), 729.13 column/sec. Elapsed time 52.28 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 34.15 sec. Users per second: 1042
Similarity column 38121 (100.0%), 70

[I 2025-01-06 14:52:33,089] Trial 197 finished with value: 0.012364306387745076 and parameters: {'similarity': 'cosine', 'topK': 42, 'shrink': 508, 'feature_weighting': 'none'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 657.82 column/sec. Elapsed time 57.95 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 36.21 sec. Users per second: 982
Similarity column 38121 (100.0%), 664.54 column/sec. Elapsed time 57.36 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 33.50 sec. Users per second: 1062
Similarity column 38121 (100.0%), 665.86 column/sec. Elapsed time 57.25 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 35.23 sec. Users per second: 1010
Similarity column 38121 (100.0%), 695.44 column/sec. Elapsed time 54.82 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 32.23 sec. Users per second: 1104
Similarity column 38121 (100.0%), 701

[I 2025-01-06 15:00:08,179] Trial 198 finished with value: 0.01859521833978208 and parameters: {'similarity': 'cosine', 'topK': 14, 'shrink': 485, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 694.40 column/sec. Elapsed time 54.90 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 36.70 sec. Users per second: 969
Similarity column 38121 (100.0%), 716.81 column/sec. Elapsed time 53.18 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 35.26 sec. Users per second: 1009
Similarity column 38121 (100.0%), 730.86 column/sec. Elapsed time 52.16 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 34.98 sec. Users per second: 1017
Similarity column 38121 (100.0%), 730.50 column/sec. Elapsed time 52.18 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 34.99 sec. Users per second: 1017
Similarity column 38121 (100.0%), 736

[I 2025-01-06 15:07:32,152] Trial 199 finished with value: 0.01811117047119342 and parameters: {'similarity': 'cosine', 'topK': 64, 'shrink': 530, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 743.41 column/sec. Elapsed time 51.28 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 33.36 sec. Users per second: 1066
Similarity column 38121 (100.0%), 733.25 column/sec. Elapsed time 51.99 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 33.19 sec. Users per second: 1072
Similarity column 38121 (100.0%), 737.79 column/sec. Elapsed time 51.67 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 33.54 sec. Users per second: 1061
Similarity column 38121 (100.0%), 719.27 column/sec. Elapsed time 53.00 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 33.80 sec. Users per second: 1052
Similarity column 38121 (100.0%), 71

[I 2025-01-06 15:14:44,223] Trial 200 finished with value: 0.013185967517649888 and parameters: {'similarity': 'jaccard', 'topK': 29, 'shrink': 547, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 724.09 column/sec. Elapsed time 52.65 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 31.68 sec. Users per second: 1122
Similarity column 38121 (100.0%), 720.11 column/sec. Elapsed time 52.94 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 31.31 sec. Users per second: 1136
Similarity column 38121 (100.0%), 734.06 column/sec. Elapsed time 51.93 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 31.58 sec. Users per second: 1127
Similarity column 38121 (100.0%), 727.50 column/sec. Elapsed time 52.40 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 31.32 sec. Users per second: 1136
Similarity column 38121 (100.0%), 72

[I 2025-01-06 15:21:46,067] Trial 201 finished with value: 0.01863559762687608 and parameters: {'similarity': 'cosine', 'topK': 9, 'shrink': 579, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 728.66 column/sec. Elapsed time 52.32 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 31.49 sec. Users per second: 1129
Similarity column 38121 (100.0%), 724.73 column/sec. Elapsed time 52.60 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 32.22 sec. Users per second: 1104
Similarity column 38121 (100.0%), 718.50 column/sec. Elapsed time 53.06 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 32.12 sec. Users per second: 1108
Similarity column 38121 (100.0%), 716.49 column/sec. Elapsed time 53.20 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 31.96 sec. Users per second: 1113
Similarity column 38121 (100.0%), 72

[I 2025-01-06 15:28:50,787] Trial 202 finished with value: 0.018632893935222144 and parameters: {'similarity': 'cosine', 'topK': 11, 'shrink': 575, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 1035.28 column/sec. Elapsed time 36.82 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 24.33 sec. Users per second: 1462
Similarity column 38121 (100.0%), 1030.10 column/sec. Elapsed time 37.01 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 24.85 sec. Users per second: 1432
Similarity column 38121 (100.0%), 1026.39 column/sec. Elapsed time 37.14 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 24.49 sec. Users per second: 1453
Similarity column 38121 (100.0%), 1031.40 column/sec. Elapsed time 36.96 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 24.26 sec. Users per second: 1466
Similarity column 38121 (100.0%)

[I 2025-01-06 15:33:59,926] Trial 203 finished with value: 0.0002664826418719655 and parameters: {'similarity': 'cosine', 'topK': 0, 'shrink': 580, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 726.57 column/sec. Elapsed time 52.47 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 33.23 sec. Users per second: 1070
Similarity column 38121 (100.0%), 728.27 column/sec. Elapsed time 52.34 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 33.33 sec. Users per second: 1067
Similarity column 38121 (100.0%), 730.52 column/sec. Elapsed time 52.18 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 33.28 sec. Users per second: 1069
Similarity column 38121 (100.0%), 730.76 column/sec. Elapsed time 52.17 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 33.07 sec. Users per second: 1076
Similarity column 38121 (100.0%), 73

[I 2025-01-06 15:41:08,931] Trial 204 finished with value: 0.01848370255048018 and parameters: {'similarity': 'cosine', 'topK': 29, 'shrink': 565, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 728.74 column/sec. Elapsed time 52.31 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 31.87 sec. Users per second: 1116
Similarity column 38121 (100.0%), 733.33 column/sec. Elapsed time 51.98 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 31.70 sec. Users per second: 1122
Similarity column 38121 (100.0%), 732.51 column/sec. Elapsed time 52.04 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 31.75 sec. Users per second: 1121
Similarity column 38121 (100.0%), 729.29 column/sec. Elapsed time 52.27 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 31.84 sec. Users per second: 1117
Similarity column 38121 (100.0%), 73

[I 2025-01-06 15:48:10,384] Trial 205 finished with value: 0.018637114766294857 and parameters: {'similarity': 'cosine', 'topK': 13, 'shrink': 588, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 735.43 column/sec. Elapsed time 51.83 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 34.18 sec. Users per second: 1040
Similarity column 38121 (100.0%), 729.89 column/sec. Elapsed time 52.23 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 34.18 sec. Users per second: 1041
Similarity column 38121 (100.0%), 730.60 column/sec. Elapsed time 52.18 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 34.29 sec. Users per second: 1038
Similarity column 38121 (100.0%), 732.04 column/sec. Elapsed time 52.07 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 34.13 sec. Users per second: 1042
Similarity column 38121 (100.0%), 73

[I 2025-01-06 15:55:23,768] Trial 206 finished with value: 0.01828258619847848 and parameters: {'similarity': 'cosine', 'topK': 44, 'shrink': 599, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 735.44 column/sec. Elapsed time 51.83 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 31.68 sec. Users per second: 1123
Similarity column 38121 (100.0%), 723.08 column/sec. Elapsed time 52.72 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 32.44 sec. Users per second: 1097
Similarity column 38121 (100.0%), 708.76 column/sec. Elapsed time 53.79 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 32.76 sec. Users per second: 1086
Similarity column 38121 (100.0%), 707.71 column/sec. Elapsed time 53.87 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 32.65 sec. Users per second: 1089
Similarity column 38121 (100.0%), 70

[I 2025-01-06 16:02:34,085] Trial 207 finished with value: 0.018420443234432548 and parameters: {'similarity': 'cosine', 'topK': 13, 'shrink': 603, 'feature_weighting': 'BM25'}. Best is trial 121 with value: 0.018671630477460725.


Similarity column 38121 (100.0%), 1011.97 column/sec. Elapsed time 37.67 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 24.66 sec. Users per second: 1442
Similarity column 38121 (100.0%), 1019.94 column/sec. Elapsed time 37.38 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 24.67 sec. Users per second: 1442
Similarity column 38121 (100.0%), 1023.02 column/sec. Elapsed time 37.26 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 24.35 sec. Users per second: 1461
Similarity column 38121 (100.0%), 1034.76 column/sec. Elapsed time 36.84 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 24.27 sec. Users per second: 1466
Similarity column 38121 (100.0%)

[I 2025-01-06 16:07:44,421] Trial 208 finished with value: 0.0002664826418719655 and parameters: {'similarity': 'cosine', 'topK': 0, 'shrink': 582, 'feature_weighting': 'TF-IDF'}. Best is trial 121 with value: 0.018671630477460725.


## Some optuna visualizations on recommender parameters

In [14]:
if not config['tune_parameters']:
    optuna_study = optuna.load_study(study_name=f'hyperparameters_tuning_{config["model"]}_{config["metric"]}', storage=f'sqlite:///{config["database_path"]}')
    
fig = optuna.visualization.plot_slice(optuna_study)
fig.show()

In [15]:
if not config['tune_parameters']:
    optuna_study = optuna.load_study(study_name=f'hyperparameters_tuning_{config["model"]}_{config["metric"]}', storage=f'sqlite:///{config["database_path"]}')
    
fig = optuna.visualization.plot_param_importances(optuna_study)
fig.show()

## Let's train the recommender with best parameter values

In [16]:
if config['tune_best_params']:

    if config['tune_parameters']:
        best_params = optuna_study.best_trial.params
    else: 
        with open(f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/best_params_{config["model"]}_{config["metric"]}.json', 'r') as best_params_json:
            best_params = json.load(best_params_json)

    recommender_instance = ItemKNNCBFRecommender(URM_train + URM_validation,ICM_all)
    recommender_instance.fit(**best_params)

Similarity column 38121 (100.0%), 725.44 column/sec. Elapsed time 52.55 sec


# Testing

Create the recommendations for the submission. 

In [17]:
if config['tune_best_params']:

    data_target_users_test = pd.read_csv('/kaggle/working/RECsys_Challenge2024/Dataset/data_target_users_test.csv')
    create_submission(data_target_users_test, recommender_instance, f'/kaggle/working/submission_{config["model"]}_{config["metric"]}.csv')

Submission file saved as /kaggle/working/submission_ItemKNNCBF_MAP.csv


# Save Version on GitHub 

Write or import a json file where best hyperparameters are saved. 

In [18]:
if config['tune_parameters']:
    with open(f'/kaggle/working/best_params_{config["model"]}_{config["metric"]}.json', 'w') as params_file:
        json.dump(optuna_study.best_params, params_file)
        
    if config['save_github']:
        upload_file(
            f'/kaggle/working/best_params_{config["model"]}_{config["metric"]}.json', 
            f'{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/best_params_{config["model"]}_{config["metric"]}.json', 
            f'{config["model"]}_{config["metric"]} tuning results (from kaggle notebook)',
            repo
        )
elif config['copy_prev_best_params']:
    shutil.copyfile(
        f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/'\
        f'best_params_{config["model"]}_{config["metric"]}.json', 
        f'/kaggle/working/best_params_{config["model"]}_{config["metric"]}.json'
    )

File 'TrainedModels/WithKFCV/KNN/ItemKNNCBFRecommender/OptimizingMAP/best_params_ItemKNNCBF_MAP.json' updated successfully.


Save the history of the tuned model.

In [19]:
if config['save_github'] and config['tune_parameters']:
    upload_file(
        config['database_path'], 
        f'{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/history_{config["model"]}_{config["metric"]}.db',
        f'Tuning {config["model"]}_{config["metric"]} db updated results (from kaggle notebook)',
        repo
    )

File 'TrainedModels/WithKFCV/KNN/ItemKNNCBFRecommender/OptimizingMAP/history_ItemKNNCBF_MAP.db' updated successfully.


Save the best trained model and its submission.

In [20]:
if config['save_github'] and config['tune_best_params']: 
    upload_file(
                f'/kaggle/working/submission_{config["model"]}_{config["metric"]}.csv', 
                f'{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/Submission/submission_{config["model"]}_{config["metric"]}.csv', 
                f'New {config["model"]}_{config["metric"]} submission (from kaggle notebook)',
                repo
            )

File 'TrainedModels/WithKFCV/KNN/ItemKNNCBFRecommender/OptimizingMAP/Submission/submission_ItemKNNCBF_MAP.csv' updated successfully.
