# Set up the connection with GitHub

In [1]:
# Optional cleanup: uncomment to delete an existing clone of the repository
# from /kaggle/working before running the clone cell.
# ! cd /kaggle/working && rm -rf RECsys_Challenge2024

In [2]:
from kaggle_secrets import UserSecretsClient

# Read the GitHub access token stored under the Kaggle secret named "Token".
token = UserSecretsClient().get_secret("Token")

# Clone the project repository, authenticating by interpolating the token into the URL.
# NOTE(review): the token is placed in the shell command line and presumably ends up in
# the clone's remote URL (.git/config) — confirm this is acceptable for this token's scope.
! git clone https://{token}@github.com/madratak/RECsys_Challenge2024.git

Cloning into 'RECsys_Challenge2024'...
remote: Enumerating objects: 6727, done.[K
remote: Counting objects: 100% (277/277), done.[K
remote: Compressing objects: 100% (239/239), done.[K
remote: Total 6727 (delta 105), reused 30 (delta 6), pack-reused 6450 (from 2)[K
Receiving objects: 100% (6727/6727), 408.86 MiB | 34.18 MiB/s, done.
Resolving deltas: 100% (3680/3680), done.
Updating files: 100% (503/503), done.


In [3]:
! pip install PyGithub requests

Collecting PyGithub
  Downloading PyGithub-2.5.0-py3-none-any.whl.metadata (3.9 kB)
Collecting pynacl>=1.4.0 (from PyGithub)
  Downloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl.metadata (8.6 kB)
Downloading PyGithub-2.5.0-py3-none-any.whl (375 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m375.9/375.9 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (856 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m856.7/856.7 kB[0m [31m23.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pynacl, PyGithub
Successfully installed PyGithub-2.5.0 pynacl-1.5.0


In [4]:
# Make the cloned repository the working directory so that its packages can be
# imported and its scripts can be run by relative path.
%cd /kaggle/working/RECsys_Challenge2024 
# Run the repository's script that compiles its Cython sources with the current interpreter.
! python run_compile_all_cython.py

/kaggle/working/RECsys_Challenge2024
run_compile_all_cython: Found 11 Cython files in 5 folders...
run_compile_all_cython: All files will be compiled using your current python environment: '/usr/bin/python3'
Compiling [1/11]: MatrixFactorizationImpressions_Cython_Epoch.pyx... 
In file included from [01m[K/usr/local/lib/python3.10/dist-packages/numpy/core/include/numpy/ndarraytypes.h:1929[m[K,
                 from [01m[K/usr/local/lib/python3.10/dist-packages/numpy/core/include/numpy/ndarrayobject.h:12[m[K,
                 from [01m[K/usr/local/lib/python3.10/dist-packages/numpy/core/include/numpy/arrayobject.h:5[m[K,
                 from [01m[KMatrixFactorizationImpressions_Cython_Epoch.c:1252[m[K:
      |  [01;35m[K^~~~~~~[m[K
[01m[KMatrixFactorizationImpressions_Cython_Epoch.c:[m[K In function ‘[01m[K__pyx_f_43MatrixFactorizationImpressions_Cython_Epoch_32MatrixFactorization_Cython_Epoch_sampleBPR_Cython[m[K’:
30353 |       [01;35m[K__pyx_t_4

In [5]:
from Utils.notebookFunctions import *
import numpy as np
import pandas as pd
import scipy.sparse as sps
import time
import shutil
import optuna
import json
import os
from Utils.seconds_to_biggest_unit import seconds_to_biggest_unit


# Absolute path of the cloned repository on the Kaggle working disk.
K_PATH = '/kaggle/working/RECsys_Challenge2024'
# Repository-relative folder that, joined with K_PATH, locates the saved Optuna history files.
GH_PATH = 'TrainedModels/WithKFCV/KNN'

# Seed NumPy's global random number generator.
np.random.seed(42)

## Import the repository **RECsys_Challenge2024**

In [6]:
# Obtain a handle to the GitHub repository using the access token read earlier.
# NOTE(review): get_repo_from_github is presumably provided by the wildcard import
# from Utils.notebookFunctions — confirm.
repo = get_repo_from_github(token)

Repository 'RECsys_Challenge2024' found.


In [7]:
# Run configuration for this tuning notebook.
config = {
    'model': 'UserKNNCF',            # recommender name; used in the history path and the study name
    'n_folds': 5,                    # number of folds requested from split_train_k_folds
    'metric': 'MAP',                 # metric name; used in the history path and the study name
    'tune_parameters': True,         # gate for the Optuna optimisation cell
    'database_path': None,           # filled in just below from 'model' and 'metric'
    'copy_prev_best_params': False,  # NOTE(review): not read by the cells visible here — confirm usage
    'tune_best_params': True,        # NOTE(review): not read by the cells visible here — confirm usage
    'save_github': True              # NOTE(review): not read by the cells visible here — confirm usage
}

# Derive the local SQLite file name from the model and metric instead of repeating them
# as a literal, so it cannot drift out of sync with the study name and with the
# history file copied from the repository.
config['database_path'] = f'/kaggle/working/history_{config["model"]}_{config["metric"]}.db'

Import the database where previous tuning trials have been saved.

In [8]:
# Restore the Optuna history of previous tuning runs from the cloned repository, if present.
history_db_source = (
    f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/'
    f'Optimizing{config["metric"]}/history_{config["model"]}_{config["metric"]}.db'
)

if os.path.exists(history_db_source):
    # Errors raised here (e.g. a missing or unwritable destination directory) are real
    # problems and must surface instead of being mistaken for "no history yet".
    shutil.copyfile(history_db_source, config['database_path'])
else:
    # Best effort: if no history is present, Optuna will create the database itself.
    print(f"No previous tuning history found at '{history_db_source}'; a new database will be created.")

# Construction of URM and ICM matrices

In [9]:
# Load the training interactions; the columns used below are user_id, item_id and data.
# The path is built from K_PATH so the repository location is defined in one place.
URM_all_dataframe = pd.read_csv(f"{K_PATH}/Dataset/data_train.csv")

n_users = len(URM_all_dataframe["user_id"].unique())
n_items = len(URM_all_dataframe["item_id"].unique())

# The matrix shape is taken from the number of distinct ids, which is only valid when
# the ids are exactly 0..n-1. Check it explicitly so a violation fails with a clear message.
assert URM_all_dataframe["user_id"].min() == 0 and URM_all_dataframe["user_id"].max() == n_users - 1, \
    "user_id values must be the contiguous range 0..n_users-1"
assert URM_all_dataframe["item_id"].min() == 0 and URM_all_dataframe["item_id"].max() == n_items - 1, \
    "item_id values must be the contiguous range 0..n_items-1"

# Sparse user x item matrix holding the "data" value of every interaction.
URM_all = sps.csr_matrix((URM_all_dataframe["data"].values,
                          (URM_all_dataframe["user_id"].values, URM_all_dataframe["item_id"].values)),
                        shape = (n_users, n_items))

# Show the matrix summary as the cell output.
URM_all

<35736x38121 sparse matrix of type '<class 'numpy.float64'>'
	with 1764607 stored elements in Compressed Sparse Row format>

In [10]:
# Load the item metadata; the columns used below are item_id, feature_id and data.
# The path is built from K_PATH so the repository location is defined in one place.
ICM_dataframe = pd.read_csv(f"{K_PATH}/Dataset/data_ICM_metadata.csv")

# NOTE: this rebinds n_items (set earlier from the interactions file) to the number of
# distinct items that appear in the metadata file.
n_items = len(ICM_dataframe["item_id"].unique())
n_features = len(ICM_dataframe["feature_id"].unique())

# The matrix shape is taken from the number of distinct ids, which is only valid when
# the ids are exactly 0..n-1. Check it explicitly so a violation fails with a clear message.
assert ICM_dataframe["item_id"].min() == 0 and ICM_dataframe["item_id"].max() == n_items - 1, \
    "item_id values must be the contiguous range 0..n_items-1"
assert ICM_dataframe["feature_id"].min() == 0 and ICM_dataframe["feature_id"].max() == n_features - 1, \
    "feature_id values must be the contiguous range 0..n_features-1"

# Sparse item x feature matrix holding the "data" value of every (item, feature) pair.
ICM_all = sps.csr_matrix((ICM_dataframe["data"].values,
                          (ICM_dataframe["item_id"].values, ICM_dataframe["feature_id"].values)),
                        shape = (n_items, n_features))

# Show the matrix summary as the cell output.
ICM_all

<38121x94331 sparse matrix of type '<class 'numpy.float64'>'
	with 2940040 stored elements in Compressed Sparse Row format>

# Training

In [11]:
from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Data_manager.split_functions.split_train_k_folds import split_train_k_folds

# Hold-out split of URM_all with 80% of the data assigned to the training part.
# NOTE(review): URM_train / URM_validation are not read by the tuning cells visible here,
# which iterate over `folds` instead — confirm they are needed further down.
# Keep this call before the k-fold split: presumably both draw from the seeded global
# RNG, so swapping them would change the resulting splits — TODO confirm.
URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.80)

# K-fold split of URM_all; the tuning objective iterates it as (train, validation) pairs.
folds = split_train_k_folds(URM_all, k=config['n_folds'])



In [12]:
from Recommenders.KNN.UserKNNCFRecommender import UserKNNCFRecommender

def objective_function_UserKNNCF(optuna_trial):
    """Optuna objective: cross-validated MAP@10 of UserKNNCF for one sampled configuration.

    The trial samples the similarity type, ``topK``, ``shrink`` and the feature
    weighting, plus the extra parameters specific to the asymmetric, tversky and
    euclidean similarities. A recommender is then fitted on the training part of
    every entry of the module-level ``folds`` and evaluated on the matching
    validation part.

    Parameters
    ----------
    optuna_trial
        Trial object providing ``suggest_categorical``, ``suggest_int`` and
        ``suggest_float``.

    Returns
    -------
    The mean over all folds of the "MAP" value at cutoff 10.
    """
    similarity = optuna_trial.suggest_categorical(
        "similarity",
        ['cosine', 'dice', 'jaccard', 'asymmetric', 'tversky', 'euclidean'],
    )

    # Hyperparameters shared by every similarity type (sampled in this fixed order).
    full_hyperp = {"similarity": similarity}
    full_hyperp["topK"] = optuna_trial.suggest_int("topK", 0, 750)
    full_hyperp["shrink"] = optuna_trial.suggest_int("shrink", 0, 1000)
    full_hyperp['feature_weighting'] = optuna_trial.suggest_categorical(
        'feature_weighting', ["BM25", "TF-IDF", "none"])

    # Similarity-specific hyperparameters.
    if similarity == "asymmetric":
        full_hyperp.update(
            asymmetric_alpha=optuna_trial.suggest_float("asymmetric_alpha", 0, 2, log=False),
            normalize=True,
        )
    elif similarity == "tversky":
        full_hyperp.update(
            tversky_alpha=optuna_trial.suggest_float("tversky_alpha", 0, 2, log=False),
            tversky_beta=optuna_trial.suggest_float("tversky_beta", 0, 2, log=False),
            normalize=True,
        )
    elif similarity == "euclidean":
        full_hyperp.update(
            normalize_avg_row=optuna_trial.suggest_categorical("normalize_avg_row", [True, False]),
            similarity_from_distance_mode=optuna_trial.suggest_categorical(
                "similarity_from_distance_mode", ["lin", "log", "exp"]),
            normalize=optuna_trial.suggest_categorical("normalize", [True, False]),
        )

    def _fold_score(URM_train_fold, URM_validation_fold):
        # Fit on the training part of the fold and score on its validation part.
        recommender_instance = UserKNNCFRecommender(URM_train_fold)
        recommender_instance.fit(**full_hyperp)

        evaluator = EvaluatorHoldout(URM_validation_fold, cutoff_list=[10])
        result_df, _ = evaluator.evaluateRecommender(recommender_instance)

        # NOTE(review): the metric is hard-coded to "MAP" here, while the study name and
        # history file are derived from config["metric"] — confirm they are meant to agree.
        return result_df.loc[10]["MAP"]

    validation_results = [
        _fold_score(fold_train, fold_validation)
        for fold_train, fold_validation in folds
    ]

    return np.mean(validation_results)

In [13]:
if config['tune_parameters']:

    # Open the persistent study stored in the SQLite history database, creating it
    # when it does not exist yet (load_if_exists=True reuses a study of the same name).
    optuna_study = optuna.create_study(
        study_name=f'hyperparameters_tuning_{config["model"]}_{config["metric"]}',
        storage=f'sqlite:///{config["database_path"]}',
        direction='maximize',
        load_if_exists=True,
    )

    # Run 30 trials of the cross-validated objective on top of any loaded history.
    optuna_study.optimize(objective_function_UserKNNCF, n_trials=30)

[I 2025-01-06 09:56:28,089] Using an existing study with name 'hyperparameters_tuning_UserKNNCF_MAP' instead of creating a new one.


Similarity column 35736 (100.0%), 2992.63 column/sec. Elapsed time 11.94 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 57.56 sec. Users per second: 618
Similarity column 35736 (100.0%), 3000.55 column/sec. Elapsed time 11.91 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 57.29 sec. Users per second: 621
Similarity column 35736 (100.0%), 3010.57 column/sec. Elapsed time 11.87 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 57.30 sec. Users per second: 621
Similarity column 35736 (100.0%), 3000.56 column/sec. Elapsed time 11.91 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 57.93 sec. Users per second: 614
Similarity column 35736 (100.0%), 29

[I 2025-01-06 10:02:22,610] Trial 221 finished with value: 0.04229930985733535 and parameters: {'similarity': 'asymmetric', 'topK': 736, 'shrink': 587, 'feature_weighting': 'BM25', 'asymmetric_alpha': 0.8777677501072773}. Best is trial 174 with value: 0.04235975952863978.


Similarity column 35736 (100.0%), 2970.32 column/sec. Elapsed time 12.03 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 58.99 sec. Users per second: 603
Similarity column 35736 (100.0%), 2961.64 column/sec. Elapsed time 12.07 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 57.73 sec. Users per second: 616
Similarity column 35736 (100.0%), 3048.78 column/sec. Elapsed time 11.72 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 57.66 sec. Users per second: 617
Similarity column 35736 (100.0%), 3027.69 column/sec. Elapsed time 11.80 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 57.50 sec. Users per second: 619
Similarity column 35736 (100.0%), 30

[I 2025-01-06 10:08:17,601] Trial 222 finished with value: 0.042389745203136445 and parameters: {'similarity': 'asymmetric', 'topK': 750, 'shrink': 631, 'feature_weighting': 'BM25', 'asymmetric_alpha': 0.7898403492409651}. Best is trial 222 with value: 0.042389745203136445.


Similarity column 35736 (100.0%), 3033.59 column/sec. Elapsed time 11.78 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 56.76 sec. Users per second: 627
Similarity column 35736 (100.0%), 3034.89 column/sec. Elapsed time 11.78 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 56.90 sec. Users per second: 625
Similarity column 35736 (100.0%), 3053.95 column/sec. Elapsed time 11.70 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 57.08 sec. Users per second: 623
Similarity column 35736 (100.0%), 3017.80 column/sec. Elapsed time 11.84 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 56.78 sec. Users per second: 626
Similarity column 35736 (100.0%), 30

[I 2025-01-06 10:14:07,093] Trial 223 finished with value: 0.04230473700056118 and parameters: {'similarity': 'asymmetric', 'topK': 736, 'shrink': 633, 'feature_weighting': 'BM25', 'asymmetric_alpha': 0.8579798393623105}. Best is trial 222 with value: 0.042389745203136445.


Similarity column 35736 (100.0%), 3080.54 column/sec. Elapsed time 11.60 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 47.12 sec. Users per second: 755
Similarity column 35736 (100.0%), 3062.74 column/sec. Elapsed time 11.67 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 46.92 sec. Users per second: 758
Similarity column 35736 (100.0%), 3058.33 column/sec. Elapsed time 11.68 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 47.19 sec. Users per second: 754
Similarity column 35736 (100.0%), 3085.42 column/sec. Elapsed time 11.58 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 50.60 sec. Users per second: 703
Similarity column 35736 (100.0%), 29

[I 2025-01-06 10:19:11,980] Trial 224 finished with value: 0.03975976980412295 and parameters: {'similarity': 'asymmetric', 'topK': 331, 'shrink': 629, 'feature_weighting': 'BM25', 'asymmetric_alpha': 0.7548818669187382}. Best is trial 222 with value: 0.042389745203136445.


Similarity column 35736 (100.0%), 2994.81 column/sec. Elapsed time 11.93 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 58.38 sec. Users per second: 609
Similarity column 35736 (100.0%), 3019.60 column/sec. Elapsed time 11.83 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 57.40 sec. Users per second: 620
Similarity column 35736 (100.0%), 3009.55 column/sec. Elapsed time 11.87 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 57.65 sec. Users per second: 617
Similarity column 35736 (100.0%), 3023.44 column/sec. Elapsed time 11.82 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 57.48 sec. Users per second: 619
Similarity column 35736 (100.0%), 30

[I 2025-01-06 10:25:06,222] Trial 225 finished with value: 0.042421652977767725 and parameters: {'similarity': 'asymmetric', 'topK': 750, 'shrink': 551, 'feature_weighting': 'BM25', 'asymmetric_alpha': 0.7938258356843384}. Best is trial 225 with value: 0.042421652977767725.


Similarity column 35736 (100.0%), 2983.29 column/sec. Elapsed time 11.98 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 56.98 sec. Users per second: 624
Similarity column 35736 (100.0%), 3028.16 column/sec. Elapsed time 11.80 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 56.93 sec. Users per second: 625
Similarity column 35736 (100.0%), 3016.80 column/sec. Elapsed time 11.85 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 56.97 sec. Users per second: 625
Similarity column 35736 (100.0%), 3026.21 column/sec. Elapsed time 11.81 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 56.63 sec. Users per second: 628
Similarity column 35736 (100.0%), 30

[I 2025-01-06 10:30:56,257] Trial 226 finished with value: 0.04232078708541735 and parameters: {'similarity': 'asymmetric', 'topK': 719, 'shrink': 545, 'feature_weighting': 'BM25', 'asymmetric_alpha': 0.7982273576548075}. Best is trial 225 with value: 0.042421652977767725.


Similarity column 35736 (100.0%), 3013.15 column/sec. Elapsed time 11.86 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 57.46 sec. Users per second: 619
Similarity column 35736 (100.0%), 2991.05 column/sec. Elapsed time 11.95 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 57.65 sec. Users per second: 617
Similarity column 35736 (100.0%), 2995.02 column/sec. Elapsed time 11.93 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 57.10 sec. Users per second: 623
Similarity column 35736 (100.0%), 3021.70 column/sec. Elapsed time 11.83 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 56.92 sec. Users per second: 625
Similarity column 35736 (100.0%), 30

[I 2025-01-06 10:36:48,634] Trial 227 finished with value: 0.042259425584893276 and parameters: {'similarity': 'asymmetric', 'topK': 707, 'shrink': 547, 'feature_weighting': 'BM25', 'asymmetric_alpha': 0.7242674164038805}. Best is trial 225 with value: 0.042421652977767725.


Similarity column 35736 (100.0%), 2951.86 column/sec. Elapsed time 12.11 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 56.30 sec. Users per second: 632
Similarity column 35736 (100.0%), 2956.92 column/sec. Elapsed time 12.09 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 56.48 sec. Users per second: 630
Similarity column 35736 (100.0%), 2943.17 column/sec. Elapsed time 12.14 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 56.61 sec. Users per second: 629
Similarity column 35736 (100.0%), 2895.41 column/sec. Elapsed time 12.34 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 56.39 sec. Users per second: 631
Similarity column 35736 (100.0%), 29

[I 2025-01-06 10:42:37,256] Trial 228 finished with value: 0.03323225079092358 and parameters: {'similarity': 'asymmetric', 'topK': 722, 'shrink': 578, 'feature_weighting': 'none', 'asymmetric_alpha': 0.796287706887566}. Best is trial 225 with value: 0.042421652977767725.


Similarity column 35736 (100.0%), 3039.68 column/sec. Elapsed time 11.76 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 57.54 sec. Users per second: 618
Similarity column 35736 (100.0%), 2975.75 column/sec. Elapsed time 12.01 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 57.90 sec. Users per second: 614
Similarity column 35736 (100.0%), 2961.88 column/sec. Elapsed time 12.07 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 58.04 sec. Users per second: 613
Similarity column 35736 (100.0%), 2955.05 column/sec. Elapsed time 12.09 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 57.47 sec. Users per second: 619
Similarity column 35736 (100.0%), 29

[I 2025-01-06 10:48:32,240] Trial 229 finished with value: 0.0423196148735781 and parameters: {'similarity': 'asymmetric', 'topK': 734, 'shrink': 560, 'feature_weighting': 'BM25', 'asymmetric_alpha': 0.8549107555668908}. Best is trial 225 with value: 0.042421652977767725.


Similarity column 35736 (100.0%), 3095.79 column/sec. Elapsed time 11.54 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 56.71 sec. Users per second: 627
Similarity column 35736 (100.0%), 3087.77 column/sec. Elapsed time 11.57 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 56.55 sec. Users per second: 629
Similarity column 35736 (100.0%), 3052.32 column/sec. Elapsed time 11.71 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 56.16 sec. Users per second: 634
Similarity column 35736 (100.0%), 3073.04 column/sec. Elapsed time 11.63 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 56.12 sec. Users per second: 634
Similarity column 35736 (100.0%), 31

[I 2025-01-06 10:54:19,312] Trial 230 finished with value: 0.03875046757443752 and parameters: {'similarity': 'jaccard', 'topK': 709, 'shrink': 588, 'feature_weighting': 'BM25'}. Best is trial 225 with value: 0.042421652977767725.


Similarity column 35736 (100.0%), 3015.43 column/sec. Elapsed time 11.85 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 58.05 sec. Users per second: 613
Similarity column 35736 (100.0%), 2963.46 column/sec. Elapsed time 12.06 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 58.11 sec. Users per second: 612
Similarity column 35736 (100.0%), 2991.22 column/sec. Elapsed time 11.95 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 58.28 sec. Users per second: 611
Similarity column 35736 (100.0%), 2950.66 column/sec. Elapsed time 12.11 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 57.79 sec. Users per second: 615
Similarity column 35736 (100.0%), 29

[I 2025-01-06 11:00:17,039] Trial 231 finished with value: 0.0423278130722116 and parameters: {'similarity': 'asymmetric', 'topK': 735, 'shrink': 550, 'feature_weighting': 'BM25', 'asymmetric_alpha': 0.8599322654744996}. Best is trial 225 with value: 0.042421652977767725.


Similarity column 35736 (100.0%), 2935.18 column/sec. Elapsed time 12.18 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 58.00 sec. Users per second: 613
Similarity column 35736 (100.0%), 2962.75 column/sec. Elapsed time 12.06 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 57.84 sec. Users per second: 615
Similarity column 35736 (100.0%), 2915.45 column/sec. Elapsed time 12.26 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 58.17 sec. Users per second: 612
Similarity column 35736 (100.0%), 2959.95 column/sec. Elapsed time 12.07 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 57.47 sec. Users per second: 619
Similarity column 35736 (100.0%), 29

[I 2025-01-06 11:06:14,644] Trial 232 finished with value: 0.04230863663055442 and parameters: {'similarity': 'asymmetric', 'topK': 728, 'shrink': 569, 'feature_weighting': 'BM25', 'asymmetric_alpha': 0.8431837999412853}. Best is trial 225 with value: 0.042421652977767725.


Similarity column 35736 (100.0%), 2918.20 column/sec. Elapsed time 12.25 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 57.71 sec. Users per second: 616
Similarity column 35736 (100.0%), 2982.06 column/sec. Elapsed time 11.98 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 57.70 sec. Users per second: 617
Similarity column 35736 (100.0%), 3010.37 column/sec. Elapsed time 11.87 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 57.78 sec. Users per second: 616
Similarity column 35736 (100.0%), 3000.59 column/sec. Elapsed time 11.91 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 57.85 sec. Users per second: 615
Similarity column 35736 (100.0%), 30

[I 2025-01-06 11:12:09,642] Trial 233 finished with value: 0.04232063224293754 and parameters: {'similarity': 'asymmetric', 'topK': 722, 'shrink': 561, 'feature_weighting': 'BM25', 'asymmetric_alpha': 0.7543630691426709}. Best is trial 225 with value: 0.042421652977767725.


Similarity column 35736 (100.0%), 2918.18 column/sec. Elapsed time 12.25 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 57.04 sec. Users per second: 624
Similarity column 35736 (100.0%), 3014.72 column/sec. Elapsed time 11.85 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 56.91 sec. Users per second: 625
Similarity column 35736 (100.0%), 3020.64 column/sec. Elapsed time 11.83 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 56.97 sec. Users per second: 625
Similarity column 35736 (100.0%), 2959.64 column/sec. Elapsed time 12.07 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 57.17 sec. Users per second: 622
Similarity column 35736 (100.0%), 30

[I 2025-01-06 11:18:00,689] Trial 234 finished with value: 0.04218676286277703 and parameters: {'similarity': 'asymmetric', 'topK': 688, 'shrink': 554, 'feature_weighting': 'BM25', 'asymmetric_alpha': 0.7588458326392903}. Best is trial 225 with value: 0.042421652977767725.


Similarity column 35736 (100.0%), 2884.67 column/sec. Elapsed time 12.39 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 58.02 sec. Users per second: 613
Similarity column 35736 (100.0%), 3011.05 column/sec. Elapsed time 11.87 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 58.68 sec. Users per second: 606
Similarity column 35736 (100.0%), 2892.45 column/sec. Elapsed time 12.35 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 58.34 sec. Users per second: 610
Similarity column 35736 (100.0%), 2984.35 column/sec. Elapsed time 11.97 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 58.01 sec. Users per second: 613
Similarity column 35736 (100.0%), 29

[I 2025-01-06 11:23:59,054] Trial 235 finished with value: 0.04233046724539148 and parameters: {'similarity': 'asymmetric', 'topK': 725, 'shrink': 518, 'feature_weighting': 'BM25', 'asymmetric_alpha': 0.6828616893033881}. Best is trial 225 with value: 0.042421652977767725.


Similarity column 35736 (100.0%), 2911.95 column/sec. Elapsed time 12.27 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 57.98 sec. Users per second: 613
Similarity column 35736 (100.0%), 2998.87 column/sec. Elapsed time 11.92 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 58.10 sec. Users per second: 612
Similarity column 35736 (100.0%), 3005.45 column/sec. Elapsed time 11.89 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 58.11 sec. Users per second: 612
Similarity column 35736 (100.0%), 2967.73 column/sec. Elapsed time 12.04 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 57.90 sec. Users per second: 614
Similarity column 35736 (100.0%), 29

[I 2025-01-06 11:29:55,865] Trial 236 finished with value: 0.04230527285693151 and parameters: {'similarity': 'asymmetric', 'topK': 719, 'shrink': 513, 'feature_weighting': 'BM25', 'asymmetric_alpha': 0.6736820875508296}. Best is trial 225 with value: 0.042421652977767725.


Similarity column 35736 (100.0%), 2911.68 column/sec. Elapsed time 12.27 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 58.48 sec. Users per second: 608
Similarity column 35736 (100.0%), 2981.20 column/sec. Elapsed time 11.99 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 58.13 sec. Users per second: 612
Similarity column 35736 (100.0%), 2989.85 column/sec. Elapsed time 11.95 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 58.23 sec. Users per second: 611
Similarity column 35736 (100.0%), 2914.83 column/sec. Elapsed time 12.26 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 58.05 sec. Users per second: 613
Similarity column 35736 (100.0%), 30

[I 2025-01-06 11:35:53,428] Trial 237 finished with value: 0.04223009203164237 and parameters: {'similarity': 'asymmetric', 'topK': 720, 'shrink': 484, 'feature_weighting': 'BM25', 'asymmetric_alpha': 0.6055814633279637}. Best is trial 225 with value: 0.042421652977767725.


Similarity column 35736 (100.0%), 2952.43 column/sec. Elapsed time 12.10 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 57.31 sec. Users per second: 621
Similarity column 35736 (100.0%), 3019.27 column/sec. Elapsed time 11.84 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 57.67 sec. Users per second: 617
Similarity column 35736 (100.0%), 2951.42 column/sec. Elapsed time 12.11 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 57.78 sec. Users per second: 616
Similarity column 35736 (100.0%), 2945.28 column/sec. Elapsed time 12.13 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 58.26 sec. Users per second: 611
Similarity column 35736 (100.0%), 30

[I 2025-01-06 11:41:49,318] Trial 238 finished with value: 0.04222507431468346 and parameters: {'similarity': 'asymmetric', 'topK': 697, 'shrink': 525, 'feature_weighting': 'BM25', 'asymmetric_alpha': 0.6805693060316125}. Best is trial 225 with value: 0.042421652977767725.


Similarity column 35736 (100.0%), 2951.35 column/sec. Elapsed time 12.11 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 57.72 sec. Users per second: 616
Similarity column 35736 (100.0%), 2979.71 column/sec. Elapsed time 11.99 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 57.42 sec. Users per second: 619
Similarity column 35736 (100.0%), 2978.31 column/sec. Elapsed time 12.00 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 57.40 sec. Users per second: 620
Similarity column 35736 (100.0%), 2982.80 column/sec. Elapsed time 11.98 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 58.45 sec. Users per second: 609
Similarity column 35736 (100.0%), 29

[I 2025-01-06 11:47:44,284] Trial 239 finished with value: 0.04227710071181274 and parameters: {'similarity': 'asymmetric', 'topK': 705, 'shrink': 510, 'feature_weighting': 'BM25', 'asymmetric_alpha': 0.7017512639511052}. Best is trial 225 with value: 0.042421652977767725.


Similarity column 35736 (100.0%), 2908.66 column/sec. Elapsed time 12.29 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 59.11 sec. Users per second: 602
Similarity column 35736 (100.0%), 2928.03 column/sec. Elapsed time 12.20 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 58.18 sec. Users per second: 611
Similarity column 35736 (100.0%), 3009.09 column/sec. Elapsed time 11.88 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 58.13 sec. Users per second: 612
Similarity column 35736 (100.0%), 2943.68 column/sec. Elapsed time 12.14 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 58.07 sec. Users per second: 613
Similarity column 35736 (100.0%), 29

[I 2025-01-06 11:53:43,798] Trial 240 finished with value: 0.042213018910009135 and parameters: {'similarity': 'asymmetric', 'topK': 721, 'shrink': 639, 'feature_weighting': 'BM25', 'asymmetric_alpha': 0.6656444144657055}. Best is trial 225 with value: 0.042421652977767725.


Similarity column 35736 (100.0%), 2917.78 column/sec. Elapsed time 12.25 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 58.08 sec. Users per second: 612
Similarity column 35736 (100.0%), 3011.46 column/sec. Elapsed time 11.87 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 57.84 sec. Users per second: 615
Similarity column 35736 (100.0%), 3005.99 column/sec. Elapsed time 11.89 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 58.53 sec. Users per second: 608
Similarity column 35736 (100.0%), 2906.33 column/sec. Elapsed time 12.30 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 58.85 sec. Users per second: 604
Similarity column 35736 (100.0%), 29

[I 2025-01-06 11:59:42,680] Trial 241 finished with value: 0.04236973195595058 and parameters: {'similarity': 'asymmetric', 'topK': 732, 'shrink': 557, 'feature_weighting': 'BM25', 'asymmetric_alpha': 0.7509697482418383}. Best is trial 225 with value: 0.042421652977767725.


Similarity column 35736 (100.0%), 2954.91 column/sec. Elapsed time 12.09 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 58.18 sec. Users per second: 611
Similarity column 35736 (100.0%), 3010.41 column/sec. Elapsed time 11.87 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 57.81 sec. Users per second: 615
Similarity column 35736 (100.0%), 3009.32 column/sec. Elapsed time 11.88 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 58.12 sec. Users per second: 612
Similarity column 35736 (100.0%), 2922.41 column/sec. Elapsed time 12.23 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 58.47 sec. Users per second: 608
Similarity column 35736 (100.0%), 29

[I 2025-01-06 12:05:40,191] Trial 242 finished with value: 0.04233941850943866 and parameters: {'similarity': 'asymmetric', 'topK': 724, 'shrink': 559, 'feature_weighting': 'BM25', 'asymmetric_alpha': 0.7321318204603244}. Best is trial 225 with value: 0.042421652977767725.


Similarity column 35736 (100.0%), 2944.17 column/sec. Elapsed time 12.14 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 57.83 sec. Users per second: 615
Similarity column 35736 (100.0%), 3007.88 column/sec. Elapsed time 11.88 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 57.59 sec. Users per second: 618
Similarity column 35736 (100.0%), 2984.18 column/sec. Elapsed time 11.98 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 57.81 sec. Users per second: 616
Similarity column 35736 (100.0%), 2943.67 column/sec. Elapsed time 12.14 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 58.27 sec. Users per second: 610
Similarity column 35736 (100.0%), 29

[I 2025-01-06 12:11:36,781] Trial 243 finished with value: 0.04232850563695923 and parameters: {'similarity': 'asymmetric', 'topK': 723, 'shrink': 560, 'feature_weighting': 'BM25', 'asymmetric_alpha': 0.734289800125247}. Best is trial 225 with value: 0.042421652977767725.


Similarity column 35736 (100.0%), 2885.55 column/sec. Elapsed time 12.38 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 58.06 sec. Users per second: 613
Similarity column 35736 (100.0%), 2983.96 column/sec. Elapsed time 11.98 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 57.75 sec. Users per second: 616
Similarity column 35736 (100.0%), 2973.92 column/sec. Elapsed time 12.02 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 58.09 sec. Users per second: 613
Similarity column 35736 (100.0%), 2932.10 column/sec. Elapsed time 12.19 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 58.50 sec. Users per second: 608
Similarity column 35736 (100.0%), 29

[I 2025-01-06 12:17:34,575] Trial 244 finished with value: 0.04230255699793496 and parameters: {'similarity': 'asymmetric', 'topK': 715, 'shrink': 564, 'feature_weighting': 'BM25', 'asymmetric_alpha': 0.7432185979093834}. Best is trial 225 with value: 0.042421652977767725.


Similarity column 35736 (100.0%), 2899.84 column/sec. Elapsed time 12.32 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 57.50 sec. Users per second: 618
Similarity column 35736 (100.0%), 2990.85 column/sec. Elapsed time 11.95 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 57.67 sec. Users per second: 617
Similarity column 35736 (100.0%), 2966.28 column/sec. Elapsed time 12.05 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 58.36 sec. Users per second: 610
Similarity column 35736 (100.0%), 2995.38 column/sec. Elapsed time 11.93 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 57.66 sec. Users per second: 617
Similarity column 35736 (100.0%), 29

[I 2025-01-06 12:23:30,053] Trial 245 finished with value: 0.04218004754361666 and parameters: {'similarity': 'asymmetric', 'topK': 689, 'shrink': 561, 'feature_weighting': 'BM25', 'asymmetric_alpha': 0.7202439370129399}. Best is trial 225 with value: 0.042421652977767725.


Similarity column 35736 (100.0%), 2970.72 column/sec. Elapsed time 12.03 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 58.99 sec. Users per second: 603
Similarity column 35736 (100.0%), 2993.83 column/sec. Elapsed time 11.94 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 58.10 sec. Users per second: 612
Similarity column 35736 (100.0%), 2968.05 column/sec. Elapsed time 12.04 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 58.71 sec. Users per second: 606
Similarity column 35736 (100.0%), 2980.81 column/sec. Elapsed time 11.99 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 57.78 sec. Users per second: 616
Similarity column 35736 (100.0%), 29

[I 2025-01-06 12:29:28,167] Trial 246 finished with value: 0.042331939754105964 and parameters: {'similarity': 'asymmetric', 'topK': 720, 'shrink': 550, 'feature_weighting': 'BM25', 'asymmetric_alpha': 0.776212720734784}. Best is trial 225 with value: 0.042421652977767725.


Similarity column 35736 (100.0%), 2971.47 column/sec. Elapsed time 12.03 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 51.91 sec. Users per second: 685
Similarity column 35736 (100.0%), 3059.37 column/sec. Elapsed time 11.68 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 50.85 sec. Users per second: 700
Similarity column 35736 (100.0%), 3065.45 column/sec. Elapsed time 11.66 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 51.37 sec. Users per second: 693
Similarity column 35736 (100.0%), 3045.40 column/sec. Elapsed time 11.73 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 50.82 sec. Users per second: 700
Similarity column 35736 (100.0%), 30

[I 2025-01-06 12:34:48,019] Trial 247 finished with value: 0.040933429821890445 and parameters: {'similarity': 'asymmetric', 'topK': 450, 'shrink': 546, 'feature_weighting': 'BM25', 'asymmetric_alpha': 0.778915926278175}. Best is trial 225 with value: 0.042421652977767725.


Similarity column 35736 (100.0%), 3023.69 column/sec. Elapsed time 11.82 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 58.13 sec. Users per second: 612
Similarity column 35736 (100.0%), 2991.96 column/sec. Elapsed time 11.94 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 58.08 sec. Users per second: 613
Similarity column 35736 (100.0%), 3014.16 column/sec. Elapsed time 11.86 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 58.05 sec. Users per second: 613
Similarity column 35736 (100.0%), 2964.04 column/sec. Elapsed time 12.06 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 57.76 sec. Users per second: 616
Similarity column 35736 (100.0%), 29

[I 2025-01-06 12:40:44,455] Trial 248 finished with value: 0.04232875526895782 and parameters: {'similarity': 'asymmetric', 'topK': 725, 'shrink': 569, 'feature_weighting': 'BM25', 'asymmetric_alpha': 0.795997909976302}. Best is trial 225 with value: 0.042421652977767725.


Similarity column 35736 (100.0%), 2999.92 column/sec. Elapsed time 11.91 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 57.26 sec. Users per second: 621
Similarity column 35736 (100.0%), 2989.12 column/sec. Elapsed time 11.96 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 57.71 sec. Users per second: 616
Similarity column 35736 (100.0%), 2953.78 column/sec. Elapsed time 12.10 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 57.98 sec. Users per second: 614
Similarity column 35736 (100.0%), 2995.52 column/sec. Elapsed time 11.93 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 57.28 sec. Users per second: 621
Similarity column 35736 (100.0%), 30

[I 2025-01-06 12:46:38,482] Trial 249 finished with value: 0.042261331227828484 and parameters: {'similarity': 'asymmetric', 'topK': 707, 'shrink': 567, 'feature_weighting': 'BM25', 'asymmetric_alpha': 0.7374568961521599}. Best is trial 225 with value: 0.042421652977767725.


Similarity column 35736 (100.0%), 3029.74 column/sec. Elapsed time 11.80 sec
EvaluatorHoldout: Ignoring 173 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35563 (100.0%) in 57.46 sec. Users per second: 619
Similarity column 35736 (100.0%), 3018.58 column/sec. Elapsed time 11.84 sec
EvaluatorHoldout: Ignoring 163 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35573 (100.0%) in 57.54 sec. Users per second: 618
Similarity column 35736 (100.0%), 2974.21 column/sec. Elapsed time 12.02 sec
EvaluatorHoldout: Ignoring 150 ( 0.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35586 (100.0%) in 58.21 sec. Users per second: 611
Similarity column 35736 (100.0%), 2993.31 column/sec. Elapsed time 11.94 sec
EvaluatorHoldout: Ignoring 167 ( 0.5%) Users that have less than 1 test interactions
EvaluatorHoldout: Processed 35569 (100.0%) in 57.73 sec. Users per second: 616
Similarity column 35736 (100.0%), 30

[I 2025-01-06 12:52:33,962] Trial 250 finished with value: 0.04234224529047925 and parameters: {'similarity': 'asymmetric', 'topK': 725, 'shrink': 550, 'feature_weighting': 'BM25', 'asymmetric_alpha': 0.7691204702757557}. Best is trial 225 with value: 0.042421652977767725.


## Some optuna visualizations on recommender parameters

In [14]:
# Slice plot of the study's hyperparameters.
# When no tuning ran in this session, the study is first reloaded from the
# SQLite storage referenced by config["database_path"].
if not config['tune_parameters']:
    optuna_study = optuna.load_study(
        study_name=f'hyperparameters_tuning_{config["model"]}_{config["metric"]}',
        storage=f'sqlite:///{config["database_path"]}',
    )

fig = optuna.visualization.plot_slice(optuna_study)
fig.show()

In [15]:
# Hyperparameter-importance plot for the same study.
# The study is reloaded from the SQLite storage when tuning was skipped, so this
# cell does not depend on the previous plotting cell having been executed.
if not config['tune_parameters']:
    optuna_study = optuna.load_study(
        study_name=f'hyperparameters_tuning_{config["model"]}_{config["metric"]}',
        storage=f'sqlite:///{config["database_path"]}',
    )

fig = optuna.visualization.plot_param_importances(optuna_study)
fig.show()

## Let's train the recommender with best parameter values

In [16]:
# Fit a UserKNNCF recommender on the sum of URM_train and URM_validation,
# using the best hyperparameters found so far.
if config['tune_best_params']:

    if not config['tune_parameters']:
        # No study was run in this session: read the parameters from the JSON
        # file stored under K_PATH/GH_PATH.
        with open(
            f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/best_params_{config["model"]}_{config["metric"]}.json',
            'r',
        ) as best_params_json:
            best_params = json.load(best_params_json)
    else:
        # Tuning just ran: take the parameters of the best trial directly.
        best_params = optuna_study.best_trial.params

    recommender_instance = UserKNNCFRecommender(URM_train + URM_validation)
    recommender_instance.fit(**best_params)

Similarity column 35736 (100.0%), 2641.64 column/sec. Elapsed time 13.53 sec


# Testing

Create the recommendations for the submission. 

In [17]:
# Build the submission CSV for the target users with the fitted recommender.
# Skipped when the final model was not trained (config['tune_best_params'] is falsy).
if config['tune_best_params']:

    data_target_users_test = pd.read_csv(
        '/kaggle/working/RECsys_Challenge2024/Dataset/data_target_users_test.csv'
    )
    create_submission(
        data_target_users_test,
        recommender_instance,
        f'/kaggle/working/submission_{config["model"]}_{config["metric"]}.csv',
    )

Submission file saved as /kaggle/working/submission_UserKNNCF_MAP.csv


# Save Version on GitHub 

Write the best hyperparameters to a JSON file (and upload it to GitHub if enabled); if tuning was skipped, copy the previously saved JSON file instead.

In [18]:
# Make the best-hyperparameters JSON available in /kaggle/working.
#   * tuning ran       -> dump the study's best parameters, optionally upload them;
#   * tuning skipped   -> optionally copy the file found under K_PATH/GH_PATH.
# NOTE(review): upload_file is defined elsewhere in the notebook; its arguments
# appear to be (local path, path in the repo, commit message, repo handle) — confirm.
if config['tune_parameters']:
    with open(
        f'/kaggle/working/best_params_{config["model"]}_{config["metric"]}.json',
        'w',
    ) as params_file:
        json.dump(optuna_study.best_params, params_file)

    if config['save_github']:
        upload_file(
            f'/kaggle/working/best_params_{config["model"]}_{config["metric"]}.json',
            f'{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/best_params_{config["model"]}_{config["metric"]}.json',
            f'{config["model"]}_{config["metric"]} tuning results (from kaggle notebook)',
            repo,
        )
elif config['copy_prev_best_params']:
    # Adjacent f-strings are concatenated implicitly inside the parentheses.
    shutil.copyfile(
        f'{K_PATH}/{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/'
        f'best_params_{config["model"]}_{config["metric"]}.json',
        f'/kaggle/working/best_params_{config["model"]}_{config["metric"]}.json',
    )

File 'TrainedModels/WithKFCV/KNN/UserKNNCFRecommender/OptimizingMAP/best_params_UserKNNCF_MAP.json' updated successfully.


Save the history of the tuned model.

In [19]:
# Upload the study database (the file at config['database_path']) to the repo,
# but only when a tuning run happened and GitHub saving is enabled.
# NOTE(review): upload_file is defined elsewhere in the notebook; its arguments
# appear to be (local path, path in the repo, commit message, repo handle) — confirm.
if config['save_github'] and config['tune_parameters']:
    upload_file(
        config['database_path'],
        f'{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/history_{config["model"]}_{config["metric"]}.db',
        f'Tuning {config["model"]}_{config["metric"]} db updated results (from kaggle notebook)',
        repo,
    )

File 'TrainedModels/WithKFCV/KNN/UserKNNCFRecommender/OptimizingMAP/history_UserKNNCF_MAP.db' updated successfully.


Save the submission produced by the best trained model.

In [20]:
# Upload the submission CSV written to /kaggle/working to the repo's Submission
# folder; requires both GitHub saving and the final training step to be enabled.
# NOTE(review): upload_file is defined elsewhere in the notebook; its arguments
# appear to be (local path, path in the repo, commit message, repo handle) — confirm.
if config['save_github'] and config['tune_best_params']:
    upload_file(
        f'/kaggle/working/submission_{config["model"]}_{config["metric"]}.csv',
        f'{GH_PATH}/{config["model"]}Recommender/Optimizing{config["metric"]}/Submission/submission_{config["model"]}_{config["metric"]}.csv',
        f'New {config["model"]}_{config["metric"]} submission (from kaggle notebook)',
        repo,
    )

File 'TrainedModels/WithKFCV/KNN/UserKNNCFRecommender/OptimizingMAP/Submission/submission_UserKNNCF_MAP.csv' updated successfully.
