# Set connection with GitHub

In [1]:
# ! cd /kaggle/working && rm -rf RECsys_Challenge2024

In [2]:
from kaggle_secrets import UserSecretsClient

token = UserSecretsClient().get_secret("Token")

! git clone https://{token}@github.com/madratak/RECsys_Challenge2024.git

Cloning into 'RECsys_Challenge2024'...
remote: Enumerating objects: 1721, done.[K
remote: Counting objects: 100% (103/103), done.[K
remote: Compressing objects: 100% (82/82), done.[K
remote: Total 1721 (delta 56), reused 35 (delta 16), pack-reused 1618 (from 1)[K
Receiving objects: 100% (1721/1721), 68.81 MiB | 24.64 MiB/s, done.
Resolving deltas: 100% (1001/1001), done.


In [3]:
! pip install PyGithub requests

Collecting PyGithub
  Downloading PyGithub-2.5.0-py3-none-any.whl.metadata (3.9 kB)
Collecting pynacl>=1.4.0 (from PyGithub)
  Downloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl.metadata (8.6 kB)
Downloading PyGithub-2.5.0-py3-none-any.whl (375 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m375.9/375.9 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (856 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m856.7/856.7 kB[0m [31m25.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pynacl, PyGithub
Successfully installed PyGithub-2.5.0 pynacl-1.5.0


In [4]:
! cd /kaggle/working/RECsys_Challenge2024 && python run_compile_all_cython.py

run_compile_all_cython: Found 11 Cython files in 5 folders...
run_compile_all_cython: All files will be compiled using your current python environment: '/opt/conda/bin/python'
Compiling [1/11]: MatrixFactorization_Cython_Epoch.pyx... 
In file included from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/ndarraytypes.h:1929[m[K,
                 from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/ndarrayobject.h:12[m[K,
                 from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/arrayobject.h:5[m[K,
                 from [01m[KMatrixFactorization_Cython_Epoch.c:1252[m[K:
      |  [01;35m[K^~~~~~~[m[K
[01m[KMatrixFactorization_Cython_Epoch.c:[m[K In function '[01m[K__pyx_pf_32MatrixFactorization_Cython_Epoch_32MatrixFactorization_Cython_Epoch_10epochIteration_Cython_ASY_SVD_SGD[m[K':
26255 |         [01;35m[Kfor[m[K (__pyx_t_21 = __pyx_v_start_pos_seen_items; __pyx_t_21 < __pyx

In [5]:
import numpy as np
import pandas as pd
import scipy.sparse as sps
import time
import shutil

np.random.seed(42)

## Import the repository

In [6]:
from github import Github, Auth

# Authenticate using a personal access token
auth_token = Auth.Token(token)
github_client = Github(auth=auth_token)

# Define the repository name you want to find
target_repo_name = 'RECsys_Challenge2024'
repo = None

# Search for the repository in the user's repositories
try:
    for repository in github_client.get_user().get_repos():
        if repository.name == target_repo_name:
            repo = repository
            print(f"Repository '{target_repo_name}' found.")
            break
    if repo is None:
        print(f"Repository '{target_repo_name}' not found.")
except Exception as e:
    print("An error occurred while accessing the repositories:", e)

Repository 'RECsys_Challenge2024' found.


In [7]:
def upload_file(filepath_kaggle, filepath_github, commit_message):
    """
    Uploads a file from Kaggle to GitHub, updating it if it already exists in the repository,
    or creating it if it does not.

    Parameters:
    - filepath_kaggle: Path to the file in the Kaggle environment.
    - filepath_github: Target path in the GitHub repository where the file should be uploaded.
    - commit_message: Message for the commit on GitHub.
    """
    try:
        
        # Check if the file already exists in the GitHub repository
        contents = repo.get_contents(filepath_github)
        
        # If it exists, update the file
        with open(filepath_kaggle, "rb") as file:
            repo.update_file(
                contents.path, commit_message, file.read(), contents.sha
            )
        print(f"File '{filepath_github}' updated successfully.")
    
    except Exception as e:
        
        # If the file does not exist, create it
        with open(filepath_kaggle, "rb") as file:
            repo.create_file(
                filepath_github, commit_message, file.read()
            )
        print(f"File '{filepath_github}' created successfully.")

In [8]:
config = {
    'model': 'ItemKNNCF',
    'tune_parameters': True,
    'database_path': '/kaggle/working/history_ItemKNNCF.db',
    'copy_prev_best_params': False,
    'save_github': True
}

Import the database where previous tuning trials have been saved.

In [9]:
try:
    shutil.copyfile(
        f'/kaggle/working/RECsys_Challenge2024/TrainedModels/' \
        f'{config["model"]}Recommender/history_{config["model"]}.db', 
        config['database_path']
    )
except FileNotFoundError:
    pass # if not present optuna will create it

# Construction of URM and ICM matrices

In [10]:
URM_all_dataframe = pd.read_csv("/kaggle/input/recommender-system-2024-challenge-polimi/data_train.csv")

n_users = len(URM_all_dataframe["user_id"].unique())
n_items = len(URM_all_dataframe["item_id"].unique())

URM_all = sps.csr_matrix((URM_all_dataframe["data"].values, 
                          (URM_all_dataframe["user_id"].values, URM_all_dataframe["item_id"].values)),
                        shape = (n_users, n_items))

URM_all

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 1764607 stored elements and shape (35736, 38121)>

In [11]:
ICM_dataframe = pd.read_csv("/kaggle/input/recommender-system-2024-challenge-polimi/data_ICM_metadata.csv")

n_items = len(ICM_dataframe["item_id"].unique())
n_features = len(ICM_dataframe["feature_id"].unique())

ICM_all = sps.csr_matrix((ICM_dataframe["data"].values, 
                          (ICM_dataframe["item_id"].values, ICM_dataframe["feature_id"].values)),
                        shape = (n_items, n_features))

ICM_all

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 2940040 stored elements and shape (38121, 94331)>

# Training

In [14]:
%cd /kaggle/working/RECsys_Challenge2024/

/kaggle/working/RECsys_Challenge2024


In [15]:
from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.80)

evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])

EvaluatorHoldout: Ignoring 141 ( 0.4%) Users that have less than 1 test interactions


In [18]:
import optuna
import pandas as pd
from Recommenders.KNN.ItemKNNCFRecommender import ItemKNNCFRecommender

def objective_function_ItemKNNCF(optuna_trial):
    
    recommender_instance = ItemKNNCFRecommender(URM_train)
    similarity = optuna_trial.suggest_categorical("similarity", ['cosine', 'dice', 'jaccard', 'asymmetric', 'tversky', 'euclidean'])
    
    full_hyperp = {"similarity": similarity,
                   "topK": optuna_trial.suggest_int("topK", 0, 1500),
                   "shrink": optuna_trial.suggest_int("shrink", 0, 1000),
                   'feature_weighting': optuna_trial.suggest_categorical('feature_weighting', ["BM25", "TF-IDF", "none"])
                  }
    
    if similarity == "asymmetric":
        full_hyperp["asymmetric_alpha"] = optuna_trial.suggest_float("asymmetric_alpha", 0, 2, log=False)
        full_hyperp["normalize"] = True     

    elif similarity == "tversky":
        full_hyperp["tversky_alpha"] = optuna_trial.suggest_float("tversky_alpha", 0, 2, log=False)
        full_hyperp["tversky_beta"] = optuna_trial.suggest_float("tversky_beta", 0, 2, log=False)
        full_hyperp["normalize"] = True 

    elif similarity == "euclidean":
        full_hyperp["normalize_avg_row"] = optuna_trial.suggest_categorical("normalize_avg_row", [True, False])
        full_hyperp["similarity_from_distance_mode"] = optuna_trial.suggest_categorical("similarity_from_distance_mode", ["lin", "log", "exp"])
        full_hyperp["normalize"] = optuna_trial.suggest_categorical("normalize", [True, False])
        
    
    recommender_instance.fit(**full_hyperp)
    
    result_df, _ = evaluator_validation.evaluateRecommender(recommender_instance)
    
    return result_df.loc[10]["MAP"]

In [19]:
if config['tune_parameters']:
    
    study = optuna.create_study(direction='maximize', study_name=f'hyperparameters_tuning_{config["model"]}',
                                storage=f'sqlite:///{config["database_path"]}', load_if_exists=True)

    study.optimize(objective_function_ItemKNNCF, n_trials=100)

[I 2024-11-15 12:58:21,461] Using an existing study with name 'hyperparameters_tuning_ItemKNNCF' instead of creating a new one.


Similarity column 38121 (100.0%), 279.81 column/sec. Elapsed time 2.27 min
EvaluatorHoldout: Processed 35595 (100.0%) in 1.07 min. Users per second: 553


[I 2024-11-15 13:01:43,775] Trial 1 finished with value: 0.013831576353911232 and parameters: {'similarity': 'euclidean', 'topK': 1083, 'shrink': 87, 'feature_weighting': 'BM25', 'normalize_avg_row': False, 'similarity_from_distance_mode': 'log', 'normalize': True}. Best is trial 1 with value: 0.013831576353911232.


Similarity column 38121 (100.0%), 2624.27 column/sec. Elapsed time 14.53 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 58.19 sec. Users per second: 612


[I 2024-11-15 13:02:57,912] Trial 2 finished with value: 0.03496261179004422 and parameters: {'similarity': 'dice', 'topK': 653, 'shrink': 427, 'feature_weighting': 'TF-IDF'}. Best is trial 2 with value: 0.03496261179004422.


Similarity column 38121 (100.0%), 2450.22 column/sec. Elapsed time 15.56 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 1.02 min. Users per second: 579


[I 2024-11-15 13:04:16,547] Trial 3 finished with value: 0.037970905045071515 and parameters: {'similarity': 'asymmetric', 'topK': 940, 'shrink': 246, 'feature_weighting': 'BM25', 'asymmetric_alpha': 1.095938761735912}. Best is trial 3 with value: 0.037970905045071515.


Similarity column 38121 (100.0%), 386.21 column/sec. Elapsed time 1.65 min
EvaluatorHoldout: Processed 35595 (100.0%) in 33.85 sec. Users per second: 1051


[I 2024-11-15 13:06:29,757] Trial 4 finished with value: 0.01916516163987733 and parameters: {'similarity': 'euclidean', 'topK': 379, 'shrink': 725, 'feature_weighting': 'none', 'normalize_avg_row': False, 'similarity_from_distance_mode': 'exp', 'normalize': False}. Best is trial 3 with value: 0.037970905045071515.


Similarity column 38121 (100.0%), 377.85 column/sec. Elapsed time 1.68 min
EvaluatorHoldout: Processed 35595 (100.0%) in 1.02 min. Users per second: 582


[I 2024-11-15 13:09:12,819] Trial 5 finished with value: 0.029883539912195015 and parameters: {'similarity': 'euclidean', 'topK': 590, 'shrink': 131, 'feature_weighting': 'BM25', 'normalize_avg_row': False, 'similarity_from_distance_mode': 'lin', 'normalize': False}. Best is trial 3 with value: 0.037970905045071515.


Similarity column 38121 (100.0%), 2472.82 column/sec. Elapsed time 15.42 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 1.15 min. Users per second: 515


[I 2024-11-15 13:10:38,357] Trial 6 finished with value: 0.03370695344673329 and parameters: {'similarity': 'asymmetric', 'topK': 586, 'shrink': 750, 'feature_weighting': 'none', 'asymmetric_alpha': 1.322272438988854}. Best is trial 3 with value: 0.037970905045071515.


Similarity column 38121 (100.0%), 2410.39 column/sec. Elapsed time 15.82 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 50.00 sec. Users per second: 712


[I 2024-11-15 13:11:45,471] Trial 7 finished with value: 0.04213755387438386 and parameters: {'similarity': 'tversky', 'topK': 526, 'shrink': 46, 'feature_weighting': 'BM25', 'tversky_alpha': 1.7971795977334728, 'tversky_beta': 1.7591021101338662}. Best is trial 7 with value: 0.04213755387438386.


Similarity column 38121 (100.0%), 2319.99 column/sec. Elapsed time 16.43 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 1.20 min. Users per second: 494


[I 2024-11-15 13:13:16,238] Trial 8 finished with value: 0.03498778587147621 and parameters: {'similarity': 'tversky', 'topK': 1396, 'shrink': 775, 'feature_weighting': 'BM25', 'tversky_alpha': 1.0844586410673038, 'tversky_beta': 0.5794629340326964}. Best is trial 7 with value: 0.04213755387438386.


Similarity column 38121 (100.0%), 2535.71 column/sec. Elapsed time 15.03 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 1.10 min. Users per second: 539


[I 2024-11-15 13:14:38,211] Trial 9 finished with value: 0.020637333137122182 and parameters: {'similarity': 'asymmetric', 'topK': 425, 'shrink': 791, 'feature_weighting': 'TF-IDF', 'asymmetric_alpha': 1.9944400940206595}. Best is trial 7 with value: 0.04213755387438386.


Similarity column 38121 (100.0%), 2609.58 column/sec. Elapsed time 14.61 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 53.44 sec. Users per second: 666


[I 2024-11-15 13:15:47,477] Trial 10 finished with value: 0.04045828734640226 and parameters: {'similarity': 'dice', 'topK': 604, 'shrink': 76, 'feature_weighting': 'none'}. Best is trial 7 with value: 0.04213755387438386.


Similarity column 38121 (100.0%), 2532.87 column/sec. Elapsed time 15.05 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 32.83 sec. Users per second: 1084


[I 2024-11-15 13:16:35,945] Trial 11 finished with value: 0.05031688617760919 and parameters: {'similarity': 'tversky', 'topK': 12, 'shrink': 433, 'feature_weighting': 'BM25', 'tversky_alpha': 1.9810653173397732, 'tversky_beta': 1.9747412745860602}. Best is trial 11 with value: 0.05031688617760919.


Similarity column 38121 (100.0%), 2526.37 column/sec. Elapsed time 15.09 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.92 sec. Users per second: 939


[I 2024-11-15 13:17:29,641] Trial 12 finished with value: 0.04719205814085553 and parameters: {'similarity': 'tversky', 'topK': 66, 'shrink': 460, 'feature_weighting': 'BM25', 'tversky_alpha': 1.9874403949501185, 'tversky_beta': 1.9868853634136243}. Best is trial 11 with value: 0.05031688617760919.


Similarity column 38121 (100.0%), 2531.84 column/sec. Elapsed time 15.06 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 32.44 sec. Users per second: 1097


[I 2024-11-15 13:18:17,746] Trial 13 finished with value: 0.05091212538768189 and parameters: {'similarity': 'tversky', 'topK': 10, 'shrink': 515, 'feature_weighting': 'BM25', 'tversky_alpha': 1.7901350129820508, 'tversky_beta': 1.9752432471394596}. Best is trial 13 with value: 0.05091212538768189.


Similarity column 38121 (100.0%), 2679.52 column/sec. Elapsed time 14.23 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 34.19 sec. Users per second: 1041


[I 2024-11-15 13:19:06,584] Trial 14 finished with value: 0.04238023108292668 and parameters: {'similarity': 'cosine', 'topK': 40, 'shrink': 565, 'feature_weighting': 'BM25'}. Best is trial 13 with value: 0.05091212538768189.


Similarity column 38121 (100.0%), 2676.80 column/sec. Elapsed time 14.24 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 47.59 sec. Users per second: 748


[I 2024-11-15 13:20:09,300] Trial 15 finished with value: 0.041256015090400354 and parameters: {'similarity': 'jaccard', 'topK': 230, 'shrink': 960, 'feature_weighting': 'BM25'}. Best is trial 13 with value: 0.05091212538768189.


Similarity column 38121 (100.0%), 2469.14 column/sec. Elapsed time 15.44 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 44.79 sec. Users per second: 795


[I 2024-11-15 13:21:10,370] Trial 16 finished with value: 0.04103912735202096 and parameters: {'similarity': 'tversky', 'topK': 209, 'shrink': 292, 'feature_weighting': 'TF-IDF', 'tversky_alpha': 1.550165126888654, 'tversky_beta': 1.4324358371869392}. Best is trial 13 with value: 0.05091212538768189.


Similarity column 38121 (100.0%), 1789.96 column/sec. Elapsed time 21.30 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 31.01 sec. Users per second: 1148


[I 2024-11-15 13:22:03,268] Trial 17 finished with value: 0.05161769755427387 and parameters: {'similarity': 'tversky', 'topK': 3, 'shrink': 611, 'feature_weighting': 'BM25', 'tversky_alpha': 0.3345825602636454, 'tversky_beta': 1.1540400137943267}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2471.24 column/sec. Elapsed time 15.43 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 1.01 min. Users per second: 590


[I 2024-11-15 13:23:20,650] Trial 18 finished with value: 0.04081636554982371 and parameters: {'similarity': 'cosine', 'topK': 889, 'shrink': 587, 'feature_weighting': 'BM25'}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2676.06 column/sec. Elapsed time 14.25 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 46.38 sec. Users per second: 767


[I 2024-11-15 13:24:21,968] Trial 19 finished with value: 0.03644253250300331 and parameters: {'similarity': 'jaccard', 'topK': 201, 'shrink': 632, 'feature_weighting': 'none'}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2427.86 column/sec. Elapsed time 15.70 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 51.28 sec. Users per second: 694


[I 2024-11-15 13:25:29,976] Trial 20 finished with value: 0.02974840968835846 and parameters: {'similarity': 'tversky', 'topK': 350, 'shrink': 975, 'feature_weighting': 'TF-IDF', 'tversky_alpha': 0.05231455526696849, 'tversky_beta': 1.0214948373621293}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2316.45 column/sec. Elapsed time 16.46 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 1.19 min. Users per second: 497


[I 2024-11-15 13:27:00,396] Trial 21 finished with value: 0.031016930993963344 and parameters: {'similarity': 'tversky', 'topK': 1383, 'shrink': 336, 'feature_weighting': 'BM25', 'tversky_alpha': 0.13530548647104745, 'tversky_beta': 0.22944638703009002}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 3187.87 column/sec. Elapsed time 11.96 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 25.73 sec. Users per second: 1383


[I 2024-11-15 13:27:38,659] Trial 22 finished with value: 0.00028209999175022335 and parameters: {'similarity': 'tversky', 'topK': 0, 'shrink': 408, 'feature_weighting': 'BM25', 'tversky_alpha': 0.6060619023354439, 'tversky_beta': 1.465635272336576}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2498.03 column/sec. Elapsed time 15.26 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 42.79 sec. Users per second: 832


[I 2024-11-15 13:28:37,483] Trial 23 finished with value: 0.04501176373531889 and parameters: {'similarity': 'tversky', 'topK': 153, 'shrink': 497, 'feature_weighting': 'BM25', 'tversky_alpha': 1.3895037566827795, 'tversky_beta': 1.9682867294487798}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2457.78 column/sec. Elapsed time 15.51 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 48.51 sec. Users per second: 734


[I 2024-11-15 13:29:42,464] Trial 24 finished with value: 0.040008787126779834 and parameters: {'similarity': 'tversky', 'topK': 283, 'shrink': 642, 'feature_weighting': 'BM25', 'tversky_alpha': 0.4989500088642008, 'tversky_beta': 1.4827289546155873}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2469.69 column/sec. Elapsed time 15.44 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 42.32 sec. Users per second: 841


[I 2024-11-15 13:30:40,987] Trial 25 finished with value: 0.044812781356395416 and parameters: {'similarity': 'tversky', 'topK': 142, 'shrink': 191, 'feature_weighting': 'BM25', 'tversky_alpha': 1.9743678622696987, 'tversky_beta': 0.9929138366042736}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2519.61 column/sec. Elapsed time 15.13 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 34.60 sec. Users per second: 1029


[I 2024-11-15 13:31:31,352] Trial 26 finished with value: 0.04925075418564501 and parameters: {'similarity': 'tversky', 'topK': 24, 'shrink': 359, 'feature_weighting': 'BM25', 'tversky_alpha': 1.5712641369360254, 'tversky_beta': 1.6406950876866848}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2498.53 column/sec. Elapsed time 15.26 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 1.15 min. Users per second: 516


[I 2024-11-15 13:32:57,684] Trial 27 finished with value: 0.03658685899794086 and parameters: {'similarity': 'jaccard', 'topK': 1216, 'shrink': 533, 'feature_weighting': 'BM25'}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2556.84 column/sec. Elapsed time 14.91 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 49.25 sec. Users per second: 723


[I 2024-11-15 13:34:02,878] Trial 28 finished with value: 0.041088906949208914 and parameters: {'similarity': 'cosine', 'topK': 471, 'shrink': 870, 'feature_weighting': 'BM25'}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2697.58 column/sec. Elapsed time 14.13 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 49.67 sec. Users per second: 717


[I 2024-11-15 13:35:07,602] Trial 29 finished with value: 0.03510718800794538 and parameters: {'similarity': 'dice', 'topK': 300, 'shrink': 691, 'feature_weighting': 'TF-IDF'}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2336.85 column/sec. Elapsed time 16.31 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 1.01 min. Users per second: 586


[I 2024-11-15 13:36:26,189] Trial 30 finished with value: 0.03361512451588196 and parameters: {'similarity': 'tversky', 'topK': 753, 'shrink': 611, 'feature_weighting': 'none', 'tversky_alpha': 1.033041624810307, 'tversky_beta': 1.118216867101797}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 422.35 column/sec. Elapsed time 1.50 min
EvaluatorHoldout: Processed 35595 (100.0%) in 36.87 sec. Users per second: 965


[I 2024-11-15 13:38:33,797] Trial 31 finished with value: 0.024264391958028 and parameters: {'similarity': 'euclidean', 'topK': 89, 'shrink': 490, 'feature_weighting': 'BM25', 'normalize_avg_row': True, 'similarity_from_distance_mode': 'log', 'normalize': True}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2498.41 column/sec. Elapsed time 15.26 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 34.24 sec. Users per second: 1040


[I 2024-11-15 13:39:23,922] Trial 32 finished with value: 0.04954325112542398 and parameters: {'similarity': 'tversky', 'topK': 21, 'shrink': 385, 'feature_weighting': 'BM25', 'tversky_alpha': 1.6072676631776197, 'tversky_beta': 1.7197063868901936}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2479.09 column/sec. Elapsed time 15.38 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 41.82 sec. Users per second: 851


[I 2024-11-15 13:40:21,899] Trial 33 finished with value: 0.045699859753798366 and parameters: {'similarity': 'tversky', 'topK': 134, 'shrink': 397, 'feature_weighting': 'BM25', 'tversky_alpha': 1.719203312637189, 'tversky_beta': 1.9932588107811953}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2509.11 column/sec. Elapsed time 15.19 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 32.29 sec. Users per second: 1102


[I 2024-11-15 13:41:10,001] Trial 34 finished with value: 0.05121349083717205 and parameters: {'similarity': 'tversky', 'topK': 9, 'shrink': 322, 'feature_weighting': 'BM25', 'tversky_alpha': 1.2858581218850778, 'tversky_beta': 1.74766353653443}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2754.01 column/sec. Elapsed time 13.84 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 41.19 sec. Users per second: 864


[I 2024-11-15 13:42:05,735] Trial 35 finished with value: 0.0450242498834983 and parameters: {'similarity': 'dice', 'topK': 116, 'shrink': 270, 'feature_weighting': 'BM25'}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2435.54 column/sec. Elapsed time 15.65 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 47.44 sec. Users per second: 750


[I 2024-11-15 13:43:09,810] Trial 36 finished with value: 0.04286239261354998 and parameters: {'similarity': 'tversky', 'topK': 287, 'shrink': 452, 'feature_weighting': 'BM25', 'tversky_alpha': 1.3095527701262322, 'tversky_beta': 1.765290322766042}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2463.01 column/sec. Elapsed time 15.48 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 56.84 sec. Users per second: 626


[I 2024-11-15 13:44:23,536] Trial 37 finished with value: 0.04061496175002148 and parameters: {'similarity': 'asymmetric', 'topK': 750, 'shrink': 194, 'feature_weighting': 'BM25', 'asymmetric_alpha': 0.10787781692235687}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 397.92 column/sec. Elapsed time 1.60 min
EvaluatorHoldout: Processed 35595 (100.0%) in 41.48 sec. Users per second: 858


[I 2024-11-15 13:46:41,310] Trial 38 finished with value: 0.022324832719058975 and parameters: {'similarity': 'euclidean', 'topK': 207, 'shrink': 515, 'feature_weighting': 'none', 'normalize_avg_row': True, 'similarity_from_distance_mode': 'lin', 'normalize': True}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2378.72 column/sec. Elapsed time 16.03 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 50.19 sec. Users per second: 709


[I 2024-11-15 13:47:48,606] Trial 39 finished with value: 0.03669459773420898 and parameters: {'similarity': 'tversky', 'topK': 376, 'shrink': 324, 'feature_weighting': 'TF-IDF', 'tversky_alpha': 0.7307439970458911, 'tversky_beta': 1.271295875468453}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2491.78 column/sec. Elapsed time 15.30 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 41.36 sec. Users per second: 861


[I 2024-11-15 13:48:46,000] Trial 40 finished with value: 0.04560686805039762 and parameters: {'similarity': 'tversky', 'topK': 91, 'shrink': 670, 'feature_weighting': 'BM25', 'tversky_alpha': 1.2117232886502216, 'tversky_beta': 0.7477861490861788}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2425.26 column/sec. Elapsed time 15.72 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 1.06 min. Users per second: 560


[I 2024-11-15 13:50:07,010] Trial 41 finished with value: 0.04035734575704933 and parameters: {'similarity': 'asymmetric', 'topK': 1020, 'shrink': 226, 'feature_weighting': 'BM25', 'asymmetric_alpha': 0.002855685859458168}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 3167.77 column/sec. Elapsed time 12.03 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 25.73 sec. Users per second: 1383


[I 2024-11-15 13:50:45,389] Trial 42 finished with value: 0.00028209999175022335 and parameters: {'similarity': 'tversky', 'topK': 0, 'shrink': 385, 'feature_weighting': 'BM25', 'tversky_alpha': 1.7729258578941427, 'tversky_beta': 1.7545103994936297}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2943.50 column/sec. Elapsed time 12.95 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 29.34 sec. Users per second: 1213


[I 2024-11-15 13:51:28,284] Trial 43 finished with value: 0.04296214690399144 and parameters: {'similarity': 'tversky', 'topK': 1, 'shrink': 427, 'feature_weighting': 'BM25', 'tversky_alpha': 1.5314547369099525, 'tversky_beta': 1.8142238333401879}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2478.62 column/sec. Elapsed time 15.38 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 39.39 sec. Users per second: 904


[I 2024-11-15 13:52:23,743] Trial 44 finished with value: 0.0466677993386786 and parameters: {'similarity': 'tversky', 'topK': 76, 'shrink': 555, 'feature_weighting': 'BM25', 'tversky_alpha': 1.8325870478895259, 'tversky_beta': 1.6049940912587117}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2475.19 column/sec. Elapsed time 15.40 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 43.78 sec. Users per second: 813


[I 2024-11-15 13:53:23,738] Trial 45 finished with value: 0.04499424522348929 and parameters: {'similarity': 'tversky', 'topK': 166, 'shrink': 459, 'feature_weighting': 'BM25', 'tversky_alpha': 1.6046478150048051, 'tversky_beta': 1.8554298520136232}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 393.05 column/sec. Elapsed time 1.62 min
EvaluatorHoldout: Processed 35595 (100.0%) in 56.06 sec. Users per second: 635


[I 2024-11-15 13:55:57,478] Trial 46 finished with value: 0.030907741857804462 and parameters: {'similarity': 'euclidean', 'topK': 280, 'shrink': 300, 'feature_weighting': 'BM25', 'normalize_avg_row': True, 'similarity_from_distance_mode': 'exp', 'normalize': False}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2619.98 column/sec. Elapsed time 14.55 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 39.76 sec. Users per second: 895


[I 2024-11-15 13:56:52,162] Trial 47 finished with value: 0.041844390263478415 and parameters: {'similarity': 'cosine', 'topK': 67, 'shrink': 145, 'feature_weighting': 'none'}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2624.40 column/sec. Elapsed time 14.53 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 53.77 sec. Users per second: 662


[I 2024-11-15 13:58:01,658] Trial 48 finished with value: 0.04043991933056262 and parameters: {'similarity': 'dice', 'topK': 464, 'shrink': 373, 'feature_weighting': 'BM25'}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2681.25 column/sec. Elapsed time 14.22 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 47.86 sec. Users per second: 744


[I 2024-11-15 13:59:04,612] Trial 49 finished with value: 0.03553430234761781 and parameters: {'similarity': 'jaccard', 'topK': 237, 'shrink': 719, 'feature_weighting': 'TF-IDF'}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2490.98 column/sec. Elapsed time 15.30 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 39.18 sec. Users per second: 909


[I 2024-11-15 13:59:59,800] Trial 50 finished with value: 0.0460562813129167 and parameters: {'similarity': 'tversky', 'topK': 70, 'shrink': 805, 'feature_weighting': 'BM25', 'tversky_alpha': 0.8128539874566407, 'tversky_beta': 1.6028430845511972}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2357.94 column/sec. Elapsed time 16.17 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 58.19 sec. Users per second: 612


[I 2024-11-15 14:01:15,723] Trial 51 finished with value: 0.039426681783823095 and parameters: {'similarity': 'tversky', 'topK': 676, 'shrink': 565, 'feature_weighting': 'BM25', 'tversky_alpha': 1.3988443522613943, 'tversky_beta': 1.8580160041283136}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2512.18 column/sec. Elapsed time 15.17 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 35.54 sec. Users per second: 1001


[I 2024-11-15 14:02:07,099] Trial 52 finished with value: 0.04870819202803869 and parameters: {'similarity': 'tversky', 'topK': 32, 'shrink': 350, 'feature_weighting': 'BM25', 'tversky_alpha': 1.6304015120627318, 'tversky_beta': 1.6048635308339847}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2477.31 column/sec. Elapsed time 15.39 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 42.77 sec. Users per second: 832


[I 2024-11-15 14:03:06,077] Trial 53 finished with value: 0.045271515751497377 and parameters: {'similarity': 'tversky', 'topK': 165, 'shrink': 257, 'feature_weighting': 'BM25', 'tversky_alpha': 1.888921900727691, 'tversky_beta': 1.649745831863684}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2502.94 column/sec. Elapsed time 15.23 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 36.08 sec. Users per second: 986


[I 2024-11-15 14:03:58,053] Trial 54 finished with value: 0.04781852832906185 and parameters: {'similarity': 'tversky', 'topK': 42, 'shrink': 474, 'feature_weighting': 'BM25', 'tversky_alpha': 0.27706301999846095, 'tversky_beta': 1.992613891297061}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2505.41 column/sec. Elapsed time 15.22 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 41.53 sec. Users per second: 857


[I 2024-11-15 14:04:55,532] Trial 55 finished with value: 0.04559385681509411 and parameters: {'similarity': 'tversky', 'topK': 117, 'shrink': 419, 'feature_weighting': 'BM25', 'tversky_alpha': 1.486168782765245, 'tversky_beta': 1.3415690789529688}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2600.47 column/sec. Elapsed time 14.66 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 40.78 sec. Users per second: 873


[I 2024-11-15 14:05:51,600] Trial 56 finished with value: 0.041067673808296026 and parameters: {'similarity': 'asymmetric', 'topK': 182, 'shrink': 338, 'feature_weighting': 'BM25', 'asymmetric_alpha': 0.6714026688817581}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2531.76 column/sec. Elapsed time 15.06 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 33.61 sec. Users per second: 1059


[I 2024-11-15 14:06:40,909] Trial 57 finished with value: 0.05041181211914278 and parameters: {'similarity': 'tversky', 'topK': 13, 'shrink': 594, 'feature_weighting': 'BM25', 'tversky_alpha': 1.6975384381917937, 'tversky_beta': 1.7122048850834588}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2293.46 column/sec. Elapsed time 16.62 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 1.21 min. Users per second: 488


[I 2024-11-15 14:08:13,839] Trial 58 finished with value: 0.034509558592364 and parameters: {'similarity': 'tversky', 'topK': 1489, 'shrink': 613, 'feature_weighting': 'none', 'tversky_alpha': 1.74053085657654, 'tversky_beta': 1.7719194440063313}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2610.64 column/sec. Elapsed time 14.60 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 42.71 sec. Users per second: 833


[I 2024-11-15 14:09:11,863] Trial 59 finished with value: 0.04110913562409417 and parameters: {'similarity': 'cosine', 'topK': 247, 'shrink': 532, 'feature_weighting': 'BM25'}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2485.15 column/sec. Elapsed time 15.34 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 42.01 sec. Users per second: 847


[I 2024-11-15 14:10:09,954] Trial 60 finished with value: 0.040058974753452645 and parameters: {'similarity': 'tversky', 'topK': 117, 'shrink': 649, 'feature_weighting': 'TF-IDF', 'tversky_alpha': 1.840560431740111, 'tversky_beta': 1.878829408744843}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2501.85 column/sec. Elapsed time 15.24 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 1.14 min. Users per second: 522


[I 2024-11-15 14:11:35,555] Trial 61 finished with value: 0.03729678570871025 and parameters: {'similarity': 'jaccard', 'topK': 1176, 'shrink': 443, 'feature_weighting': 'BM25'}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2494.97 column/sec. Elapsed time 15.28 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 36.21 sec. Users per second: 983


[I 2024-11-15 14:12:27,708] Trial 62 finished with value: 0.047988403489876645 and parameters: {'similarity': 'tversky', 'topK': 46, 'shrink': 366, 'feature_weighting': 'BM25', 'tversky_alpha': 1.7305390700306653, 'tversky_beta': 1.7349226213303433}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2526.12 column/sec. Elapsed time 15.09 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 33.98 sec. Users per second: 1048


[I 2024-11-15 14:13:17,390] Trial 63 finished with value: 0.05004167697888586 and parameters: {'similarity': 'tversky', 'topK': 17, 'shrink': 577, 'feature_weighting': 'BM25', 'tversky_alpha': 1.1915414905637616, 'tversky_beta': 1.6576662840758978}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 3191.42 column/sec. Elapsed time 11.94 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 25.62 sec. Users per second: 1389


[I 2024-11-15 14:13:55,548] Trial 64 finished with value: 0.00028209999175022335 and parameters: {'similarity': 'tversky', 'topK': 0, 'shrink': 569, 'feature_weighting': 'BM25', 'tversky_alpha': 1.187922394912016, 'tversky_beta': 1.88910377262988}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2444.09 column/sec. Elapsed time 15.60 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 49.19 sec. Users per second: 724


[I 2024-11-15 14:15:01,364] Trial 65 finished with value: 0.04110422812192565 and parameters: {'similarity': 'tversky', 'topK': 326, 'shrink': 602, 'feature_weighting': 'BM25', 'tversky_alpha': 0.9632795701547263, 'tversky_beta': 1.7030729400729827}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2350.11 column/sec. Elapsed time 16.22 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 1.05 min. Users per second: 564


[I 2024-11-15 14:16:22,506] Trial 66 finished with value: 0.040068415173344085 and parameters: {'similarity': 'tversky', 'topK': 872, 'shrink': 493, 'feature_weighting': 'BM25', 'tversky_alpha': 1.9538070962424121, 'tversky_beta': 0.5587398131480832}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2490.31 column/sec. Elapsed time 15.31 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 41.19 sec. Users per second: 864


[I 2024-11-15 14:17:19,771] Trial 67 finished with value: 0.0453023688006813 and parameters: {'similarity': 'tversky', 'topK': 101, 'shrink': 718, 'feature_weighting': 'BM25', 'tversky_alpha': 1.1457222372113227, 'tversky_beta': 1.5156515953972616}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2741.99 column/sec. Elapsed time 13.90 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 44.06 sec. Users per second: 808


[I 2024-11-15 14:18:18,478] Trial 68 finished with value: 0.04389744635972886 and parameters: {'similarity': 'dice', 'topK': 152, 'shrink': 526, 'feature_weighting': 'BM25'}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 458.78 column/sec. Elapsed time 1.38 min
EvaluatorHoldout: Processed 35595 (100.0%) in 44.20 sec. Users per second: 805


[I 2024-11-15 14:20:26,192] Trial 69 finished with value: 0.03381664871782803 and parameters: {'similarity': 'euclidean', 'topK': 56, 'shrink': 667, 'feature_weighting': 'BM25', 'normalize_avg_row': False, 'similarity_from_distance_mode': 'lin', 'normalize': False}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2442.08 column/sec. Elapsed time 15.61 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 41.28 sec. Users per second: 862


[I 2024-11-15 14:21:23,835] Trial 70 finished with value: 0.04474928706323398 and parameters: {'similarity': 'tversky', 'topK': 205, 'shrink': 19, 'feature_weighting': 'none', 'tversky_alpha': 1.293050322279085, 'tversky_beta': 1.3160863991320677}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2479.90 column/sec. Elapsed time 15.37 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 41.57 sec. Users per second: 856


[I 2024-11-15 14:22:21,520] Trial 71 finished with value: 0.04493223700492812 and parameters: {'similarity': 'tversky', 'topK': 119, 'shrink': 586, 'feature_weighting': 'BM25', 'tversky_alpha': 0.9303203092321136, 'tversky_beta': 1.9022632478972958}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2532.31 column/sec. Elapsed time 15.05 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 35.60 sec. Users per second: 1000


[I 2024-11-15 14:23:12,823] Trial 72 finished with value: 0.04824240295921554 and parameters: {'similarity': 'tversky', 'topK': 37, 'shrink': 300, 'feature_weighting': 'BM25', 'tversky_alpha': 1.650209529323279, 'tversky_beta': 1.5511002298861463}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2529.59 column/sec. Elapsed time 15.07 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 32.50 sec. Users per second: 1095


[I 2024-11-15 14:24:01,027] Trial 73 finished with value: 0.05089205724898772 and parameters: {'similarity': 'tversky', 'topK': 10, 'shrink': 401, 'feature_weighting': 'BM25', 'tversky_alpha': 1.4706479839783362, 'tversky_beta': 1.7007462163401423}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2503.22 column/sec. Elapsed time 15.23 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 39.38 sec. Users per second: 904


[I 2024-11-15 14:24:56,337] Trial 74 finished with value: 0.0463933905912398 and parameters: {'similarity': 'tversky', 'topK': 85, 'shrink': 421, 'feature_weighting': 'BM25', 'tversky_alpha': 1.4397230140876243, 'tversky_beta': 1.4205061441474371}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2538.30 column/sec. Elapsed time 15.02 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 33.60 sec. Users per second: 1059


[I 2024-11-15 14:25:45,603] Trial 75 finished with value: 0.05016692530830612 and parameters: {'similarity': 'tversky', 'topK': 16, 'shrink': 509, 'feature_weighting': 'BM25', 'tversky_alpha': 1.3131890318899435, 'tversky_beta': 1.6964634649646244}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2943.88 column/sec. Elapsed time 12.95 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 30.00 sec. Users per second: 1187


[I 2024-11-15 14:26:29,152] Trial 76 finished with value: 0.03990265932659396 and parameters: {'similarity': 'tversky', 'topK': 1, 'shrink': 504, 'feature_weighting': 'BM25', 'tversky_alpha': 1.281319753318321, 'tversky_beta': 0.006728388240836436}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2630.25 column/sec. Elapsed time 14.49 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 53.21 sec. Users per second: 669


[I 2024-11-15 14:27:37,406] Trial 77 finished with value: 0.01944379895517679 and parameters: {'similarity': 'asymmetric', 'topK': 143, 'shrink': 633, 'feature_weighting': 'TF-IDF', 'asymmetric_alpha': 1.9947806633222271}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2527.09 column/sec. Elapsed time 15.08 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 39.10 sec. Users per second: 910


[I 2024-11-15 14:28:32,276] Trial 78 finished with value: 0.04660077547898707 and parameters: {'similarity': 'tversky', 'topK': 71, 'shrink': 451, 'feature_weighting': 'BM25', 'tversky_alpha': 1.1062765393183756, 'tversky_beta': 1.1757483344162105}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2630.63 column/sec. Elapsed time 14.49 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 40.94 sec. Users per second: 869


[I 2024-11-15 14:29:28,305] Trial 79 finished with value: 0.04123985333235069 and parameters: {'similarity': 'cosine', 'topK': 188, 'shrink': 481, 'feature_weighting': 'BM25'}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2755.97 column/sec. Elapsed time 13.83 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 36.24 sec. Users per second: 982


[I 2024-11-15 14:30:18,971] Trial 80 finished with value: 0.048472722225565056 and parameters: {'similarity': 'jaccard', 'topK': 35, 'shrink': 524, 'feature_weighting': 'BM25'}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2469.76 column/sec. Elapsed time 15.44 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 46.08 sec. Users per second: 772


[I 2024-11-15 14:31:21,411] Trial 81 finished with value: 0.04325966728874271 and parameters: {'similarity': 'tversky', 'topK': 238, 'shrink': 551, 'feature_weighting': 'BM25', 'tversky_alpha': 1.3587698807119635, 'tversky_beta': 1.9283796142335492}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2960.14 column/sec. Elapsed time 12.88 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 29.03 sec. Users per second: 1226


[I 2024-11-15 14:32:03,929] Trial 82 finished with value: 0.04288301259540072 and parameters: {'similarity': 'tversky', 'topK': 1, 'shrink': 381, 'feature_weighting': 'BM25', 'tversky_alpha': 1.492182746277373, 'tversky_beta': 1.702944521140242}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2490.85 column/sec. Elapsed time 15.30 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 39.76 sec. Users per second: 895


[I 2024-11-15 14:32:59,709] Trial 83 finished with value: 0.04638017199668959 and parameters: {'similarity': 'tversky', 'topK': 97, 'shrink': 329, 'feature_weighting': 'BM25', 'tversky_alpha': 1.6620095572064992, 'tversky_beta': 1.8045338152618209}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2499.77 column/sec. Elapsed time 15.25 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.32 sec. Users per second: 954


[I 2024-11-15 14:33:52,958] Trial 84 finished with value: 0.04755438386432734 and parameters: {'similarity': 'tversky', 'topK': 53, 'shrink': 399, 'feature_weighting': 'BM25', 'tversky_alpha': 1.9975852143889798, 'tversky_beta': 1.5349953505299967}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2478.48 column/sec. Elapsed time 15.38 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 42.83 sec. Users per second: 831


[I 2024-11-15 14:34:51,948] Trial 85 finished with value: 0.04529044787367445 and parameters: {'similarity': 'tversky', 'topK': 136, 'shrink': 589, 'feature_weighting': 'BM25', 'tversky_alpha': 1.8999737791574287, 'tversky_beta': 1.7025143972135763}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2521.10 column/sec. Elapsed time 15.12 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 36.35 sec. Users per second: 979


[I 2024-11-15 14:35:44,088] Trial 86 finished with value: 0.0480227058374956 and parameters: {'similarity': 'tversky', 'topK': 35, 'shrink': 693, 'feature_weighting': 'BM25', 'tversky_alpha': 0.29687211593555274, 'tversky_beta': 1.4236622482089079}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 415.58 column/sec. Elapsed time 1.53 min
EvaluatorHoldout: Processed 35595 (100.0%) in 36.83 sec. Users per second: 966


[I 2024-11-15 14:37:53,125] Trial 87 finished with value: 0.024460692044762217 and parameters: {'similarity': 'euclidean', 'topK': 88, 'shrink': 431, 'feature_weighting': 'BM25', 'normalize_avg_row': True, 'similarity_from_distance_mode': 'log', 'normalize': True}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2770.37 column/sec. Elapsed time 13.76 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 35.61 sec. Users per second: 1000


[I 2024-11-15 14:38:42,981] Trial 88 finished with value: 0.04136517747052781 and parameters: {'similarity': 'dice', 'topK': 31, 'shrink': 471, 'feature_weighting': 'none'}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2486.29 column/sec. Elapsed time 15.33 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 43.33 sec. Users per second: 821


[I 2024-11-15 14:39:42,453] Trial 89 finished with value: 0.04489278189151595 and parameters: {'similarity': 'tversky', 'topK': 182, 'shrink': 280, 'feature_weighting': 'BM25', 'tversky_alpha': 1.2713702506917117, 'tversky_beta': 1.6635853989561964}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2503.30 column/sec. Elapsed time 15.23 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 42.12 sec. Users per second: 845


[I 2024-11-15 14:40:40,540] Trial 90 finished with value: 0.03959412771991614 and parameters: {'similarity': 'tversky', 'topK': 116, 'shrink': 504, 'feature_weighting': 'TF-IDF', 'tversky_alpha': 1.4287596154555884, 'tversky_beta': 0.9296763238858963}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2519.93 column/sec. Elapsed time 15.13 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.27 sec. Users per second: 955


[I 2024-11-15 14:41:33,619] Trial 91 finished with value: 0.04713699311254993 and parameters: {'similarity': 'tversky', 'topK': 66, 'shrink': 238, 'feature_weighting': 'BM25', 'tversky_alpha': 1.5508638077016477, 'tversky_beta': 1.8019552464439286}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2518.48 column/sec. Elapsed time 15.14 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 34.32 sec. Users per second: 1037


[I 2024-11-15 14:42:23,723] Trial 92 finished with value: 0.0494150986517173 and parameters: {'similarity': 'tversky', 'topK': 23, 'shrink': 358, 'feature_weighting': 'BM25', 'tversky_alpha': 1.5655161182063384, 'tversky_beta': 1.998701785571224}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 3187.80 column/sec. Elapsed time 11.96 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 25.70 sec. Users per second: 1385


[I 2024-11-15 14:43:01,976] Trial 93 finished with value: 0.00028209999175022335 and parameters: {'similarity': 'tversky', 'topK': 0, 'shrink': 408, 'feature_weighting': 'BM25', 'tversky_alpha': 1.6869164918569155, 'tversky_beta': 1.9898082616338229}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2492.23 column/sec. Elapsed time 15.30 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 38.32 sec. Users per second: 929


[I 2024-11-15 14:43:56,288] Trial 94 finished with value: 0.0468475508197362 and parameters: {'similarity': 'tversky', 'topK': 79, 'shrink': 310, 'feature_weighting': 'BM25', 'tversky_alpha': 1.5859104464024874, 'tversky_beta': 1.927537626298864}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2523.50 column/sec. Elapsed time 15.11 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 35.49 sec. Users per second: 1003


[I 2024-11-15 14:44:47,579] Trial 95 finished with value: 0.04895357048096049 and parameters: {'similarity': 'tversky', 'topK': 32, 'shrink': 546, 'feature_weighting': 'BM25', 'tversky_alpha': 1.812893124104506, 'tversky_beta': 1.938646826121001}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2482.41 column/sec. Elapsed time 15.36 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 42.14 sec. Users per second: 845


[I 2024-11-15 14:45:45,900] Trial 96 finished with value: 0.04532111363063582 and parameters: {'similarity': 'tversky', 'topK': 155, 'shrink': 345, 'feature_weighting': 'BM25', 'tversky_alpha': 1.3451806316257346, 'tversky_beta': 1.8368629826013398}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2610.05 column/sec. Elapsed time 14.61 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 38.10 sec. Users per second: 934


[I 2024-11-15 14:46:39,178] Trial 97 finished with value: 0.0414534021409269 and parameters: {'similarity': 'asymmetric', 'topK': 120, 'shrink': 621, 'feature_weighting': 'BM25', 'asymmetric_alpha': 0.608297202391369}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2510.34 column/sec. Elapsed time 15.19 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 37.73 sec. Users per second: 943


[I 2024-11-15 14:47:32,769] Trial 98 finished with value: 0.04737162010002132 and parameters: {'similarity': 'tversky', 'topK': 62, 'shrink': 362, 'feature_weighting': 'BM25', 'tversky_alpha': 1.2349592073597655, 'tversky_beta': 1.7505445852322037}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2522.55 column/sec. Elapsed time 15.11 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 35.38 sec. Users per second: 1006


[I 2024-11-15 14:48:23,928] Trial 99 finished with value: 0.049133616278368104 and parameters: {'similarity': 'tversky', 'topK': 28, 'shrink': 574, 'feature_weighting': 'BM25', 'tversky_alpha': 1.0506092035328631, 'tversky_beta': 1.5660235235456121}. Best is trial 17 with value: 0.05161769755427387.


Similarity column 38121 (100.0%), 2635.77 column/sec. Elapsed time 14.46 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 36.90 sec. Users per second: 965


[I 2024-11-15 14:49:15,809] Trial 100 finished with value: 0.041664031197531724 and parameters: {'similarity': 'cosine', 'topK': 86, 'shrink': 389, 'feature_weighting': 'BM25'}. Best is trial 17 with value: 0.05161769755427387.


In [22]:
recommender_instance = ItemKNNCFRecommender(URM_train + URM_validation)
recommender_instance.fit(**study.best_trial.params)

Similarity column 38121 (100.0%), 1664.39 column/sec. Elapsed time 22.90 sec


# Testing

Create the recommendations for the submission. 

In [23]:
def create_submission(data_target_users_test, recommender_instance, cutoff=10, output_file=f"/kaggle/working/submission_{config['model']}.csv"):
    
    target_result = []

    for target in data_target_users_test["user_id"]:
        target_result.append(recommender_instance.recommend(target, cutoff=cutoff, remove_seen_flag=True))

    user_ids = data_target_users_test["user_id"]
    formatted_data = {
        "user_id": user_ids,
        "item_list": [" ".join(map(str, items)) for items in target_result]
    }

    submission_df = pd.DataFrame(formatted_data)
    submission_df.to_csv(output_file, index=False, header=["user_id", "item_list"])

    print(f"Submission file saved as {output_file}")

In [24]:
data_target_users_test = pd.read_csv('/kaggle/input/recommender-system-2024-challenge-polimi/data_target_users_test.csv')

create_submission(data_target_users_test, recommender_instance)

Submission file saved as /kaggle/working/submission_ItemKNNCF.csv


# Save Version on GitHub 

Write or import a json file where best hyperparameters are saved. 

In [25]:
import json

if config['tune_parameters']:
    with open(f'/kaggle/working/best_params_{config["model"]}.json', 'w') as params_file:
        json.dump(study.best_params, params_file)
        
    if config['save_github']:
        upload_file(
            f'/kaggle/working/best_params_{config["model"]}.json', 
            f'TrainedModels/{config["model"]}Recommender/best_params_{config["model"]}.json', 
            f'{config["model"]} tuning results (from kaggle notebook)'
        )
elif config['copy_prev_best_params']:
    shutil.copyfile(
        f'/kaggle/working/RECsys_Challenge2024/TrainedModels/{config["model"]}Recommender/'\
        f'best_params_{config["model"]}.json', 
        f'/kaggle/working/best_params_{config["model"]}.json'
    )

File 'TrainedModels/ItemKNNCFRecommender/best_params_ItemKNNCF.json' created successfully.


In [26]:
if config['save_github']:
    upload_file(
                f'/kaggle/working/submission_{config["model"]}.csv', 
                f'TrainedModels/{config["model"]}Recommender/Submission/'\
                    f'submission_{config["model"]}.csv', 
                f'New {config["model"]} submission (from kaggle notebook)'
            )

File 'TrainedModels/ItemKNNCFRecommender/Submission/submission_ItemKNNCF.csv' created successfully.


Save the history of the tuned model.

In [30]:
if config['save_github'] and config['tune_parameters']:
    upload_file(
        config['database_path'], 
        f'TrainedModels/{config["model"]}Recommender/history_{config["model"]}.db',
        f'Tuning {config["model"]} db updated results (from kaggle notebook)'
    )

File 'TrainedModels/ItemKNNCFRecommender/history_ItemKNNCF.db' created successfully.
