# Set connection with GitHub

In [1]:
# ! cd /kaggle/working && rm -rf RECsys_Challenge2024

In [2]:
from kaggle_secrets import UserSecretsClient

# Read the GitHub personal access token stored as the Kaggle secret named "Token".
# The same `token` is reused further down to authenticate the PyGithub client.
token = UserSecretsClient().get_secret("Token")

# Clone the project repository over HTTPS, with the token interpolated into the URL.
# NOTE(review): the clone URL (token included) is presumably persisted in the clone's
# .git/config, and re-running this cell presumably fails once the directory exists — confirm.
! git clone https://{token}@github.com/madratak/RECsys_Challenge2024.git

Cloning into 'RECsys_Challenge2024'...
remote: Enumerating objects: 1784, done.[K
remote: Counting objects: 100% (166/166), done.[K
remote: Compressing objects: 100% (132/132), done.[K
remote: Total 1784 (delta 98), reused 61 (delta 29), pack-reused 1618 (from 1)[K
Receiving objects: 100% (1784/1784), 69.71 MiB | 23.71 MiB/s, done.
Resolving deltas: 100% (1043/1043), done.


In [3]:
!pip install PyGithub requests

Collecting PyGithub
  Downloading PyGithub-2.5.0-py3-none-any.whl.metadata (3.9 kB)
Collecting pynacl>=1.4.0 (from PyGithub)
  Downloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl.metadata (8.6 kB)
Downloading PyGithub-2.5.0-py3-none-any.whl (375 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m375.9/375.9 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[?25hDownloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (856 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m856.7/856.7 kB[0m [31m34.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pynacl, PyGithub
Successfully installed PyGithub-2.5.0 pynacl-1.5.0


In [4]:
# Run the repository's Cython build script from inside the cloned folder; per its own log it
# compiles every Cython file it finds using the current python environment.
!cd RECsys_Challenge2024 && python run_compile_all_cython.py

run_compile_all_cython: Found 11 Cython files in 5 folders...
run_compile_all_cython: All files will be compiled using your current python environment: '/opt/conda/bin/python'
Compiling [1/11]: MatrixFactorization_Cython_Epoch.pyx... 
In file included from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/ndarraytypes.h:1929[m[K,
                 from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/ndarrayobject.h:12[m[K,
                 from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/arrayobject.h:5[m[K,
                 from [01m[KMatrixFactorization_Cython_Epoch.c:1252[m[K:
      |  [01;35m[K^~~~~~~[m[K
[01m[KMatrixFactorization_Cython_Epoch.c:[m[K In function '[01m[K__pyx_pf_32MatrixFactorization_Cython_Epoch_32MatrixFactorization_Cython_Epoch_10epochIteration_Cython_ASY_SVD_SGD[m[K':
26255 |  for (__pyx_t_21 = __pyx_v_start_pos_seen_items; [01;35m[K__pyx_t_21 < __pyx_t_20[m[K; 

In [5]:
import numpy as np
import pandas as pd
import scipy.sparse as sps
import time
import shutil

np.random.seed(42)

## Import the repository

In [6]:
from github import Github, Auth

# Build an authenticated client from the personal access token read earlier.
auth_token = Auth.Token(token)
github_client = Github(auth=auth_token)

# Name of the repository to look up among the authenticated user's repositories.
target_repo_name = 'RECsys_Challenge2024'
repo = None

try:
    # First repository whose name matches, or None when the listing has no match.
    repo = next(
        (
            candidate
            for candidate in github_client.get_user().get_repos()
            if candidate.name == target_repo_name
        ),
        None,
    )
    if repo is not None:
        print(f"Repository '{target_repo_name}' found.")
    else:
        print(f"Repository '{target_repo_name}' not found.")
except Exception as e:
    # Best effort: report the problem and leave `repo` as None.
    print("An error occurred while accessing the repositories:", e)

Repository 'RECsys_Challenge2024' found.


In [7]:
def upload_file(filepath_kaggle, filepath_github, commit_message):
    """
    Uploads a file from Kaggle to GitHub, updating it if it already exists in the repository,
    or creating it if it does not.

    Only a "not found" answer from GitHub triggers the creation path. Any other failure
    (authentication, network, rate limit, rejected update, ...) is propagated to the caller
    instead of being mistaken for a missing file.

    Parameters:
    - filepath_kaggle: Path to the file in the Kaggle environment.
    - filepath_github: Target path in the GitHub repository where the file should be uploaded.
    - commit_message: Message for the commit on GitHub.

    Raises:
    - FileNotFoundError: if `filepath_kaggle` does not exist locally.
    - github.GithubException: if GitHub rejects the request for a reason other than the
      target file not existing yet.
    """
    # Local import: keeps this cell self-contained without touching the notebook's import cells.
    from github import UnknownObjectException

    # Read the payload once, up front, so a missing local file fails immediately with a clear
    # error rather than surfacing from inside the GitHub error handling.
    with open(filepath_kaggle, "rb") as file:
        payload = file.read()

    try:
        # Check if the file already exists in the GitHub repository
        contents = repo.get_contents(filepath_github)
    except UnknownObjectException:
        # The file does not exist yet (HTTP 404): create it
        repo.create_file(filepath_github, commit_message, payload)
        print(f"File '{filepath_github}' created successfully.")
        return

    # The file exists: update it in place, using its current blob sha
    repo.update_file(contents.path, commit_message, payload, contents.sha)
    print(f"File '{filepath_github}' updated successfully.")

In [39]:
# Run configuration read by the cells below.
config = dict(
    # Model tag used to build file names, repository paths and the optuna study name.
    model='RP3beta',
    # When True, the optuna study is created/loaded and optimised.
    tune_parameters=True,
    # SQLite file used as optuna storage (and uploaded as the tuning history).
    database_path='/kaggle/working/history_RP3betaRecommender.db',
    # When not tuning, copy the best parameters stored in the cloned repository instead.
    copy_prev_best_params=False,
    # When True, results are pushed to the GitHub repository.
    save_github=True,
    # Not read by any cell visible in this notebook.
    fixed=True,
)

Import the database where previous tuning trials have been saved.

In [None]:
# Tuning history saved by earlier runs, as found inside the cloned repository.
previous_history_db = (
    '/kaggle/working/RECsys_Challenge2024/TrainedModels/FixedTopK/'
    f'{config["model"]}Recommender/history_{config["model"]}.db'
)

try:
    shutil.copyfile(previous_history_db, config['database_path'])
except FileNotFoundError:
    # No earlier history in the repository: optuna will create the database itself.
    pass

# Construction of URM and ICM matrices

In [11]:
URM_all_dataframe = pd.read_csv("/kaggle/input/recommender-system-2024-challenge-polimi/data_train.csv")

# The ids are used directly as row/column indices, so the matrix must be sized from the
# largest id (+1), not from the number of distinct ids: with a gap in the ids the distinct
# count is smaller than the largest index and csr_matrix would reject the data.
# For contiguous 0..n-1 ids both computations give the same shape.
n_users = int(URM_all_dataframe["user_id"].max()) + 1
n_items = int(URM_all_dataframe["item_id"].max()) + 1

# User-by-item interaction matrix built from (data, (row, col)) triplets.
URM_all = sps.csr_matrix(
    (
        URM_all_dataframe["data"].values,
        (URM_all_dataframe["user_id"].values, URM_all_dataframe["item_id"].values),
    ),
    shape=(n_users, n_items),
)

URM_all

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 1764607 stored elements and shape (35736, 38121)>

In [12]:
ICM_dataframe = pd.read_csv("/kaggle/input/recommender-system-2024-challenge-polimi/data_ICM_metadata.csv")

# As for the URM, size the matrix from the largest id (+1) rather than from the number of
# distinct ids, so that a gap in item or feature ids cannot push an index outside the shape.
# Note that `n_items` is rebound here from the ICM file.
n_items = int(ICM_dataframe["item_id"].max()) + 1
n_features = int(ICM_dataframe["feature_id"].max()) + 1

# Item-by-feature content matrix built from (data, (row, col)) triplets.
ICM_all = sps.csr_matrix(
    (
        ICM_dataframe["data"].values,
        (ICM_dataframe["item_id"].values, ICM_dataframe["feature_id"].values),
    ),
    shape=(n_items, n_features),
)

ICM_all

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 2940040 stored elements and shape (38121, 94331)>

# Training

In [13]:
# Switch the working directory to the cloned repository; the cells below import its
# packages (Evaluation, Data_manager, Recommenders) by their top-level names.
cd /kaggle/working/RECsys_Challenge2024/

/kaggle/working/RECsys_Challenge2024


In [14]:
from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import (
    split_train_in_two_percentage_global_sample,
)

# Split the interactions in two parts, asking for 80% of them in the training matrix.
URM_train, URM_validation = split_train_in_two_percentage_global_sample(
    URM_all,
    train_percentage=0.80,
)

# Hold-out evaluator on the validation part, with metrics requested at cutoff 10 only.
evaluator_validation = EvaluatorHoldout(
    URM_validation,
    cutoff_list=[10],
)

EvaluatorHoldout: Ignoring 141 ( 0.4%) Users that have less than 1 test interactions


In [44]:
import optuna
import pandas as pd
# from Recommenders.SLIM.SLIMElasticNetRecommender import SLIMElasticNetRecommender
from Recommenders.GraphBased.RP3betaRecommender import RP3betaRecommender

def objective_function_RP3betaRecommender(optuna_trial):
    """
    Optuna objective for RP3beta with a fixed neighbourhood size.

    Fits an RP3betaRecommender on URM_train with hyperparameters sampled from the trial
    (alpha and beta in [0, 3], normalize_similarity and implicit as booleans) while topK
    is held at 10 and is not registered with the trial.

    Parameters:
    - optuna_trial: the optuna trial used to sample the hyperparameters.

    Returns:
    - The "MAP" value reported by evaluator_validation at cutoff 10.
    """
    model = RP3betaRecommender(URM_train)

    # Sampled in the order alpha, beta, normalize_similarity, implicit.
    sampled_alpha = optuna_trial.suggest_float("alpha", 0, 3, log=False)
    sampled_beta = optuna_trial.suggest_float("beta", 0, 3, log=False)
    sampled_normalize = optuna_trial.suggest_categorical("normalize_similarity", [True, False])
    sampled_implicit = optuna_trial.suggest_categorical("implicit", [True, False])

    model.fit(
        alpha=sampled_alpha,
        beta=sampled_beta,
        normalize_similarity=sampled_normalize,
        implicit=sampled_implicit,
        topK=10,  # Fixed value, not tuned
    )

    metrics_by_cutoff, _ = evaluator_validation.evaluateRecommender(model)

    return metrics_by_cutoff.loc[10]["MAP"]

In [45]:
if config['tune_parameters']:

    # The study is stored in the SQLite file at config["database_path"]; an existing study
    # with the same name is loaded and extended instead of being recreated.
    study = optuna.create_study(
        study_name=f'hyperparameters_tuning_{config["model"]}',
        storage=f'sqlite:///{config["database_path"]}',
        direction='maximize',
        load_if_exists=True,
    )

    # Run 30 further trials of the RP3beta objective.
    study.optimize(
        objective_function_RP3betaRecommender,
        n_trials=30,
    )

[I 2024-11-15 22:37:19,750] A new study created in RDB with name: hyperparameters_tuning_RP3beta


RP3betaRecommender: Similarity column 38121 (100.0%), 2146.37 column/sec. Elapsed time 17.76 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 32.31 sec. Users per second: 1102


[I 2024-11-15 22:38:11,393] Trial 0 finished with value: 0.01367078486589691 and parameters: {'alpha': 1.7575884479692978, 'beta': 0.978229592098218, 'normalize_similarity': False, 'implicit': True}. Best is trial 0 with value: 0.01367078486589691.


RP3betaRecommender: Similarity column 38121 (100.0%), 2130.27 column/sec. Elapsed time 17.89 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 32.05 sec. Users per second: 1111


[I 2024-11-15 22:39:02,916] Trial 1 finished with value: 0.011259298278472543 and parameters: {'alpha': 2.066275500083099, 'beta': 0.8856867039648911, 'normalize_similarity': False, 'implicit': False}. Best is trial 0 with value: 0.01367078486589691.


RP3betaRecommender: Similarity column 38121 (100.0%), 2149.05 column/sec. Elapsed time 17.74 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 33.02 sec. Users per second: 1078


[I 2024-11-15 22:39:55,254] Trial 2 finished with value: 0.034956129026503246 and parameters: {'alpha': 0.9092566601324303, 'beta': 0.9068601032025027, 'normalize_similarity': True, 'implicit': True}. Best is trial 2 with value: 0.034956129026503246.


RP3betaRecommender: Similarity column 38121 (100.0%), 2163.94 column/sec. Elapsed time 17.62 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 32.18 sec. Users per second: 1106


[I 2024-11-15 22:40:46,441] Trial 3 finished with value: 0.02110786917192288 and parameters: {'alpha': 0.279890482392513, 'beta': 1.52909926792103, 'normalize_similarity': True, 'implicit': False}. Best is trial 2 with value: 0.034956129026503246.


RP3betaRecommender: Similarity column 38121 (100.0%), 2161.81 column/sec. Elapsed time 17.63 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 31.55 sec. Users per second: 1128


[I 2024-11-15 22:41:37,053] Trial 4 finished with value: 0.008203393779668604 and parameters: {'alpha': 1.5810604901060665, 'beta': 2.4930976778346623, 'normalize_similarity': True, 'implicit': False}. Best is trial 2 with value: 0.034956129026503246.


RP3betaRecommender: Similarity column 38121 (100.0%), 2164.42 column/sec. Elapsed time 17.61 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 32.68 sec. Users per second: 1089


[I 2024-11-15 22:42:29,000] Trial 5 finished with value: 0.02141373186442713 and parameters: {'alpha': 1.6004806209312807, 'beta': 0.5548901207341278, 'normalize_similarity': False, 'implicit': False}. Best is trial 2 with value: 0.034956129026503246.


RP3betaRecommender: Similarity column 38121 (100.0%), 2106.32 column/sec. Elapsed time 18.10 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 32.95 sec. Users per second: 1080


[I 2024-11-15 22:43:21,513] Trial 6 finished with value: 0.01744835416959326 and parameters: {'alpha': 1.3641339193107898, 'beta': 1.3974467797659966, 'normalize_similarity': True, 'implicit': False}. Best is trial 2 with value: 0.034956129026503246.


RP3betaRecommender: Similarity column 38121 (100.0%), 2168.58 column/sec. Elapsed time 17.58 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 31.98 sec. Users per second: 1113


[I 2024-11-15 22:44:12,686] Trial 7 finished with value: 0.007413656055670387 and parameters: {'alpha': 2.7877085827534884, 'beta': 0.816692614985562, 'normalize_similarity': False, 'implicit': True}. Best is trial 2 with value: 0.034956129026503246.


RP3betaRecommender: Similarity column 38121 (100.0%), 2156.86 column/sec. Elapsed time 17.67 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 30.79 sec. Users per second: 1156


[I 2024-11-15 22:45:02,331] Trial 8 finished with value: 0.011348652276381254 and parameters: {'alpha': 0.12392215987581334, 'beta': 2.8305491511868746, 'normalize_similarity': True, 'implicit': True}. Best is trial 2 with value: 0.034956129026503246.


RP3betaRecommender: Similarity column 38121 (100.0%), 2178.74 column/sec. Elapsed time 17.50 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 31.04 sec. Users per second: 1147


[I 2024-11-15 22:45:52,276] Trial 9 finished with value: 0.013472429024051843 and parameters: {'alpha': 1.2500374931583262, 'beta': 1.689567021788299, 'normalize_similarity': False, 'implicit': True}. Best is trial 2 with value: 0.034956129026503246.


RP3betaRecommender: Similarity column 38121 (100.0%), 2176.97 column/sec. Elapsed time 17.51 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 31.74 sec. Users per second: 1121


[I 2024-11-15 22:46:43,048] Trial 10 finished with value: 0.05301452406593372 and parameters: {'alpha': 0.6297709774300776, 'beta': 0.035950479827178916, 'normalize_similarity': True, 'implicit': True}. Best is trial 10 with value: 0.05301452406593372.


RP3betaRecommender: Similarity column 38121 (100.0%), 2220.10 column/sec. Elapsed time 17.17 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 31.54 sec. Users per second: 1129


[I 2024-11-15 22:47:33,254] Trial 11 finished with value: 0.05302517965560321 and parameters: {'alpha': 0.7363270150172909, 'beta': 0.10616411527194304, 'normalize_similarity': True, 'implicit': True}. Best is trial 11 with value: 0.05302517965560321.


RP3betaRecommender: Similarity column 38121 (100.0%), 2222.80 column/sec. Elapsed time 17.15 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 31.34 sec. Users per second: 1136


[I 2024-11-15 22:48:23,179] Trial 12 finished with value: 0.05258527816239371 and parameters: {'alpha': 0.689446099687959, 'beta': 0.011381346569775748, 'normalize_similarity': True, 'implicit': True}. Best is trial 11 with value: 0.05302517965560321.


RP3betaRecommender: Similarity column 38121 (100.0%), 2207.15 column/sec. Elapsed time 17.27 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 31.83 sec. Users per second: 1118


[I 2024-11-15 22:49:13,764] Trial 13 finished with value: 0.052938811184911255 and parameters: {'alpha': 0.6611479688224743, 'beta': 0.03554757316206898, 'normalize_similarity': True, 'implicit': True}. Best is trial 11 with value: 0.05302517965560321.


RP3betaRecommender: Similarity column 38121 (100.0%), 2187.97 column/sec. Elapsed time 17.42 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 33.05 sec. Users per second: 1077


[I 2024-11-15 22:50:06,064] Trial 14 finished with value: 0.05356191903178644 and parameters: {'alpha': 0.4322828638807346, 'beta': 0.36751593598531274, 'normalize_similarity': True, 'implicit': True}. Best is trial 14 with value: 0.05356191903178644.


RP3betaRecommender: Similarity column 38121 (100.0%), 2147.44 column/sec. Elapsed time 17.75 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 32.36 sec. Users per second: 1100


[I 2024-11-15 22:50:57,983] Trial 15 finished with value: 0.05233735119744217 and parameters: {'alpha': 0.03001577935189692, 'beta': 0.4395219935968646, 'normalize_similarity': True, 'implicit': True}. Best is trial 14 with value: 0.05356191903178644.


RP3betaRecommender: Similarity column 38121 (100.0%), 2221.29 column/sec. Elapsed time 17.16 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 31.45 sec. Users per second: 1132


[I 2024-11-15 22:51:47,884] Trial 16 finished with value: 0.013279613910461306 and parameters: {'alpha': 1.019829680816093, 'beta': 2.027252373784033, 'normalize_similarity': True, 'implicit': True}. Best is trial 14 with value: 0.05356191903178644.


RP3betaRecommender: Similarity column 38121 (100.0%), 2192.97 column/sec. Elapsed time 17.38 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 33.13 sec. Users per second: 1074


[I 2024-11-15 22:52:40,115] Trial 17 finished with value: 0.050410147670997316 and parameters: {'alpha': 0.4176667640716413, 'beta': 0.5525597299692944, 'normalize_similarity': True, 'implicit': True}. Best is trial 14 with value: 0.05356191903178644.


RP3betaRecommender: Similarity column 38121 (100.0%), 2162.95 column/sec. Elapsed time 17.62 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 32.86 sec. Users per second: 1083


[I 2024-11-15 22:53:32,257] Trial 18 finished with value: 0.05105143958599316 and parameters: {'alpha': 1.0151520876956865, 'beta': 0.36107287162110296, 'normalize_similarity': True, 'implicit': True}. Best is trial 14 with value: 0.05356191903178644.


RP3betaRecommender: Similarity column 38121 (100.0%), 2207.25 column/sec. Elapsed time 17.27 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 31.78 sec. Users per second: 1120


[I 2024-11-15 22:54:22,826] Trial 19 finished with value: 0.013441531381481141 and parameters: {'alpha': 2.1690803355094834, 'beta': 1.2705743617742673, 'normalize_similarity': True, 'implicit': True}. Best is trial 14 with value: 0.05356191903178644.


RP3betaRecommender: Similarity column 38121 (100.0%), 2178.56 column/sec. Elapsed time 17.50 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 31.63 sec. Users per second: 1125


[I 2024-11-15 22:55:13,285] Trial 20 finished with value: 0.016227592380774286 and parameters: {'alpha': 0.4324254941559375, 'beta': 1.9636925153041123, 'normalize_similarity': True, 'implicit': True}. Best is trial 14 with value: 0.05356191903178644.


RP3betaRecommender: Similarity column 38121 (100.0%), 2163.94 column/sec. Elapsed time 17.62 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 32.56 sec. Users per second: 1093


[I 2024-11-15 22:56:05,058] Trial 21 finished with value: 0.05364332202890802 and parameters: {'alpha': 0.6549883139210795, 'beta': 0.18941184361517613, 'normalize_similarity': True, 'implicit': True}. Best is trial 21 with value: 0.05364332202890802.


RP3betaRecommender: Similarity column 38121 (100.0%), 2198.52 column/sec. Elapsed time 17.34 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 32.77 sec. Users per second: 1086


[I 2024-11-15 22:56:56,817] Trial 22 finished with value: 0.05304644066738282 and parameters: {'alpha': 0.7735501582101749, 'beta': 0.2801444193303202, 'normalize_similarity': True, 'implicit': True}. Best is trial 21 with value: 0.05364332202890802.


RP3betaRecommender: Similarity column 38121 (100.0%), 2192.61 column/sec. Elapsed time 17.39 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 32.56 sec. Users per second: 1093


[I 2024-11-15 22:57:48,397] Trial 23 finished with value: 0.05011111557044798 and parameters: {'alpha': 1.138984691357924, 'beta': 0.3622079565974802, 'normalize_similarity': True, 'implicit': True}. Best is trial 21 with value: 0.05364332202890802.


RP3betaRecommender: Similarity column 38121 (100.0%), 2190.73 column/sec. Elapsed time 17.40 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 32.58 sec. Users per second: 1092


[I 2024-11-15 22:58:40,101] Trial 24 finished with value: 0.047136246173328775 and parameters: {'alpha': 0.3883952493785124, 'beta': 0.6688458226307188, 'normalize_similarity': True, 'implicit': True}. Best is trial 21 with value: 0.05364332202890802.


RP3betaRecommender: Similarity column 38121 (100.0%), 2224.19 column/sec. Elapsed time 17.14 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 31.94 sec. Users per second: 1114


[I 2024-11-15 22:59:30,629] Trial 25 finished with value: 0.028171394680454313 and parameters: {'alpha': 0.006954524014365082, 'beta': 1.1283047292828896, 'normalize_similarity': True, 'implicit': True}. Best is trial 21 with value: 0.05364332202890802.


RP3betaRecommender: Similarity column 38121 (100.0%), 2215.12 column/sec. Elapsed time 17.21 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 31.88 sec. Users per second: 1117


[I 2024-11-15 23:00:21,336] Trial 26 finished with value: 0.04823045304650708 and parameters: {'alpha': 0.8720460920669386, 'beta': 0.20988776707217238, 'normalize_similarity': False, 'implicit': False}. Best is trial 21 with value: 0.05364332202890802.


RP3betaRecommender: Similarity column 38121 (100.0%), 2206.48 column/sec. Elapsed time 17.28 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 33.11 sec. Users per second: 1075


[I 2024-11-15 23:01:13,400] Trial 27 finished with value: 0.053731276909319704 and parameters: {'alpha': 0.5641307833488952, 'beta': 0.28146845879960086, 'normalize_similarity': True, 'implicit': True}. Best is trial 27 with value: 0.053731276909319704.


RP3betaRecommender: Similarity column 38121 (100.0%), 2163.79 column/sec. Elapsed time 17.62 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 33.41 sec. Users per second: 1066


[I 2024-11-15 23:02:06,099] Trial 28 finished with value: 0.047324899609137515 and parameters: {'alpha': 0.5266720702014889, 'beta': 0.6504907829362314, 'normalize_similarity': True, 'implicit': True}. Best is trial 27 with value: 0.053731276909319704.


RP3betaRecommender: Similarity column 38121 (100.0%), 2143.06 column/sec. Elapsed time 17.79 sec
EvaluatorHoldout: Processed 35595 (100.0%) in 32.80 sec. Users per second: 1085


[I 2024-11-15 23:02:58,191] Trial 29 finished with value: 0.028965315264092448 and parameters: {'alpha': 0.2527401984085997, 'beta': 1.1042048190913074, 'normalize_similarity': False, 'implicit': True}. Best is trial 27 with value: 0.053731276909319704.


In [47]:
# topK is held fixed at 10 inside objective_function_RP3betaRecommender and is never
# registered with optuna, so it is absent from study.best_trial.params. It has to be passed
# again here, otherwise fit() silently falls back to its own default topK and the submitted
# model differs from the configuration that was actually validated.
best_hyperparameters = {"topK": 10, **study.best_trial.params}

# Refit on all available interactions (train + validation) with the best configuration.
recommender_instance = RP3betaRecommender(URM_train + URM_validation)
recommender_instance.fit(**best_hyperparameters)


RP3betaRecommender: Similarity column 38121 (100.0%), 1396.82 column/sec. Elapsed time 27.29 sec


# Testing

Create the recommendations for the submission. 

In [48]:
def create_submission(data_target_users_test, recommender_instance, cutoff=10, output_file=None):
    """
    Writes the submission CSV with one row of recommendations per target user.

    Parameters:
    - data_target_users_test: DataFrame with a "user_id" column listing the target users.
    - recommender_instance: fitted recommender; its recommend(user_id, cutoff=...,
      remove_seen_flag=True) is called once per target user.
    - cutoff: number of items requested for each user.
    - output_file: destination CSV path. When None (the default) it resolves to
      /kaggle/working/submission_<config['model']>.csv at call time, so the file name
      follows the current config instead of the value config had when this cell was run.

    The CSV has the columns "user_id" and "item_list", the latter holding the recommended
    item ids separated by single spaces.
    """
    if output_file is None:
        output_file = f"/kaggle/working/submission_{config['model']}.csv"

    user_ids = data_target_users_test["user_id"]

    # One space-separated string of recommended items per target user, in input order.
    item_lists = [
        " ".join(map(str, recommender_instance.recommend(user_id, cutoff=cutoff, remove_seen_flag=True)))
        for user_id in user_ids
    ]

    submission_df = pd.DataFrame({"user_id": user_ids, "item_list": item_lists})
    submission_df.to_csv(output_file, index=False)

    print(f"Submission file saved as {output_file}")

In [49]:
# Users for which the submission must contain recommendations.
target_users_csv = "/kaggle/input/recommender-system-2024-challenge-polimi/data_target_users_test.csv"
data_target_users_test = pd.read_csv(target_users_csv)

# Cutoff and output path are left to create_submission's defaults.
create_submission(data_target_users_test, recommender_instance)

Submission file saved as /kaggle/working/submission_RP3beta.csv


# Save Version on GitHub 

Write or import a JSON file where the best hyperparameters are saved.

In [None]:
import json

# Local copy of the best hyperparameters and its location inside the GitHub repository.
best_params_local_path = f'/kaggle/working/best_params_{config["model"]}.json'
best_params_repo_path = (
    f'TrainedModels/FixedTopK/{config["model"]}Recommender/best_params_{config["model"]}.json'
)

if config['tune_parameters']:
    # Persist the parameters of the best trial of the study that has just been run.
    with open(best_params_local_path, 'w') as params_file:
        json.dump(study.best_trial.params, params_file)

    if config['save_github']:
        upload_file(
            best_params_local_path,
            best_params_repo_path,
            f'{config["model"]} tuning results (from kaggle notebook)'
        )
elif config['copy_prev_best_params']:
    # No tuning in this run: reuse the parameters stored in the cloned repository.
    shutil.copyfile(
        f'/kaggle/working/RECsys_Challenge2024/{best_params_repo_path}',
        best_params_local_path
    )

File 'TrainedModels/RP3betaRecommender/best_params_RP3beta.json' created successfully.


Upload the submission file and the tuning history database of the tuned model to GitHub.

In [None]:
if config['save_github']:
    # The submission keeps the same file name locally and in the repository.
    submission_filename = f'submission_{config["model"]}.csv'
    upload_file(
        f'/kaggle/working/{submission_filename}',
        f'TrainedModels/FixedTopK/{config["model"]}Recommender/Submission/{submission_filename}',
        f'New {config["model"]} submission (from kaggle notebook)'
    )

File 'TrainedModels/RP3betaRecommender/Submission/submission_RP3beta.csv' created successfully.


In [None]:
# The optuna database is only pushed when this run both tuned and is allowed to upload.
if all(config[flag] for flag in ('save_github', 'tune_parameters')):
    history_repo_path = f'TrainedModels/FixedTopK/{config["model"]}Recommender/history_{config["model"]}.db'
    history_commit_message = f'Tuning {config["model"]} db updated results (from kaggle notebook)'
    upload_file(config['database_path'], history_repo_path, history_commit_message)

File 'TrainedModels/RP3betaRecommender/history_RP3beta.db' created successfully.
