In [1]:
import pandas as pd
import scipy.sparse as sps
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

In [2]:
# Interaction table read from the training CSV. The dtype keys 0/1/2 address
# columns by position -- presumably user_id, item_id and data, the columns
# used further down; confirm against the file header.
URM_all_dataframe = pd.read_csv(
    "Data/data_train.csv",
    sep=",",
    dtype={0:int, 1:int, 2:float},
    engine='python',
)

# Users for which recommendations are generated at the end of the notebook.
users = pd.read_csv("Data/data_target_users_test.csv")

In [3]:
# Number of rows in the interaction table.
n_interactions = len(URM_all_dataframe)

# Distinct raw identifiers, in order of first appearance in the table.
userID_unique = URM_all_dataframe["user_id"].unique()
itemID_unique = URM_all_dataframe["item_id"].unique()

# Lookup tables: raw identifier -> dense 0-based index.
mapped_id, original_id = pd.factorize(userID_unique)
user_original_ID_to_index = pd.Series(mapped_id, index=original_id)
mapped_id, original_id = pd.factorize(itemID_unique)
item_original_ID_to_index = pd.Series(mapped_id, index=original_id)

# NOTE: the matrix is indexed with the RAW user/item identifiers, not with the
# dense indices computed above, so its shape is (max user_id + 1, max item_id + 1).
# tocsr() returns a new matrix, so its result must be assigned; calling it as a
# bare expression would leave URM_all in COO format.
URM_all = sps.coo_matrix(
    (URM_all_dataframe["data"].values,
     (URM_all_dataframe["user_id"].values, URM_all_dataframe["item_id"].values))
).tocsr()

# Last expression of the cell: display the matrix summary.
URM_all

<35736x38121 sparse matrix of type '<class 'numpy.float64'>'
	with 1764607 stored elements in Compressed Sparse Row format>

In [4]:
from Evaluation.Evaluator import EvaluatorHoldout

# First split: 80% of the interactions for train+validation, the rest for test.
URM_train_validation, URM_test = split_train_in_two_percentage_global_sample(
    URM_all,
    train_percentage=0.8,
)

# Second split: 80% of the remaining interactions for train, the rest for validation.
URM_train, URM_validation = split_train_in_two_percentage_global_sample(
    URM_train_validation,
    train_percentage=0.8,
)

# One evaluator per held-out split, both reporting metrics at cutoff 10.
evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])
evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

EvaluatorHoldout: Ignoring 434 ( 1.2%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 146 ( 0.4%) Users that have less than 1 test interactions


In [5]:
class SaveResults(object):
    """Optuna callback that records the hyperparameters and score of each trial.

    An instance is passed in the ``callbacks`` list of ``study.optimize`` and is
    then called as ``callback(study, trial)``.

    Attributes
    ----------
    results_df : pandas.DataFrame
        One row per trial seen so far. The ``"result"`` column comes first and
        holds the first objective value of the trial (NaN when the trial has
        no value); the remaining columns are the trial's hyperparameters.
    """

    def __init__(self):

        # Raw per-trial records; results_df is rebuilt from this list so the
        # frame is never grown by concatenating onto an empty DataFrame.
        self._records = []

        self.results_df = pd.DataFrame(columns = ["result"])

    def __call__(self, optuna_study, optuna_trial):
        """Append the parameters and first objective value of ``optuna_trial``.

        Parameters
        ----------
        optuna_study
            The study the trial belongs to (unused).
        optuna_trial
            Object exposing ``params`` (dict of hyperparameters) and ``values``
            (sequence of objective values, or None when the trial produced none).
        """

        trial_values = optuna_trial.values

        # A trial without objective values (e.g. failed or pruned) is recorded
        # with NaN instead of raising on ``None[0]``.
        if trial_values is not None and len(trial_values) > 0:
            result = trial_values[0]
        else:
            result = float("nan")

        # "result" is inserted first so it stays the leading column, and is
        # assigned last so the score wins over a parameter that happens to be
        # called "result".
        record = {"result": None}
        record.update(optuna_trial.params)
        record["result"] = result

        self._records.append(record)
        self.results_df = pd.DataFrame(self._records)

In [6]:
from Recommenders.MatrixFactorization.PyTorch.MF_MSE_PyTorch import MF_MSE_PyTorch

In [7]:
import optuna

def objective_function_MF_MSE_PyTorch(optuna_trial):
    """Optuna objective: train MF_MSE_PyTorch on URM_train and score it on validation.

    Samples ``batch_size``, ``num_factors`` and ``learning_rate`` from the trial;
    ``epochs`` (100) and ``use_cuda`` (True) are fixed and not searched.

    Returns the MAP at cutoff 10 reported by ``evaluator_validation``.
    """
    # The dict literal is evaluated top to bottom, so the parameters are
    # sampled in the order batch_size, num_factors, learning_rate.
    sampled_hyperparams = {
        "batch_size": optuna_trial.suggest_int('batch_size', 32, 512),
        "num_factors": optuna_trial.suggest_int('num_factors', 10, 200),
        "learning_rate": optuna_trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True),
    }

    # Build and train the model with the sampled configuration.
    model = MF_MSE_PyTorch(URM_train)
    model.fit(epochs=100, use_cuda=True, **sampled_hyperparams)

    # Score on the validation split; the results table is indexed by cutoff.
    evaluation_table, _ = evaluator_validation.evaluateRecommender(model)
    metrics_at_cutoff_10 = evaluation_table.loc[10]
    return metrics_at_cutoff_10["MAP"]

In [None]:
from Recommenders.GraphBased.RP3betaRecommender import RP3betaRecommender
def objective_function_RP3betaRecommender(optuna_trial):
    """Optuna objective: train RP3betaRecommender on URM_train and score it on validation.

    Samples ``alpha``, ``beta`` and ``topK`` from the trial; ``implicit`` is
    fixed to True and not searched.

    Returns the MAP at cutoff 10 reported by ``evaluator_validation``.
    """
    # The dict literal is evaluated top to bottom, so the parameters are
    # sampled in the order alpha, beta, topK.
    sampled_hyperparams = {
        "alpha": optuna_trial.suggest_float('alpha', 0.1, 0.9),
        "beta": optuna_trial.suggest_float('beta', 0.1, 0.9),
        "topK": optuna_trial.suggest_int('topK', 50, 500),
    }

    # Build and train the model with the sampled configuration.
    model = RP3betaRecommender(URM_train)
    model.fit(implicit=True, **sampled_hyperparams)

    # Score on the validation split; the results table is indexed by cutoff.
    evaluation_table, _ = evaluator_validation.evaluateRecommender(model)
    metrics_at_cutoff_10 = evaluation_table.loc[10]
    return metrics_at_cutoff_10["MAP"]

In [18]:
import torch

# Environment diagnostics.
print(torch.__version__)   # installed PyTorch version
print(torch.version.cuda)  # CUDA version reported by this PyTorch build (None was printed for the CPU build)

cuda_available = torch.cuda.is_available()
print(cuda_available)      # whether PyTorch can actually use a CUDA device

# Select the device and report the choice.
device = torch.device('cuda' if cuda_available else 'cpu')
print("MF_MSE_PyTorch: Using CUDA" if cuda_available else "MF_MSE_PyTorch: Using CPU")

1.13.1+cpu
None
False
MF_MSE_PyTorch: Using CPU


In [None]:
# Study that maximises the value returned by the RP3beta objective.
optuna_study_rp3 = optuna.create_study(direction="maximize")

# Callback that collects the parameters and score of each trial.
save_results_rp3 = SaveResults()

# Run 50 trials.
optuna_study_rp3.optimize(
    objective_function_RP3betaRecommender,
    n_trials=50,
    callbacks=[save_results_rp3],
)

In [None]:
# Display the hyperparameters of the best trial of the RP3beta study.
optuna_study_rp3.best_trial.params

In [9]:
# Study that maximises the value returned by the MF_MSE_PyTorch objective.
optuna_study_mf_mse = optuna.create_study(direction="maximize")

# Callback that collects the parameters and score of each trial.
save_results_mf_mse = SaveResults()

# Run 50 trials.
optuna_study_mf_mse.optimize(
    objective_function_MF_MSE_PyTorch,
    n_trials=50,
    callbacks=[save_results_mf_mse],
)


[I 2024-11-26 13:47:42,976] A new study created in memory with name: no-name-e2509fc1-8b09-4b2e-858b-b2ff362d6757


MF_MSE_PyTorch_Recommender: MF_MSE_PyTorch: Using CPU


[W 2024-11-26 13:49:04,575] Trial 0 failed with parameters: {'batch_size': 88, 'num_factors': 34, 'learning_rate': 7.592466056703098e-05} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "D:\software\Anaconda\InstallationFolder\envs\RecSysFramework\lib\site-packages\optuna\study\_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\filip\AppData\Local\Temp\ipykernel_15128\4167526612.py", line 13, in objective_function_MF_MSE_PyTorch
    recommender.fit(epochs=100, batch_size=batch_size, num_factors=num_factors, learning_rate=learning_rate, use_cuda=True)
  File "D:\UNIVERSITA\Progetti python\RS\RecSysProject\Recommenders\MatrixFactorization\PyTorch\MF_MSE_PyTorch.py", line 83, in fit
    self._train_with_early_stopping(epochs,
  File "D:\UNIVERSITA\Progetti python\RS\RecSysProject\Recommenders\Incremental_Training_Early_Stopping.py", line 199, in _train_with_early_stopping
    self._run_epoch(epochs_curre

KeyboardInterrupt: 

In [None]:
# Display the hyperparameters of the best trial of the MF_MSE_PyTorch study.
# NOTE(review): the captured run of the optimisation cell was interrupted during
# trial 0, so the study may hold no completed trial -- confirm the optimisation
# finished before relying on this cell.
optuna_study_mf_mse.best_trial.params

In [None]:
# best_trial.params only contains the values sampled inside the objective
# (batch_size, num_factors, learning_rate). The settings that were fixed during
# tuning (epochs=100, use_cuda=True) are passed explicitly so the final model
# is trained the same way as the tuned one. Tuned values take precedence if a
# key ever appears in both.
final_fit_kwargs_mf_mse = {"epochs": 100, "use_cuda": True}
final_fit_kwargs_mf_mse.update(optuna_study_mf_mse.best_trial.params)

# Retrain on train + validation interactions with the selected configuration.
recom = MF_MSE_PyTorch(URM_train+URM_validation)
recom.fit(**final_fit_kwargs_mf_mse)

# Evaluate the final model on the held-out test split and display the table.
result_def, _ = evaluator_test.evaluateRecommender(recom)
result_def

In [None]:
cutoff = 10  # Number of recommendations to generate per user

# One [user_id, recommended items] pair per target user, in file order.
recommendations_list = [
    [target_user, recom.recommend(target_user, cutoff=cutoff)]
    for target_user in users["user_id"]
]

# MAP at cutoff 10 of the final model on the test split, for reference.
print (result_def.loc[10]["MAP"])

# NOTE(review): item_list holds whatever recom.recommend returns, so the CSV
# stores its string representation -- confirm this is the format the consumer
# of recomm.csv expects.
df_recommendations = pd.DataFrame(
    recommendations_list,
    columns=['user_id', 'item_list'],
)
df_recommendations.to_csv('recomm.csv', index=False)