In [4]:
from Diffusion.similarity_models import TwoRandomWalksSampler
similarity_batch = TwoRandomWalksSampler(URM_train)
_ = similarity_batch.sample_warm_batch(500)
_

TypeError: __init__() missing 1 required positional argument: 'warm_user_ids'

In [1]:

from Data_manager.DataSplitter_Holdout import DataSplitter_Holdout
from Data_manager.Movielens.Movielens100KReader import Movielens100KReader
from Evaluation.Evaluator import EvaluatorHoldout
import numpy as np

dataset_reader = Movielens100KReader()

dataSplitter = DataSplitter_Holdout(dataset_reader, user_wise=False, split_interaction_quota_list=[80, 10, 10])
dataSplitter.load_data() #"results_experiments/Movielens1M/data"
URM_train, URM_validation, URM_test = dataSplitter.get_holdout_split()

cutoff_list = [10, 50]
evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=cutoff_list)
evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=cutoff_list)

DataSplitter_Holdout: Verifying data consistency...
DataSplitter_Holdout: Verifying data consistency... Passed!
DataSplitter_Holdout: DataReader: Movielens100K
	Num items: 1682
	Num users: 943
	Train 		quota 80.00 (80.00), 	interactions 80000, 	density 5.04E-02
	Validation 	quota 10.00 (10.00), 	interactions 10000, 	density 6.30E-03
	Test 		quota 10.00 (10.00), 	interactions 10000, 	density 6.30E-03





DataSplitter_Holdout: Done.
EvaluatorHoldout: Ignoring 27 ( 2.9%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 18 ( 1.9%) Users that have less than 1 test interactions


In [2]:
import optuna
import numpy as np
import pandas as pd
import os
from Diffusion.MultiBlockAttentionDiffusionRecommenderSimilarity import MultiBlockAttentionDiffusionRecommenderInfSimilarity

def objective(trial):

    cutoff = 10
    metric = 'NDCG'
    directory_path = '/Users/lucaortolomo/Desktop/TESI/Thesis_DiffusionRecommender-main/Self-Attention/OptunaResults/Movielens100K'

    batch_size = trial.suggest_categorical('batch_size', [64, 128, 256, 512])
    embeddings_dim = trial.suggest_categorical('embeddings_dim', [64, 128, 256, 512,1024])
    heads = trial.suggest_categorical('heads', [1])
    attention_blocks = trial.suggest_categorical('attention_blocks', [2, 3, 5, 8, 13])
    d_ff = trial.suggest_categorical('d_ff', [512, 1024, 2048])
    epochs = trial.suggest_int('epochs', 1, 2)
    l2_reg = trial.suggest_loguniform('l2_reg', 1e-5, 1e-3)
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-2)
    noise_timesteps = trial.suggest_int('noise_timesteps', 1, 2000)
    inference_timesteps = trial.suggest_int('inference_timesteps', 1, noise_timesteps-1)
    start_beta = trial.suggest_float('start_beta', 0.00001, 0.001)
    end_beta = trial.suggest_float('end_beta', 0.01, 0.2)

    # Initialize and train the recommender

    diffusion_model = MultiBlockAttentionDiffusionRecommenderInfSimilarity(URM_train = URM_train, verbose = False, use_gpu = True)

    diffusion_model.fit(
                      epochs=epochs,
                      batch_size=batch_size,
                      embeddings_dim=embeddings_dim,
                      heads=heads,
                      attention_blocks = attention_blocks,
                      d_ff = d_ff,
                      l2_reg=l2_reg,
                      learning_rate=learning_rate,
                      noise_timesteps = noise_timesteps,
                      inference_timesteps = inference_timesteps,
                      start_beta = start_beta,
                      end_beta = end_beta
    )

    result_df, _ = evaluator_validation.evaluateRecommender(diffusion_model)
    hyperparams = {
    'batch_size': batch_size,
    'embeddings_dim': embeddings_dim,
    'heads': heads,
    'attention_blocks': attention_blocks,
    'd_ff': d_ff,
    'epochs': epochs,
    'l2_reg': l2_reg,
    'learning_rate': learning_rate,
    'noise_timesteps': noise_timesteps,
    'inference_timesteps': inference_timesteps,
    'start_beta': start_beta,
    'end_beta': end_beta}

    result_df['hyperparams'] = str(hyperparams)

    filename = directory_path + diffusion_model.RECOMMENDER_NAME + ".csv"

    # Check if file exists
    if os.path.isfile(filename):
        # If it exists, append without writing the header
        pd.DataFrame(result_df.loc[cutoff]).transpose().to_csv(filename, mode='a', header=False, index=False)
    else:
        # If it doesn't exist, create it, write the header
        pd.DataFrame(result_df.loc[cutoff]).transpose().to_csv(filename, mode='w', header=True, index=False)

    return result_df.loc[cutoff][metric]



In [7]:
import optuna
import numpy as np
import pandas as pd
import os
from Diffusion.DiffusionRecommender import MultiBlockAttentionDiffusionRecommenderSimilarity

def objective(trial):

    cutoff = 10
    metric = 'NDCG'
    directory_path = '/Users/lucaortolomo/Desktop/TESI/Thesis_DiffusionRecommender-main/Self-Attention/OptunaResults/Movielens100K'

    batch_size = trial.suggest_categorical('batch_size', [64, 128, 256, 512]) # , 1024]) # Movielens100k has only 943 users!!
    embeddings_dim = trial.suggest_categorical('embeddings_dim', [6 ])#128, 256, 512, 1024])
    heads = trial.suggest_categorical('heads', [4])
    attention_blocks = trial.suggest_categorical('attention_blocks', [1]) #, 2, 3, 5, 8, 13])
    d_ff = trial.suggest_categorical('d_ff', [1024, 2048, 4096])
    epochs = trial.suggest_int('epochs', 50, 300)
    l2_reg = trial.suggest_loguniform('l2_reg', 1e-5, 1e-3)
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-2)
    noise_timesteps = trial.suggest_int('noise_timesteps', 3, 1000)
    inference_timesteps = trial.suggest_int('inference_timesteps', 1, noise_timesteps-1)
    start_beta = trial.suggest_float('start_beta', 0.00001, 0.001)
    end_beta = trial.suggest_float('end_beta', 0.01, 0.2)

    # Initialize and train the recommender

    diffusion_model = MultiBlockAttentionDiffusionRecommenderSimilarity(URM_train = URM_train, verbose = False, use_gpu = True)

    diffusion_model.fit(
                      epochs=epochs,
                      batch_size=batch_size,
                      embeddings_dim=embeddings_dim,
                      heads=heads,
                      attention_blocks = attention_blocks,
                      d_ff = d_ff,
                      l2_reg=l2_reg,
                      learning_rate=learning_rate,
                      noise_timesteps = noise_timesteps,
                      inference_timesteps = inference_timesteps,
                      start_beta = start_beta,
                      end_beta = end_beta
    )

    result_df, _ = evaluator_validation.evaluateRecommender(diffusion_model)
    hyperparams = {
    'batch_size': batch_size,
    'embeddings_dim': embeddings_dim,
    'heads': heads,
    'attention_blocks': attention_blocks,
    'd_ff': d_ff,
    'epochs': epochs,
    'l2_reg': l2_reg,
    'learning_rate': learning_rate,
    'noise_timesteps': noise_timesteps,
    'inference_timesteps': inference_timesteps,
    'start_beta': start_beta,
    'end_beta': end_beta}

    result_df['hyperparams'] = str(hyperparams)

    filename = directory_path + diffusion_model.RECOMMENDER_NAME + ".csv"

    # Check if file exists
    if os.path.isfile(filename):
        # If it exists, append without writing the header
        pd.DataFrame(result_df.loc[cutoff]).transpose().to_csv(filename, mode='a', header=False, index=False)
    else:
        # If it doesn't exist, create it, write the header
        pd.DataFrame(result_df.loc[cutoff]).transpose().to_csv(filename, mode='w', header=True, index=False)

    return result_df.loc[cutoff][metric]


ImportError: cannot import name 'MultiBlockAttentionDiffusionRecommenderSimilarity' from 'Diffusion.DiffusionRecommender' (/Users/lucaortolomo/Desktop/TESI/Thesis_DiffusionRecommender-main/Diffusion/DiffusionRecommender.py)

In [3]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=1,show_progress_bar=True)

[I 2024-03-13 08:38:29,812] A new study created in memory with name: no-name-bda10161-2b01-4534-a68b-e533935fbfac


  0%|          | 0/1 [00:00<?, ?it/s]

  l2_reg = trial.suggest_loguniform('l2_reg', 1e-5, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-2)
  self._set_arrayXarray_sparse(i, j, x)


MultiBlockAttentionDiffusionRecommenderSimilarity: Epoch 1 of 1. Elapsed time 0.97 sec
MultiBlockAttentionDiffusionRecommenderSimilarity: Terminating at epoch 1. Elapsed time 0.98 sec


In [None]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=200,show_progress_bar=True)

In [10]:
from Diffusion.DiffusionRecommender import MultiBlockAttentionDiffusionRecommender
diffusion_model = MultiBlockAttentionDiffusionRecommender(URM_train = URM_train, use_gpu = True)
diffusion_model.fit(epochs=5)

MultiBlockAttentionDiffusionRecommender: URM Detected 26 ( 1.5%) items with no interactions.
MultiBlockAttentionDiffusionRecommender: Epoch 1, loss 6.97E+00
MultiBlockAttentionDiffusionRecommender: Epoch 1 of 5. Elapsed time 4.19 sec
MultiBlockAttentionDiffusionRecommender: Epoch 2, loss 6.17E+00
MultiBlockAttentionDiffusionRecommender: Epoch 2 of 5. Elapsed time 4.38 sec
MultiBlockAttentionDiffusionRecommender: Epoch 3, loss 6.28E+00
MultiBlockAttentionDiffusionRecommender: Epoch 3 of 5. Elapsed time 4.57 sec
MultiBlockAttentionDiffusionRecommender: Epoch 4, loss 6.13E+00
MultiBlockAttentionDiffusionRecommender: Epoch 4 of 5. Elapsed time 4.75 sec
MultiBlockAttentionDiffusionRecommender: Epoch 5, loss 6.20E+00
MultiBlockAttentionDiffusionRecommender: Epoch 5 of 5. Elapsed time 4.93 sec
MultiBlockAttentionDiffusionRecommender: Terminating at epoch 5. Elapsed time 4.94 sec
MultiBlockAttentionDiffusionRecommender: Training complete


In [11]:
result_df, _ = evaluator_validation.evaluateRecommender(diffusion_model)
result_df

EvaluatorHoldout: Processed 916 (100.0%) in 2.60 sec. Users per second: 353


Unnamed: 0_level_0,PRECISION,PRECISION_RECALL_MIN_DEN,RECALL,MAP,MAP_MIN_DEN,MRR,NDCG,F1,HIT_RATE,ARHR_ALL_HITS,...,COVERAGE_USER,COVERAGE_USER_HIT,USERS_IN_GT,DIVERSITY_GINI,SHANNON_ENTROPY,RATIO_DIVERSITY_HERFINDAHL,RATIO_DIVERSITY_GINI,RATIO_SHANNON_ENTROPY,RATIO_AVERAGE_POPULARITY,RATIO_NOVELTY
cutoff,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10,0.095306,0.141,0.109041,0.042878,0.062498,0.258577,0.117581,0.101712,0.56441,0.330026,...,0.971368,0.54825,0.971368,0.011682,4.558766,0.952265,0.031481,0.469742,2.290069,0.095436
50,0.056659,0.276977,0.276483,0.01491,0.066893,0.271178,0.180707,0.094046,0.796943,0.411937,...,0.971368,0.774125,0.971368,0.045353,6.563823,0.989559,0.122222,0.676347,1.652778,0.507614


In [3]:

from Data_manager.DataSplitter_Holdout import DataSplitter_Holdout
from Data_manager.CiteULike.CiteULikeReader import CiteULike_aReader
from Evaluation.Evaluator import EvaluatorHoldout
import numpy as np

dataset_reader = CiteULike_aReader()

dataSplitter = DataSplitter_Holdout(dataset_reader, user_wise=False, split_interaction_quota_list=[80, 10, 10])
dataSplitter.load_data('/Users/lucaortolomo/Desktop/TESI/Thesis_DiffusionRecommender-main/Hyperparameter_databases/hyperparameter_database_2024_02/k_5_cores/original/hyperopt_random_holdout_80_10_10/CiteULike_a/data') #"results_experiments/Movielens1M/data"
URM_train, URM_validation, URM_test = dataSplitter.get_holdout_split()

cutoff_list = [10, 50]
evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=cutoff_list)
evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=cutoff_list)

DataSplitter_Holdout: Verifying data consistency...
DataSplitter_Holdout: Verifying data consistency... Passed!
DataSplitter_Holdout: DataReader: CiteULike_a
	Num items: 15429
	Num users: 5536
	Train 		quota 80.00 (80.00), 	interactions 160144, 	density 1.87E-03
	Validation 	quota 10.00 (10.00), 	interactions 20018, 	density 2.34E-04
	Test 		quota 10.00 (10.00), 	interactions 20018, 	density 2.34E-04



	ICM name: ICM_title_abstract, Num features: 7999, feature occurrences: 1031068, density 8.35E-03


DataSplitter_Holdout: Done.
EvaluatorHoldout: Ignoring 795 (14.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 742 (13.4%) Users that have less than 1 test interactions


In [5]:
import os
directory_path = '/Users/lucaortolomo/Desktop/TESI/Thesis_DiffusionRecommender-main/Self-Attention/OptunaResults/Dataset/' + "full" + '/' 
if not os.path.exists(directory_path):
    os.makedirs(directory_path)
    print(f"Directory {directory_path} created.")

Directory /Users/lucaortolomo/Desktop/TESI/Thesis_DiffusionRecommender-main/Self-Attention/OptunaResults/Dataset/full/ created.


In [9]:
import torch
from Diffusion.similarity_models import TwoRandomWalksSampler

user = 200
sampler = TwoRandomWalksSampler(URM=URM_train, warm_user_ids=None)
user_batch = sampler.sample_batch(256, user)
user_batch_tensor = URM_train[user_batch]
user_profile_reference = URM_train[user].toarray()

# Convert CSR matrix to a dense numpy array directly
user_batch_dense_np = user_batch_tensor.toarray()

# Convert the dense numpy array to a PyTorch tensor
# and move it to the appropriate device
if str('mps') == 'mps':
    user_batch_tensor = torch.tensor(user_batch_dense_np, dtype=torch.float32, device='cpu').to('mps')
else:
# Transferring only the sparse structure to reduce the data transfer
    user_batch_tensor = torch.sparse_csr_tensor(user_batch_tensor.indptr,
                                                user_batch_tensor.indices,
                                                user_batch_tensor.data,
                                                size=user_batch_tensor.shape,
                                                dtype=torch.float32,
                                                device='cpu',
                                                requires_grad=False).to_dense()
    
print(user_batch_tensor)

  self._set_arrayXarray_sparse(i, j, x)


tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [3., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]], device='mps:0')


[[0. 0. 0. ... 0. 0. 0.]]
