In [1]:
from logging import getLogger

from recbole.config import Config
from recbole.data import create_dataset, data_preparation
from recbole.model.general_recommender import BPR
from recbole.trainer import Trainer
from recbole.utils import get_model, get_trainer, init_logger, init_seed




In [2]:
def train_test(model_name,
               dataset_name,
               parameter_dict):
    """Train one RecBole model on one dataset and print its validation/test metrics.

    The pipeline is: build the config, seed the RNGs, set up logging, create
    and split the dataset, build the model and trainer, fit on the
    train/validation splits, then evaluate on the test split.

    Args:
        model_name: Model name handed to ``Config``; it also selects the model
            class and the trainer class that are instantiated.
        dataset_name: Dataset name handed to ``Config``.
        parameter_dict: Configuration overrides handed to ``Config`` as
            ``config_dict``.

    Returns:
        None. The best validation score/result and the test result are printed.
    """
    # configurations initialization
    config = Config(model=model_name, dataset=dataset_name, config_dict=parameter_dict)

    # init random seed
    init_seed(config['seed'], config['reproducibility'])

    # logger initialization
    init_logger(config)
    logger = getLogger()

    # write config info into log
    logger.info(config)

    # dataset creating and filtering
    dataset = create_dataset(config)
    logger.info(dataset)

    # dataset splitting
    train_data, valid_data, test_data = data_preparation(config, dataset)

    # model loading and initialization
    # Look the class up from the configured name instead of hard-coding BPR, so
    # the model that is trained always matches the one the config was built for.
    model_class = get_model(config['model'])
    model = model_class(config, train_data.dataset).to(config['device'])
    logger.info(model)

    # trainer loading and initialization
    # Same idea for the trainer: pick the one registered for this model/type.
    trainer_class = get_trainer(config['MODEL_TYPE'], config['model'])
    trainer = trainer_class(config, model)

    # model training
    best_valid_score, best_valid_result = trainer.fit(train_data, valid_data)
    print('\n\nTraining best results')
    print('best_valid_score: ', best_valid_score)
    print('best_valid_result: ', best_valid_result)

    # model evaluation
    test_result = trainer.evaluate(test_data)

    print('\n\nTest results')
    print(test_result)

In [3]:
# Settings shared by every run cell in this notebook.
MODEL = 'BPR'                  # model name handed to train_test
SEED = 2020                    # forwarded as 'seed'
USE_GPU = False                # forwarded as 'use_gpu'
SHUFFLE = False                # forwarded as 'shuffle'

K = 3                          # forwarded as 'topk'
VALID_METRIC = f'Recall@{K}'   # forwarded as 'valid_metric'

# sudden drift dataset 4000x7 0.71 pt1

In [4]:
# Part 1 of the sudden-drift dataset: build the config overrides and run.
data_path = 'processed_datasets/artificial_data/'
dataset_name = 'sudden_drift_dataset_4000x7_0.71_pt1'

parameter_dict = {
    # NOTE(review): the logged data_path carries no '.inter' suffix, so this
    # entry looks overridden by the dataset argument of train_test - confirm.
    'dataset': f'{dataset_name}.inter',
    'data_path': data_path,
    'load_col': {
        'inter': ['user_id', 'item_id'],
    },
    'use_gpu': USE_GPU,
    'topk': K,
    'valid_metric': VALID_METRIC,
    # Checkpoints go to a per-dataset directory under data_path.
    'checkpoint_dir': f'{data_path}{dataset_name}',
    'seed': SEED,
    'shuffle': SHUFFLE,
}

train_test(MODEL, dataset_name, parameter_dict)

01 Jan 16:20    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = False
seed = 2020
state = INFO
reproducibility = True
data_path = processed_datasets/artificial_data/sudden_drift_dataset_4000x7_0.71_pt1
checkpoint_dir = processed_datasets/artificial_data/sudden_drift_dataset_4000x7_0.71_pt1
show_progress = True
save_dataset = False
dataset_save_path = None
save_dataloaders = False
dataloaders_save_path = None
log_wandb = False

Training Hyper Parameters:
epochs = 300
train_batch_size = 2048
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_decimal_place = 4

Evaluation Hyper Parameters:
eval_args = {'split': {'RS': [0.8, 0.1, 0.1]}, 'order': 'RO', 'group_by': 'user', 'mode': {'valid': 'full', 'test': 'full'}}
repeatable = False
metrics = ['Recall', 'MRR', 'NDCG', 'Hit', 'Precision']
topk = [3]
val



Training best results
best_valid_score:  0.6063
best_valid_result:  OrderedDict([('recall@3', 0.6063), ('mrr@3', 0.3517), ('ndcg@3', 0.4168), ('hit@3', 0.6063), ('precision@3', 0.2021)])


  checkpoint = torch.load(checkpoint_file, map_location=self.device)
01 Jan 16:20    INFO  Loading model structure and parameters from processed_datasets/artificial_data/sudden_drift_dataset_4000x7_0.71_pt1\BPR-Jan-01-2025_16-20-32.pth




Test results
OrderedDict([('recall@3', 0.5432), ('mrr@3', 0.3278), ('ndcg@3', 0.3825), ('hit@3', 0.5432), ('precision@3', 0.1811)])


# sudden drift dataset 4000x7 0.71 pt2

In [5]:
# Part 2 of the sudden-drift dataset: build the config overrides and run.
data_path = 'processed_datasets/artificial_data/'
dataset_name = 'sudden_drift_dataset_4000x7_0.71_pt2'

parameter_dict = {
    # NOTE(review): the logged data_path carries no '.inter' suffix, so this
    # entry looks overridden by the dataset argument of train_test - confirm.
    'dataset': f'{dataset_name}.inter',
    'data_path': data_path,
    'load_col': {
        'inter': ['user_id', 'item_id'],
    },
    'use_gpu': USE_GPU,
    'topk': K,
    'valid_metric': VALID_METRIC,
    # Checkpoints go to a per-dataset directory under data_path.
    'checkpoint_dir': f'{data_path}{dataset_name}',
    'seed': SEED,
    'shuffle': SHUFFLE,
}

train_test(MODEL, dataset_name, parameter_dict)

01 Jan 16:20    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = False
seed = 2020
state = INFO
reproducibility = True
data_path = processed_datasets/artificial_data/sudden_drift_dataset_4000x7_0.71_pt2
checkpoint_dir = processed_datasets/artificial_data/sudden_drift_dataset_4000x7_0.71_pt2
show_progress = True
save_dataset = False
dataset_save_path = None
save_dataloaders = False
dataloaders_save_path = None
log_wandb = False

Training Hyper Parameters:
epochs = 300
train_batch_size = 2048
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_decimal_place = 4

Evaluation Hyper Parameters:
eval_args = {'split': {'RS': [0.8, 0.1, 0.1]}, 'order': 'RO', 'group_by': 'user', 'mode': {'valid': 'full', 'test': 'full'}}
repeatable = False
metrics = ['Recall', 'MRR', 'NDCG', 'Hit', 'Precision']
topk = [3]
val



Training best results
best_valid_score:  0.5644
best_valid_result:  OrderedDict([('recall@3', 0.5644), ('mrr@3', 0.3365), ('ndcg@3', 0.3947), ('hit@3', 0.5644), ('precision@3', 0.1881)])


  checkpoint = torch.load(checkpoint_file, map_location=self.device)
01 Jan 16:20    INFO  Loading model structure and parameters from processed_datasets/artificial_data/sudden_drift_dataset_4000x7_0.71_pt2\BPR-Jan-01-2025_16-20-38.pth




Test results
OrderedDict([('recall@3', 0.5402), ('mrr@3', 0.3467), ('ndcg@3', 0.3962), ('hit@3', 0.5402), ('precision@3', 0.1801)])


# sudden drift dataset 4000x7 0.71 pt3

In [6]:
# Part 3 of the sudden-drift dataset: build the config overrides and run.
data_path = 'processed_datasets/artificial_data/'
dataset_name = 'sudden_drift_dataset_4000x7_0.71_pt3'

parameter_dict = {
    # NOTE(review): the logged data_path carries no '.inter' suffix, so this
    # entry looks overridden by the dataset argument of train_test - confirm.
    'dataset': f'{dataset_name}.inter',
    'data_path': data_path,
    'load_col': {
        'inter': ['user_id', 'item_id'],
    },
    'use_gpu': USE_GPU,
    'topk': K,
    'valid_metric': VALID_METRIC,
    # Checkpoints go to a per-dataset directory under data_path.
    'checkpoint_dir': f'{data_path}{dataset_name}',
    'seed': SEED,
    'shuffle': SHUFFLE,
}

train_test(MODEL, dataset_name, parameter_dict)

01 Jan 16:20    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = False
seed = 2020
state = INFO
reproducibility = True
data_path = processed_datasets/artificial_data/sudden_drift_dataset_4000x7_0.71_pt3
checkpoint_dir = processed_datasets/artificial_data/sudden_drift_dataset_4000x7_0.71_pt3
show_progress = True
save_dataset = False
dataset_save_path = None
save_dataloaders = False
dataloaders_save_path = None
log_wandb = False

Training Hyper Parameters:
epochs = 300
train_batch_size = 2048
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_decimal_place = 4

Evaluation Hyper Parameters:
eval_args = {'split': {'RS': [0.8, 0.1, 0.1]}, 'order': 'RO', 'group_by': 'user', 'mode': {'valid': 'full', 'test': 'full'}}
repeatable = False
metrics = ['Recall', 'MRR', 'NDCG', 'Hit', 'Precision']
topk = [3]
val



Training best results
best_valid_score:  0.5887
best_valid_result:  OrderedDict([('recall@3', 0.5887), ('mrr@3', 0.3663), ('ndcg@3', 0.4231), ('hit@3', 0.5887), ('precision@3', 0.1962)])


  checkpoint = torch.load(checkpoint_file, map_location=self.device)
01 Jan 16:20    INFO  Loading model structure and parameters from processed_datasets/artificial_data/sudden_drift_dataset_4000x7_0.71_pt3\BPR-Jan-01-2025_16-20-40.pth




Test results
OrderedDict([('recall@3', 0.5163), ('mrr@3', 0.3236), ('ndcg@3', 0.3727), ('hit@3', 0.5163), ('precision@3', 0.1721)])


# sudden drift dataset 4000x7 0.71 full

In [7]:
# Full (unsplit) sudden-drift dataset: build the config overrides and run.
data_path = 'processed_datasets/artificial_data/'
dataset_name = 'sudden_drift_dataset_4000x7_0.71'

parameter_dict = {
    # NOTE(review): the logged data_path carries no '.inter' suffix, so this
    # entry looks overridden by the dataset argument of train_test - confirm.
    'dataset': f'{dataset_name}.inter',
    'data_path': data_path,
    'load_col': {
        'inter': ['user_id', 'item_id'],
    },
    'use_gpu': USE_GPU,
    'topk': K,
    'valid_metric': VALID_METRIC,
    # Checkpoints go to a per-dataset directory under data_path.
    'checkpoint_dir': f'{data_path}{dataset_name}',
    'seed': SEED,
    'shuffle': SHUFFLE,
}

train_test(MODEL, dataset_name, parameter_dict)

01 Jan 16:20    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = False
seed = 2020
state = INFO
reproducibility = True
data_path = processed_datasets/artificial_data/sudden_drift_dataset_4000x7_0.71
checkpoint_dir = processed_datasets/artificial_data/sudden_drift_dataset_4000x7_0.71
show_progress = True
save_dataset = False
dataset_save_path = None
save_dataloaders = False
dataloaders_save_path = None
log_wandb = False

Training Hyper Parameters:
epochs = 300
train_batch_size = 2048
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_decimal_place = 4

Evaluation Hyper Parameters:
eval_args = {'split': {'RS': [0.8, 0.1, 0.1]}, 'order': 'RO', 'group_by': 'user', 'mode': {'valid': 'full', 'test': 'full'}}
repeatable = False
metrics = ['Recall', 'MRR', 'NDCG', 'Hit', 'Precision']
topk = [3]
valid_metri



Training best results
best_valid_score:  0.4956
best_valid_result:  OrderedDict([('recall@3', 0.4956), ('mrr@3', 0.3102), ('ndcg@3', 0.3575), ('hit@3', 0.4956), ('precision@3', 0.1652)])


  checkpoint = torch.load(checkpoint_file, map_location=self.device)
01 Jan 16:20    INFO  Loading model structure and parameters from processed_datasets/artificial_data/sudden_drift_dataset_4000x7_0.71\BPR-Jan-01-2025_16-20-44.pth




Test results
OrderedDict([('recall@3', 0.5655), ('mrr@3', 0.3436), ('ndcg@3', 0.4002), ('hit@3', 0.5655), ('precision@3', 0.1885)])
