In [12]:
from logging import getLogger
from recbole.config import Config
from recbole.data import create_dataset, data_preparation
from recbole.model.sequential_recommender import GRU4Rec
from recbole.trainer import Trainer 
from recbole.utils import init_logger, init_seed, get_model, get_trainer

In [13]:
# Assemble the RecBole run configuration: the GRU4Rec model on the "Food"
# dataset, with config.yaml supplied as the only config file.
config = Config(
    model="GRU4Rec",
    dataset="Food",
    config_file_list=["config.yaml"],
)

In [14]:
# Seed first, using the seed / reproducibility values held in the config.
init_seed(
    seed=config["seed"],
    reproducibility=config["reproducibility"],
)

# Set up logging from the config, then record the full configuration in the log.
init_logger(config)
logger = getLogger()
logger.info(config)

30 May 14:03    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = True
seed = 2020
state = INFO
reproducibility = True
data_path = Food_Dataset/Food
checkpoint_dir = saved
show_progress = True
save_dataset = False
dataset_save_path = None
save_dataloaders = False
dataloaders_save_path = None
log_wandb = False

Training Hyper Parameters:
epochs = 10
train_batch_size = 512
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'none', 'sample_num': 'none', 'alpha': 'none', 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_decimal_place = 4

Evaluation Hyper Parameters:
eval_args = {'split': {'LS': 'valid_and_test'}, 'order': 'TO', 'group_by': 'user', 'mode': {'valid': 'full', 'test': 'full'}}
repeatable = True
metrics = ['Recall', 'MRR', 'NDCG', 'Hit', 'Precision']
topk = [10]
valid_metric = MRR@10
valid_metric_bigger = True
eval_batch_size = 512
metric_decimal_place = 4

Dataset Hyper Parameter

In [15]:
# Build the dataset object described by `config`.
dataset = create_dataset(config)
# Log the dataset itself so its summary (user/item counts, etc.) lands in the run log.
logger.info(dataset)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=feat[field].mean(), inplace=True)
30 May 14:03    INFO  Food
The number of users: 226571
Average actions of users: 4.997868208500684
The number of items: 231638
Aver

In [16]:
# Split the dataset into the three loaders used below (train / validation / test).
prepared_splits = data_preparation(config, dataset)
train_data, valid_data, test_data = prepared_splits

30 May 14:04    INFO  [Training]: train_batch_size = [512] train_neg_sample_args: [{'distribution': 'none', 'sample_num': 'none', 'alpha': 'none', 'dynamic': False, 'candidate_num': 0}]
30 May 14:04    INFO  [Evaluation]: eval_batch_size = [512] eval_args: [{'split': {'LS': 'valid_and_test'}, 'order': 'TO', 'group_by': 'user', 'mode': {'valid': 'full', 'test': 'full'}}]


In [17]:
# Bare expression: display the repr of the training loader returned by data_preparation.
train_data

<recbole.data.dataloader.general_dataloader.TrainDataLoader at 0x745ea563d400>

In [18]:
# Peek at the first training batch only, to inspect its fields; nothing is
# printed if the loader yields no batches.
first_batch = next(iter(train_data), None)
if first_batch is not None:
    print(first_batch)

The batch_size of interaction: 512
    user_id, torch.Size([512]), cpu, torch.int64
    item_id, torch.Size([512]), cpu, torch.int64
    timestamp, torch.Size([512]), cpu, torch.float32
    item_length, torch.Size([512]), cpu, torch.int64
    item_id_list, torch.Size([512, 50]), cpu, torch.int64
    timestamp_list, torch.Size([512, 50]), cpu, torch.float32




In [19]:
# Show the resolved model name, model type and device as a single tuple.
tuple(config[key] for key in ("model", "MODEL_TYPE", "device"))

('GRU4Rec', <ModelType.SEQUENTIAL: 2>, device(type='cuda'))

In [20]:
# Look up the model class by its configured name.
gru4rec_model = get_model(config["model"])

# Instantiate it against the training loader's dataset, then move it to the
# configured device.
model = gru4rec_model(config, train_data.dataset)
model = model.to(config["device"])

In [21]:
# Resolve the trainer class for this model type / name, then bind it to the model.
trainer_cls = get_trainer(config["MODEL_TYPE"], config["model"])
trainer = trainer_cls(config, model)

In [22]:
# Train with per-epoch validation; `fit` returns the best validation score
# together with the metric results for that epoch.
fit_outcome = trainer.fit(train_data, valid_data)
best_valid_score, best_valid_result = fit_outcome

30 May 14:05    INFO  epoch 0 training [time: 57.88s, train loss: 18955.6827]
30 May 14:05    INFO  epoch 0 evaluating [time: 1.32s, valid_score: 0.005900]
30 May 14:05    INFO  valid result: 
recall@10 : 0.0177    mrr@10 : 0.0059    ndcg@10 : 0.0086    hit@10 : 0.0177    precision@10 : 0.0018
30 May 14:05    INFO  Saving current: saved/GRU4Rec-May-30-2024_14-04-06.pth
30 May 14:06    INFO  epoch 1 training [time: 58.00s, train loss: 18277.6918]
30 May 14:06    INFO  epoch 1 evaluating [time: 1.30s, valid_score: 0.005800]
30 May 14:06    INFO  valid result: 
recall@10 : 0.0173    mrr@10 : 0.0058    ndcg@10 : 0.0084    hit@10 : 0.0173    precision@10 : 0.0017
30 May 14:07    INFO  epoch 2 training [time: 58.05s, train loss: 17832.3166]
30 May 14:07    INFO  epoch 2 evaluating [time: 1.31s, valid_score: 0.006300]
30 May 14:07    INFO  valid result: 
recall@10 : 0.0192    mrr@10 : 0.0063    ndcg@10 : 0.0093    hit@10 : 0.0192    precision@10 : 0.0019
30 May 14:07    INFO  Saving current: 

0.0066 OrderedDict([('recall@10', 0.018), ('mrr@10', 0.0066), ('ndcg@10', 0.0092), ('hit@10', 0.018), ('precision@10', 0.0018)])


In [23]:
# Print the best validation score followed by its metric breakdown on one line.
validation_summary = (best_valid_score, best_valid_result)
print(*validation_summary)

0.0066 OrderedDict([('recall@10', 0.018), ('mrr@10', 0.0066), ('ndcg@10', 0.0092), ('hit@10', 0.018), ('precision@10', 0.0018)])


### Model Inference

In [24]:
# Evaluate the checkpoint saved during training on the held-out test split.
#
# The checkpoint file name contains a timestamp (see the "Saving current: ..."
# line in the training log), so a hard-coded path goes stale on every re-run.
# Take the path from the trainer that ran `fit` BEFORE `trainer` is rebound
# below; the replacement trainer has never saved a checkpoint of its own.
checkpoint_file = trainer.saved_model_file

trainer = get_trainer(config["MODEL_TYPE"], config["model"])(config, model)

# When ItemCoverage metrics are calculated, the eval collector needs item_nums
# to be set; this call collects it from the training data.
trainer.eval_collector.data_collect(train_data)

# Load the saved weights from `checkpoint_file` and score them on the test data.
test_result = trainer.evaluate(test_data, model_file=checkpoint_file)
print(test_result)

30 May 15:05    INFO  Loading model structure and parameters from saved/GRU4Rec-May-30-2024_14-04-06.pth


OrderedDict([('recall@10', 0.0191), ('mrr@10', 0.0074), ('ndcg@10', 0.0101), ('hit@10', 0.0191), ('precision@10', 0.0019)])
