# try recbole

<https://recbole.io/docs/user_guide/usage/use_modules.html>

Notes:

* RecBole fails to train a model with NumPy 1.24 or later ("AttributeError: module 'numpy' has no attribute 'float'."), because the deprecated `np.float` alias was removed in NumPy 1.24. Downgrade to NumPy 1.23.
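* MPS (the Apple-silicon GPU backend) doesn't seem to be supported: training fails with "RuntimeError: User specified autocast device_type must be 'cuda' or 'cpu'", so fall back to CPU.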

In [1]:
# test that python was installed with tcl-tk
import tkinter

In [2]:
from logging import getLogger
from recbole.config import Config
from recbole.data import create_dataset, data_preparation
from recbole.model.general_recommender import BPR
from recbole.trainer import Trainer
from recbole.utils import init_seed, init_logger

In [29]:
import torch
from recbole.model.loss import BPRLoss

In [30]:
# Instantiate RecBole's BPR loss with its default arguments so its result can
# be compared against a hand-written version of the formula.
loss = BPRLoss()

In [31]:
# Two random 3-element score vectors (positive and negative items) that
# track gradients, used as toy inputs for the loss experiments.
pos_score = torch.randn(3).requires_grad_()
neg_score = torch.randn(3).requires_grad_()

# Display both tensors.
pos_score, neg_score
# output = loss(pos_score, neg_score)

(tensor([ 1.2372, -0.9604,  1.5415], requires_grad=True),
 tensor([-0.4079,  0.8806,  0.0529], requires_grad=True))

In [36]:
# Step 1 of the BPR loss: sigmoid of the positive-minus-negative score gap.
(pos_score - neg_score).sigmoid()

tensor([0.8382, 0.1369, 0.8159], grad_fn=<SigmoidBackward0>)

In [39]:
# Step 2: per-pair loss, the negative log of the sigmoid of the score gap.
-(pos_score - neg_score).sigmoid().log()

tensor([0.1765, 1.9882, 0.2035], grad_fn=<NegBackward0>)

In [33]:
# Small constant added inside the log so it never sees an exact zero.
gamma = 1e-10

# Full manual loss: mean over pairs of -log(gamma + sigmoid(pos - neg)).
-(gamma + (pos_score - neg_score).sigmoid()).log().mean()

tensor(0.7894, grad_fn=<NegBackward0>)

In [32]:
# Library BPRLoss on the same scores; the recorded result (0.7894) equals the
# manual -log(gamma + sigmoid(pos - neg)).mean() computation.
loss(pos_score, neg_score)

tensor(0.7894, grad_fn=<NegBackward0>)

In [27]:
# Build the RecBole configuration: BPR model on the ml-100k dataset.
config = Config(model="BPR", dataset="ml-100k")

# Seed the random number generators from the configured values.
init_seed(seed=config["seed"], reproducibility=config["reproducibility"])

# Set up RecBole logging, then fetch the root logger.
init_logger(config)
logger = getLogger()

# Record the complete configuration in the log.
logger.info(config)

18 Sep 14:44    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = True
seed = 2020
state = INFO
reproducibility = True
data_path = /Users/maxharp3r/Library/Caches/pypoetry/virtualenvs/fmh-notes-FjPe97EK-py3.11/lib/python3.11/site-packages/recbole/config/../dataset_example/ml-100k
checkpoint_dir = saved
show_progress = True
save_dataset = False
dataset_save_path = None
save_dataloaders = False
dataloaders_save_path = None
log_wandb = False

Training Hyper Parameters:
epochs = 300
train_batch_size = 2048
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_decimal_place = 4

Evaluation Hyper Parameters:
eval_args = {'split': {'RS': [0.8, 0.1, 0.1]}, 'group_by': 'user', 'order': 'RO', 'mode': 'full'}
repeatable = False
metrics = ['Recall', 'MRR', 'NDCG', 'Hit', 'Precision']
topk = [10]
valid_metric = MRR@10
valid_metric_bigger = True
eval_batch_size = 4096
metric_decimal_place = 4

Dataset Hyper Parameters:
field_separator = 	
seq_separator =  
USER_ID_FIELD = user_id
ITEM_ID_FIELD = item_id
RATING_FIELD = rating
TIME_FIELD = timestamp
seq_len = None
LABEL_FIELD = label
threshold = None
NEG_PREFIX = neg_
load_col = {'i

In [24]:
# Create (and filter) the dataset described by `config`, then log its summary:
# user/item/interaction counts, sparsity and the remaining fields.
dataset = create_dataset(config)
logger.info(dataset)

18 Sep 14:25    INFO  ml-100k
The number of users: 944
Average actions of users: 106.04453870625663
The number of items: 1683
Average actions of items: 59.45303210463734
The number of inters: 100000
The sparsity of the dataset: 93.70575143257098%
Remain Fields: ['user_id', 'item_id', 'rating', 'timestamp']


In [25]:
# Split the dataset into training, validation and test data.
train_data, valid_data, test_data = data_preparation(config, dataset)

# Build the BPR model from the training dataset, move it onto the configured
# device, and log its structure.
model = BPR(config, train_data.dataset)
model = model.to(config["device"])
logger.info(model)

18 Sep 14:25    INFO  [Training]: train_batch_size = [2048] train_neg_sample_args: [{'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}]
18 Sep 14:25    INFO  [Evaluation]: eval_batch_size = [4096] eval_args: [{'split': {'RS': [0.8, 0.1, 0.1]}, 'group_by': 'user', 'order': 'RO', 'mode': 'full'}]
18 Sep 14:25    INFO  BPR(
  (user_embedding): Embedding(944, 64)
  (item_embedding): Embedding(1683, 64)
  (loss): BPRLoss()
)
Trainable parameters: 168128


In [42]:
# Peek at the first (index, batch) pair yielded by the training data to see
# which fields a batch carries.
next(enumerate(train_data))

(0,
 The batch_size of interaction: 2048
     user_id, torch.Size([2048]), cpu, torch.int64
     item_id, torch.Size([2048]), cpu, torch.int64
     rating, torch.Size([2048]), cpu, torch.float32
     timestamp, torch.Size([2048]), cpu, torch.float32
     neg_item_id, torch.Size([2048]), cpu, torch.int64
 )

In [40]:
# Inspect the dataset backing the training data: the training split of the
# interactions, which also carries the sampled `neg_item_id` field.
train_data.dataset

ml-100k
The number of users: 944
Average actions of users: 85.69247083775186
The number of items: 1683
Average actions of items: 48.974545454545456
The number of inters: 80808
The sparsity of the dataset: 94.91374361763195%
Remain Fields: ['user_id', 'item_id', 'rating', 'timestamp', 'neg_item_id']

In [26]:
# Create the trainer for the configured model.
trainer = Trainer(config, model)

# Train on the training data, validating against the validation data, and keep
# the best validation score together with its result.
# NOTE(review): the recorded run of this cell failed with an autocast
# device_type error — presumably from an attempt to run on mps; confirm the
# device is cpu before re-running.
best_valid_score, best_valid_result = trainer.fit(train_data, valid_data)

RuntimeError: User specified autocast device_type must be 'cuda' or 'cpu'

In [7]:
# Evaluate on the test data and print the resulting metrics. The recorded log
# shows the model being loaded from a checkpoint under `saved/` before
# evaluation.
test_result = trainer.evaluate(test_data)
print(test_result)

18 Sep 14:12    INFO  Loading model structure and parameters from saved/BPR-Sep-18-2023_14-12-40.pth


OrderedDict([('recall@10', 0.2388), ('mrr@10', 0.482), ('ndcg@10', 0.2862), ('hit@10', 0.772), ('precision@10', 0.1914)])
