In [1]:
from logging import getLogger
from recbole.config import Config
from recbole.data import create_dataset, data_preparation
from recbole.model.general_recommender import BPR 
from recbole.trainer import Trainer
from recbole.utils import init_logger, init_seed

In [2]:
# configuration initialization
# config module is used to set parameters and experiment setup.
config = Config(model="BPR", dataset="ml-100k", config_file_list=None, config_dict=None)

In [3]:
config


[1;35mGeneral Hyper Parameters:
[0m[1;36mgpu_id[0m =[1;33m 0[0m
[1;36muse_gpu[0m =[1;33m True[0m
[1;36mseed[0m =[1;33m 2020[0m
[1;36mstate[0m =[1;33m INFO[0m
[1;36mreproducibility[0m =[1;33m True[0m
[1;36mdata_path[0m =[1;33m /home/niranjan/miniconda3/envs/recbole/lib/python3.9/site-packages/recbole/config/../dataset_example/ml-100k[0m
[1;36mcheckpoint_dir[0m =[1;33m saved[0m
[1;36mshow_progress[0m =[1;33m True[0m
[1;36msave_dataset[0m =[1;33m False[0m
[1;36mdataset_save_path[0m =[1;33m None[0m
[1;36msave_dataloaders[0m =[1;33m False[0m
[1;36mdataloaders_save_path[0m =[1;33m None[0m
[1;36mlog_wandb[0m =[1;33m False[0m

[1;35mTraining Hyper Parameters:
[0m[1;36mepochs[0m =[1;33m 300[0m
[1;36mtrain_batch_size[0m =[1;33m 2048[0m
[1;36mlearner[0m =[1;33m adam[0m
[1;36mlearning_rate[0m =[1;33m 0.001[0m
[1;36mtrain_neg_sample_args[0m =[1;33m {'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False,

In [4]:
# init random seet
# initializing the random see to ensure the reproducibility of the experiment
init_seed(config["seed"], config["reproducibility"])

In [7]:
config["model"], config["MODEL_TYPE"], config["SAVE_DATASET"]

('BPR', <ModelType.GENERAL: 1>, None)

In [8]:
# dataset filtering
# filtering the data files according to the parameters indicated in the configuration
dataset = create_dataset(config)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=feat[field].mean(), inplace=True)


In [9]:
dataset

[1;35mml-100k[0m
[1;34mThe number of users[0m: 944
[1;34mAverage actions of users[0m: 106.04453870625663
[1;34mThe number of items[0m: 1683
[1;34mAverage actions of items[0m: 59.45303210463734
[1;34mThe number of inters[0m: 100000
[1;34mThe sparsity of the dataset[0m: 93.70575143257098%
[1;34mRemain Fields[0m: ['user_id', 'item_id', 'rating', 'timestamp']

In [10]:
# dataset splitting
train_data, valid_data, test_data = data_preparation(config, dataset)

In [12]:
for i in train_data:
    print(i)
    break

The batch_size of interaction: 2048
    user_id, torch.Size([2048]), cpu, torch.int64
    item_id, torch.Size([2048]), cpu, torch.int64
    rating, torch.Size([2048]), cpu, torch.float32
    timestamp, torch.Size([2048]), cpu, torch.float32
    neg_item_id, torch.Size([2048]), cpu, torch.int64




In [13]:
config["device"]

device(type='cuda')

### Manual selection of model and trainer

In [14]:
# initializing the model according to the model names,
# and initalizing the instance of the model
model = BPR(config, train_data.dataset).to(config['device'])

In [15]:
# initializing the trainer, which is used to model training and evaluation
trainer = Trainer(config, model)

In [16]:
# model training
best_valid_score, best_valid_result = trainer.fit(train_data, valid_data)

In [18]:
# model_evaluation
test_result = trainer.evaluate(test_data)
print(test_result)

OrderedDict([('recall@10', 0.2415), ('mrr@10', 0.4763), ('ndcg@10', 0.2855), ('hit@10', 0.7815), ('precision@10', 0.193)])


## Automatic Selection of Model and Trainer

In [21]:
config["model"], config["MODEL_TYPE"]

('BPR', <ModelType.GENERAL: 1>)

In [24]:
from recbole.utils import get_model, get_trainer

# model loading and intialization
bpr = get_model(config["model"])
model = bpr(config, train_data.dataset).to(config["device"])
trainer = get_trainer(config["MODEL_TYPE"], config["model"])(config, model)

### Resume model from break point
- Train the model from the formal parameters

In [28]:
trainer = get_trainer(config["MODEL_TYPE"], config["model"])(config, model)
# checkpoint_file is the file used to store the model
checkpoint_file = "./saved/BPR-May-30-2024_11-13-14.pth"
trainer.resume_checkpoint(checkpoint_file)

best_valid_score, best_valid_result = trainer.fit(train_data, valid_data)


In [31]:
print(best_valid_score)
print(best_valid_result)

0.3763
OrderedDict([('recall@10', 0.213), ('mrr@10', 0.3763), ('ndcg@10', 0.2279), ('hit@10', 0.7476), ('precision@10', 0.1619)])


### How to test the model based on previous saved parameters

In [32]:
# trainer loading and initialization
trainer = get_trainer(config["MODEL_TYPE"], config["model"])(config, model)

# when calculate ItemCoverage metrics, we need to run this code from set item_nums in eval_collector.
trainer.eval_collector.data_collect(train_data)

# model evaluation
checkpoint_file = "./saved/BPR-May-30-2024_11-13-14.pth"

test_result = trainer.evaluate(test_data, model_file=checkpoint_file)
print(test_result)

OrderedDict([('recall@10', 0.2453), ('mrr@10', 0.4776), ('ndcg@10', 0.2873), ('hit@10', 0.79), ('precision@10', 0.1947)])
