### Install Beta-recsys

## Loading dataset

In [1]:
import os
import random
import sys

sys.path.append("../")

import numpy as np

from beta_rec.data.grocery_data import GroceryData
from beta_rec.datasets.instacart import Instacart_25

# Seed both NumPy's and Python's global random generators so that repeated
# runs of this notebook are reproducible.
seed = 2021
np.random.seed(seed)
random.seed(seed)

# Prerequisite: download the Instacart data from
# https://www.kaggle.com/c/instacart-market-basket-analysis#
# then uncompress it and place the files in ../datasets/instacart_25/raw/*.csv


# Build the dataset; the keyword arguments specify the filtering conditions.
dataset = Instacart_25(
    min_u_c=20,
    min_i_c=30,
    min_o_c=10,
)

# Split the data, then wrap the split in GroceryData for the model cells below.
split_dataset = dataset.load_temporal_basket_split(
    test_rate=0.2,
    n_test=10,
)
data = GroceryData(split_dataset)

--------------------------------------------------------------------------------
Loaded training set statistics
+---------+------------+------------+--------------+-----------------+-------------+
|         | col_user   | col_item   | col_rating   | col_timestamp   | col_order   |
|---------+------------+------------+--------------+-----------------+-------------|
| count   | 3857794    | 3857794    | 3857794      | 3857794         | 3857794     |
| nunique | 23093      | 14565      | 1            | 3857794         | 373719      |
+---------+------------+------------+--------------+-----------------+-------------+
valid_data_0 statistics
+---------+------------+------------+--------------+-----------------+
|         | col_user   | col_item   | col_rating   | col_timestamp   |
|---------+------------+------------+--------------+-----------------|
| count   | 3076168    | 3076168    | 3076168      | 3076168         |
| nunique | 22475      | 14565      | 2            | 1               |

### Model config

In [2]:
# The "config_file" key is required: it is used to load the default config.
# Every other key specified here replaces the corresponding default setting.
config = {
    "config_file": "../configs/vbcar_default.json",
    "n_sample": 5000000,  # To reduce the test running time
    "max_epoch": 80,
    "emb_dim": 64,
    # The project root is machine-specific. Set the BETA_REC_ROOT_DIR
    # environment variable to override it; the original path is kept as the
    # fallback so existing setups behave exactly as before.
    "root_dir": os.environ.get("BETA_REC_ROOT_DIR")
    or "/home/zm324/workspace/beta-recsys/",
    "dataset": "instacart_25",
    "batch_size": 10000,
    "lr": 0.001,
}

# Optional settings, currently disabled (a fixed feature type and a
# learning-rate search):
# config["item_fea_type"] = "random_word2vec"
# config["tunable"] = [
#     {"name": "lr", "type": "choice", "values": [0.5, 0.05, 0.025, 0.001, 0.005]},
# ]
# config["tune"] = True

### Model initialization and training

In [None]:
from beta_rec.recommenders import VBCAR

# Train and test one VBCAR model per item-feature type. The commented-out
# entries are alternative feature types that are skipped in this run.
# NOTE(review): the recorded output of this cell logs "CANNOT support feature
# type random_bert_word2vec_one_hot" and falls back to random features, so that
# entry is apparently not a distinct configuration -- confirm the intended name.
for item_fea_type in (
    # "random",
    # "cate",
    # "cate_word2vec",
    # "cate_bert",
    # "cate_one_hot",
    # "random_word2vec",
    # "random_bert",
    "random_one_hot",
    "random_bert_word2vec_one_hot",
    "random_cate_word2vec",
    "random_cate_bert",
    "random_cate_one_hot",
    "random_cate_bert_word2vec_one_hot",
):
    # Select the feature type for this run, then build, train and test a model.
    config.update(item_fea_type=item_fea_type)
    model = VBCAR(config)
    model.train(data)
    model.test(data.test)

# @To be discussed
# Case 1, without validation: stop training by loss or max_epoch.
#     model.train(train_df)
#
# Case 2, with validation: stop training by performance on the validation set.
#     model.train(train_df,valid_df[0])
#
# Case 3, same as Case 2, but also evaluate performance on the test set for each epoch.
#     model.train(train_df,valid_df[0],test_df[0])
#
# Note that the best model will be saved automatically, and the model-save-dir is recorded.

Search default config file in /home/zm324/anaconda3/envs/beta_rec/configs/vbcar_default.json
Found default config file in /home/zm324/anaconda3/envs/beta_rec/configs/vbcar_default.json
loading config file /home/zm324/anaconda3/envs/beta_rec/configs/vbcar_default.json
--------------------------------------------------------------------------------
Received parameters from command line (or default):
+----+-----------------------+------------------------------------+
|    | keys                  | values                             |
|----+-----------------------+------------------------------------|
|  0 | system:root_dir       | /home/zm324/workspace/beta-recsys/ |
|  1 | model:n_sample        | 5000000                            |
|  2 | model:max_epoch       | 80                                 |
|  3 | model:emb_dim         | 64                                 |
|  4 | model:batch_size      | 10000                              |
|  5 | model:lr              | 0.001                   

2021-12-10 20:25:09 [ERROR]-Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
2021-12-10 20:25:10 [ERROR]-wandb: Currently logged in as: mengzaiqiao (use `wandb login --relogin` to force relogin)


2021-12-10 20:25:11 [INFO]-Get a gpu with the most available memory : 1
2021-12-10 20:25:11 [INFO]-Initializing test engine ...
2021-12-10 20:25:11 [INFO]-Get a gpu with the most available memory : 1
2021-12-10 20:25:11 [INFO]-load basic item featrue for dataset: instacart_25  type: one_hot
2021-12-10 20:25:18 [INFO]-Setting device for torch_engine cuda:1
2021-12-10 20:25:18 [INFO]-
VBCAR(
  (user_emb): Embedding(23093, 64)
  (item_emb): Embedding(14565, 64)
  (fc_u_1_mu): Linear(in_features=512, out_features=256, bias=True)
  (fc_u_2_mu): Linear(in_features=256, out_features=128, bias=True)
  (fc_i_1_mu): Linear(in_features=676, out_features=256, bias=True)
  (fc_i_2_mu): Linear(in_features=256, out_features=128, bias=True)

2021-12-10 20:25:18 [INFO]-
2021-12-10 20:25:19 [INFO]-Initialize Sampler!
2021-12-10 20:25:19 [INFO]-Preparing training triples ... 




100%|##########| 5000000/5000000 [10:36<00:00, 7861.37it/s]
2021-12-10 20:36:15 [INFO]-done!
  0%|          | 0/80 [00:00<?, ?it/s]Epoch 0 starts !
2021-12-10 20:36:28 [INFO]---------------------------------------------------------------------------------
2021-12-10 20:46:53 [INFO]-[Training Epoch 0], log_like_loss 0.410731360244751 kl_loss: 0.004772369949799031 alpha: 0.001 lr: 0.001




2021-12-10 20:46:53 [INFO]-Execute [train_an_epoch] method costing 624615.68 ms
  1%|1         | 1/80 [10:24<13:42:24, 624.62s/it]Epoch 1 starts !
2021-12-10 20:46:53 [INFO]---------------------------------------------------------------------------------
2021-12-10 20:57:17 [INFO]-[Training Epoch 1], log_like_loss 0.3479377017021179 kl_loss: 0.009294341337680817 alpha: 0.001 lr: 0.001
2021-12-10 20:57:17 [INFO]-Execute [train_an_epoch] method costing 624607.43 ms
  2%|2         | 2/80 [20:49<13:32:00, 624.62s/it]Epoch 2 starts !
2021-12-10 20:57:17 [INFO]---------------------------------------------------------------------------------
2021-12-10 21:07:44 [INFO]-[Training Epoch 2], log_like_loss 0.31180526585578916 kl_loss: 0.012355786944925786 alpha: 0.001 lr: 0.001
2021-12-10 21:07:44 [INFO]-Execute [train_an_epoch] method costing 626342.65 ms
  4%|3         | 3/80 [31:15<13:22:16, 625.15s/it]Epoch 3 starts !
2021-12-10 21:07:44 [INFO]--------------------------------------------------



2021-12-11 03:26:16 [INFO]-Current testEngine.best_valid_performance 0
2021-12-11 03:26:16 [INFO]---------------------------------------------------------------------------------
2021-12-11 03:26:16 [INFO]-performance on validation at epoch 40
2021-12-11 03:26:16 [INFO]-
+----+--------------+----------+
|    | metrics      |   values |
|----+--------------+----------|
|  0 | ndcg@10      | 0.81568  |
|  1 | precision@10 | 0.767444 |
|  2 | recall@10    | 0.295832 |
|  3 | map@10       | 0.262096 |
+----+--------------+----------
2021-12-11 03:26:16 [INFO]-
2021-12-11 03:26:16 [INFO]---------------------------------------------------------------------------------
2021-12-11 03:26:16 [INFO]-Execute [train_eval_worker] method costing 98604.68 ms
2021-12-11 03:35:23 [INFO]-[Training Epoch 41], log_like_loss 0.189126189994812 kl_loss: 0.0318423326253891 alpha: 0.001 lr: 0.001
2021-12-11 03:35:23 [INFO]-Execute [train_an_epoch] method costing 645508.98 ms
 52%|#####2    | 42/80 [6:58:56<6:13

VBox(children=(Label(value=' 2.36MB of 2.36MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
VBCAR_default_20211210_202509_murihu/device/GPU/GPU_load (%),6.0
VBCAR_default_20211210_202509_murihu/device/GPU/GPU_load (%)/global_step,26534.0
VBCAR_default_20211210_202509_murihu/device/GPU/GPU_memory_free (%),42199.69141
VBCAR_default_20211210_202509_murihu/device/GPU/GPU_memory_free (%)/global_step,26534.0
VBCAR_default_20211210_202509_murihu/device/GPU/GPU_memory_used (%),60200.30859
VBCAR_default_20211210_202509_murihu/device/GPU/GPU_memory_used (%)/global_step,26534.0
VBCAR_default_20211210_202509_murihu/device/cpu/CPU_load (%),97.4
VBCAR_default_20211210_202509_murihu/device/cpu/CPU_load (%)/global_step,26534.0
VBCAR_default_20211210_202509_murihu/device/mem/memory_used (GB),3.39391
VBCAR_default_20211210_202509_murihu/device/mem/memory_used (GB)/global_step,26534.0


2021-12-11 11:53:34 [INFO]-Get a gpu with the most available memory : 3




2021-12-11 11:53:34 [INFO]-Initializing test engine ...
2021-12-11 11:53:35 [INFO]-Get a gpu with the most available memory : 3
2021-12-11 11:53:35 [INFO]-[ERROR]: CANNOT support feature type random_bert_word2vec_one_hot! intialize with random feature
2021-12-11 11:53:45 [INFO]-Setting device for torch_engine cuda:3
2021-12-11 11:53:45 [INFO]-
VBCAR(
  (user_emb): Embedding(23093, 64)
  (item_emb): Embedding(14565, 64)
  (fc_u_1_mu): Linear(in_features=512, out_features=256, bias=True)
  (fc_u_2_mu): Linear(in_features=256, out_features=128, bias=True)
  (fc_i_1_mu): Linear(in_features=512, out_features=256, bias=True)
  (fc_i_2_mu): Linear(in_features=256, out_features=128, bias=True)

2021-12-11 11:53:45 [INFO]-




2021-12-11 11:53:48 [INFO]-Initialize Sampler!
2021-12-11 11:53:48 [INFO]-Preparing training triples ... 




100%|##########| 5000000/5000000 [11:12<00:00, 7432.61it/s]
2021-12-11 12:05:24 [INFO]-done!
  0%|          | 0/80 [00:00<?, ?it/s]Epoch 0 starts !
2021-12-11 12:05:35 [INFO]---------------------------------------------------------------------------------
2021-12-11 12:15:07 [INFO]-[Training Epoch 0], log_like_loss 0.4131819338798523 kl_loss: 0.003681460132729262 alpha: 0.001 lr: 0.001




2021-12-11 12:15:07 [INFO]-Execute [train_an_epoch] method costing 571800.54 ms
  1%|1         | 1/80 [09:31<12:32:52, 571.80s/it]Epoch 1 starts !
2021-12-11 12:15:07 [INFO]---------------------------------------------------------------------------------
2021-12-11 12:24:44 [INFO]-[Training Epoch 1], log_like_loss 0.3477949652671814 kl_loss: 0.008061615947633981 alpha: 0.001 lr: 0.001
2021-12-11 12:24:44 [INFO]-Execute [train_an_epoch] method costing 577684.08 ms
  2%|2         | 2/80 [19:09<12:25:38, 573.57s/it]Epoch 2 starts !
2021-12-11 12:24:44 [INFO]---------------------------------------------------------------------------------
2021-12-11 12:34:21 [INFO]-[Training Epoch 2], log_like_loss 0.3120503520488739 kl_loss: 0.011665179282426835 alpha: 0.001 lr: 0.001
2021-12-11 12:34:21 [INFO]-Execute [train_an_epoch] method costing 576553.22 ms
  4%|3         | 3/80 [28:46<12:17:14, 574.48s/it]Epoch 3 starts !
2021-12-11 12:34:21 [INFO]---------------------------------------------------