### Install Beta-recsys

## Loading dataset

In [1]:
import os
import random
import sys

import numpy as np

sys.path.append("../")

from beta_rec.data.grocery_data import GroceryData
from beta_rec.datasets.instacart import Instacart_25

# Seed both Python's and NumPy's global random generators so that repeated
# runs of this notebook are reproducible.
seed = 2021
for seed_rng in (random.seed, np.random.seed):
    seed_rng(seed)

# Prerequisite: download the Instacart data from
# https://www.kaggle.com/c/instacart-market-basket-analysis#
# then uncompress the files and place them in ../datasets/instacart_25/raw/*.csv

# Filtering conditions handed to the dataset loader.
# NOTE(review): presumably minimum user / item / order counts — confirm
# against Instacart_25.
filter_conditions = {"min_u_c": 20, "min_i_c": 30, "min_o_c": 10}
dataset = Instacart_25(**filter_conditions)

# Split the data, then wrap the split in the GroceryData container that the
# model cells consume.
split_options = {"test_rate": 0.2, "n_test": 10}
split_dataset = dataset.load_temporal_basket_split(**split_options)
data = GroceryData(split_dataset)

--------------------------------------------------------------------------------
Loaded training set statistics
+---------+------------+------------+--------------+-----------------+-------------+
|         | col_user   | col_item   | col_rating   | col_timestamp   | col_order   |
|---------+------------+------------+--------------+-----------------+-------------|
| count   | 3857794    | 3857794    | 3857794      | 3857794         | 3857794     |
| nunique | 23093      | 14565      | 1            | 3857794         | 373719      |
+---------+------------+------------+--------------+-----------------+-------------+
valid_data_0 statistics
+---------+------------+------------+--------------+-----------------+
|         | col_user   | col_item   | col_rating   | col_timestamp   |
|---------+------------+------------+--------------+-----------------|
| count   | 3076168    | 3076168    | 3076168      | 3076168         |
| nunique | 22475      | 14565      | 2            | 1               |

KeyboardInterrupt: 

### Model config

In [None]:
# Model configuration for VBCAR.
# The "config_file" key is required: it names the default config to load.
# Every other key overrides the corresponding default setting.
config = {
    "config_file": "../configs/vbcar_default.json",
    "n_sample": 1000000,  # To reduce the test running time
    "max_epoch": 80,
    "emb_dim": 64,
    "late_dim": 512,
    # Resolve the project root from the current working directory instead of
    # hard-coding one user's absolute home path. This assumes the notebook is
    # run from a directory directly below the repository root, the same
    # assumption the relative "../configs/..." path above already makes.
    # Joining with "" keeps the trailing path separator of the original value.
    "root_dir": os.path.join(os.path.abspath(os.pardir), ""),
    "dataset": "instacart_25",
    "batch_size": 512,
    "lr": 0.001,
}

# Optional settings, disabled by default (a fixed item feature type and a
# learning-rate search):
# config["item_fea_type"] = "random_word2vec"
# config["tunable"] = [
#     {"name": "lr", "type": "choice", "values": [0.5, 0.05, 0.025, 0.001, 0.005]},
# ]
# config["tune"] = True

### Model initialization and training

In [None]:
from beta_rec.recommenders import VBCAR

# Item-feature variants to run; each one gets its own train/test cycle below.
item_fea_types = (
    "random",
    "cate",
    "cate_word2vec",
    "cate_bert",
    "cate_one_hot",
    "random_word2vec",
    "random_bert",
    # Variants currently switched off:
    #     "random_one_hot",
    #     "random_bert_word2vec_one_hot",
    #     "random_cate_word2vec",
    #     "random_cate_bert",
    #     "random_cate_one_hot",
    #     "random_cate_bert_word2vec_one_hot",
)

for item_fea_type in item_fea_types:
    # Select the feature variant in the shared config, then build a fresh
    # model from it.
    config["item_fea_type"] = item_fea_type
    model = VBCAR(config)
    model.train(data)
    # Evaluate on the first entry of data.test.
    model.test(data.test[0])
# @To be discussed
# model.train(train_df)
# Case 1, without validation, stop training by loss or max_epoch

# model.train(train_df,valid_df[0])
# Case 2, with validation, stop training by performance on validation set

# model.train(train_df,valid_df[0],test_df[0])
# Case 3, same as Case 2, but also evaluate performance for each epoch on test set.

# Note that the best model will be saved automatically, and the model save directory will be recorded.