In [None]:
!pip install recbole

# Train a model with RecBole
Reference: https://recbole.io/docs/user_guide/usage/use_modules.html

In [None]:
import logging
from logging import getLogger
from recbole.config import Config
from recbole.data import create_dataset, data_preparation
from recbole.model.sequential_recommender import GRU4Rec
from recbole.model.sequential_recommender import LightSANs
from recbole.trainer import Trainer
from recbole.utils import init_seed, init_logger

In [None]:
# Item-side feature columns. The same list is needed twice below (once in
# load_col['item'], once in selected_features), so it is defined a single time
# to keep the two config entries from drifting apart.
item_feature_columns = [
    'product_code', 'product_type_no', 'product_group_name', 'graphical_appearance_no',
    'colour_group_code', 'perceived_colour_value_id', 'perceived_colour_master_id',
    'department_no', 'index_code', 'index_group_no', 'section_no', 'garment_group_no',
    # thirty further columns named '0' .. '29'
    *[f'{i}' for i in range(30)],
]

# Overrides passed to recbole's Config as `config_dict`.
parameter_dict = {
    # --- data location and field names ---
    'data_path': '../input/',
    'USER_ID_FIELD': 'user_id',
    'ITEM_ID_FIELD': 'item_id',
    'TIME_FIELD': 'timestamp',
    # --- interaction-count filters: users need >= 40, items >= 30 ---
    'user_inter_num_interval': "[40,inf)",
    'item_inter_num_interval': "[30,inf)",
    # --- columns to load ---
    'load_col': {
        'inter': ['user_id', 'item_id', 'timestamp'],
        'item': ['item_id', *item_feature_columns],
    },
    # separate list object so the two entries cannot alias each other
    'selected_features': list(item_feature_columns),
    'neg_sampling': None,
    # --- evaluation ---
    # NOTE(review): the RS values are presumably train/valid/test ratios, i.e.
    # 9:0:1 leaves no validation portion — confirm against the RecBole docs.
    'eval_args': {
        'split': {'RS': [9, 0, 1]},
        'group_by': 'user',
        'order': 'TO',
        'mode': 'full'},
    'metrics': 'MAP',
    'topk': 12,
    'valid_metric': 'MAP@12',
    # --- training ---
    'learning_rate': 3e-3,
    'reproducibility': False,
    'epochs': 50,
    'train_batch_size': 3000,
    # --- model ---
    # NOTE(review): 'embedding_size' and 'dropout_prob' look like GRU4Rec
    # parameter names; LightSANs may instead read 'hidden_dropout_prob' /
    # 'attn_dropout_prob' and ignore these two — verify before tuning them.
    'embedding_size': 64,
    'hidden_size': 64,  # 128 was the alternative noted in the original
    # 'num_layers': 1,
    'dropout_prob': 0.5,  # 0.3 was the alternative noted in the original
    # 'k_interests': 100
}

# Combine the overrides above with the model and dataset names into the
# Config object that the remaining cells read from.
config = Config(
    model='LightSANs',
    dataset='hm-data-recbole-small',
    config_dict=parameter_dict,
)

In [None]:
# Seed the random number generators from the config
init_seed(config['seed'], config['reproducibility'])

# Let RecBole set up logging, then work with the root logger
init_logger(config)
logger = getLogger()

# Attach an extra INFO-level stream handler (stderr by default) to the root
# logger. The handler is named and only added when no handler with that name is
# present, so re-running this cell does not stack handlers and emit every log
# record several times.
CONSOLE_HANDLER_NAME = 'notebook-console'
if not any(h.get_name() == CONSOLE_HANDLER_NAME for h in logger.handlers):
    c_handler = logging.StreamHandler()
    c_handler.set_name(CONSOLE_HANDLER_NAME)
    c_handler.setLevel(logging.INFO)
    logger.addHandler(c_handler)

# Write the full configuration into the log
logger.info(config)

In [None]:
# Build the dataset described by `config` and write its summary to the log
dataset = create_dataset(config)
logger.info(dataset)

In [None]:
# Dataset splitting: data_preparation returns three data objects, unpacked here
# as the train, validation and test parts.
train_data, valid_data, test_data = data_preparation(config, dataset)

In [None]:
# Look the target device up once and report it
device = config['device']
print(device)

# Instantiate LightSANs on the training dataset, move it to the device, log it
model = LightSANs(config, train_data.dataset).to(device)
logger.info(model)

# Trainer bound to this config and model
trainer = Trainer(config, model)

# Fit the model. The *test* part is passed as the validation data.
# NOTE(review): presumably because the 9:0:1 split in eval_args leaves no
# validation portion, so `valid_data` would be empty — confirm this is intended.
best_valid_score, best_valid_result = trainer.fit(train_data, valid_data=test_data)

# Get predictions
Reference: https://recbole.io/docs/user_guide/usage/case_study.html

In [None]:
from recbole.utils.case_study import full_sort_topk

# Translate every internal user id (0 .. user_num - 1) back to its external
# token. The first element is 'PAD' (RecBole's default), so it is dropped.
all_internal_user_ids = list(range(dataset.user_num))
all_user_tokens = dataset.id2token(dataset.uid_field, all_internal_user_ids)
external_user_ids = all_user_tokens[1:]

In [None]:
def _top_items_for_user(internal_uid, k=24):
    """Return the external item ids of the top-k recommendations for one user.

    Calls full_sort_topk for the single internal user id and converts the
    resulting internal item ids back to external tokens.
    """
    _, ranked_iids = full_sort_topk(
        [internal_uid], model, test_data, k=k, device=config['device'])
    # Only the last row of the result is kept.
    # NOTE(review): presumably one row per test interaction of the user, the
    # last being the most recent — confirm against full_sort_topk.
    final_row = ranked_iids[-1]
    return dataset.id2token(dataset.iid_field, final_row.cpu()).tolist()


# Internal id 0 is skipped (padding entry), matching external_user_ids
topk_items = [_top_items_for_user(uid) for uid in range(1, dataset.user_num)]
print(len(topk_items))

In [None]:
import pandas as pd

# Each user's recommended item ids become one space-separated string
external_item_str = [' '.join(item_tokens) for item_tokens in topk_items]

# One row per user: external customer id plus the joined recommendation string
result = pd.DataFrame(external_user_ids, columns=['customer_id']).assign(
    prediction=external_item_str
)
result.head()

In [None]:
# Print the dimensions explicitly: a bare `result.shape` in the middle of a cell
# is evaluated but never displayed, because only the last expression is echoed.
print(result.shape)

# The file name advertises gzip, but to_parquet uses snappy unless told
# otherwise; request the codec explicitly so the contents match the name.
# Parquet readers take the codec from the file metadata, so reading is unchanged.
result.to_parquet('lightsan_sub_t24.parquet.gzip', compression='gzip')

In [None]:
# Read the saved parquet file back and display the shape of the loaded frame
test = pd.read_parquet('lightsan_sub_t24.parquet.gzip')
test.shape

# Combine the model predictions with the default (baseline) predictions

In [None]:
# Load the existing ensemble submission; its predictions are the fallback for
# customers the model produced no prediction for (see the merge cells below).
submit_df = pd.read_csv('../input/0237-ensemble-submission-handm/0238_ensemble.csv')
submit_df.shape

In [None]:
# Preview the first rows of the loaded submission
submit_df.head()

In [None]:
# Outer-join the baseline submission with the model output on customer_id.
# Both frames carry a `prediction` column, so pandas applies its default
# suffixes: prediction_x (baseline, left) and prediction_y (model, right).
# Note: this rebinds submit_df, so the cell is not meant to be run twice
# without reloading the baseline first.
submit_df = submit_df.merge(result, how='outer', on='customer_id')
submit_df.head()

In [None]:
# Rows that exist on only one side of the outer merge have NaN in the other
# side's column; mark those gaps with the sentinel -1.
submit_df = submit_df.fillna(-1)

# Prefer the model prediction (prediction_y) wherever one exists and fall back
# to the baseline (prediction_x) otherwise. A column-wise `where` gives the same
# values as a row-wise apply(axis=1) without calling a Python function per row.
has_model_prediction = submit_df['prediction_y'] != -1
submit_df['prediction'] = submit_df['prediction_y'].where(
    has_model_prediction, submit_df['prediction_x'])
submit_df.head()

In [None]:
# Remove the two suffixed source columns; the combined value now lives in
# the `prediction` column created in the previous cell.
submit_df = submit_df.drop(columns=['prediction_y', 'prediction_x'])
submit_df.head()

In [None]:
# Write the combined submission without the index column.
# NOTE(review): pandas infers compression from the file extension, and '.gzip'
# does not appear to be one of the recognised suffixes (unlike '.gz'), so this
# probably writes plain, uncompressed CSV despite the name — confirm, then
# either rename the file or pass compression='gzip' explicitly.
submit_df.to_csv('submission.csv.gzip', index=False)

In [None]:
# Prints the literal string 'test'; nothing else in the notebook depends on it.
# NOTE(review): looks like a leftover debugging cell — confirm it can be removed.
print('test')