# Transformerによる時系列予測 (Time-series forecasting with a Transformer)

# Copy source modules into the working directory

In [None]:
import shutil

# Copy the project's ``src`` package from the Kaggle input dataset into the
# working directory (presumably so the later ``from src...`` imports resolve
# once the working directory is on ``sys.path``).
# ``distutils`` was deprecated by PEP 632 and removed in Python 3.12, so the
# standard-library ``shutil.copytree`` is used instead. ``dirs_exist_ok=True``
# (Python 3.8+) keeps the cell re-runnable: like ``copy_tree`` it copies into
# an already existing destination instead of raising.
shutil.copytree(
    src="/kaggle/input/m5-forecasting/src",
    dst="/kaggle/working/src",
    dirs_exist_ok=True,
)


# import modules

In [None]:
import os
import sys 
import gc 
import warnings 
import random
from pathlib import Path

import numpy as np
import torch 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Make modules located in the current working directory importable.
working_dir = os.getcwd()
sys.path.append(working_dir)

# Seed Python's, NumPy's and PyTorch's random number generators with one
# shared value, in that order.
seed = 0
for seed_rng in (random.seed, np.random.seed, torch.manual_seed):
    seed_rng(seed)

# Raise pandas' display limits for columns and rows.
for option_name, option_value in (
    ('display.max_columns', 1000),
    ('display.max_rows', 5000),
):
    pd.set_option(option_name, option_value)

%matplotlib inline
# Colour the ticks and tick labels of the current axes white, then switch
# seaborn to its "darkgrid" style.
plt.gca().tick_params(colors='white')
sns.set_style(style="darkgrid")

# Silence warning categories that clutter the notebook output.
# ``SettingWithCopyWarning`` lived in ``pandas.core.common`` until pandas 1.5
# exposed it in ``pandas.errors``; the old attribute path raises
# ``AttributeError`` on newer pandas. Resolve the class from whichever
# location exists, and skip it when the installed pandas defines it nowhere.
ignored_warning_categories = [FutureWarning, RuntimeWarning]
for warning_home in (
    getattr(pd, 'errors', None),
    getattr(getattr(pd, 'core', None), 'common', None),
):
    setting_with_copy = getattr(warning_home, 'SettingWithCopyWarning', None)
    if setting_with_copy is not None:
        ignored_warning_categories.append(setting_with_copy)
        break

for warning_category in ignored_warning_categories:
    warnings.simplefilter('ignore', warning_category)


In [None]:
from src.data.make_dataset import (
    read_dataset, 
    get_date_cols, 
    merge_dataset
)
from src.features.build_dataset import (
    TSdatasets, 
    setting_dataloader
)

from src.features.build_features import (
    select_activate_items, 
    complement_missing,
    add_base_features,
    feature_enginearing
)
from src.models.eval_model import (
    WRMSSEEvaluator, 
    root_mean_squared_error, 
    eval_quantity
)
from src.models.setting_model import (
    AdaBound, 
    GradualWarmupScheduler
)
from src.models.architecture import TransformerModel
from src.models.loss import RMSELoss
from src.models.setting_model import setting_model
from src.models.train_model import RecursiveModel
from src.visualization.visualize import (
    plot_sales,
    plot_lr_and_sr,
    plot_losses,
    plot_eval, 
    plot_eval_per_group,
    plot_prediction
)
from src.models.predict_model import (
    output_inverse, 
    to_submission
)


# Config

In [None]:
# Flag forwarded to read_dataset, get_date_cols and to_submission below.
# NOTE(review): presumably selects the private-leaderboard horizon when True;
# confirm against the helpers in src.
private_sub = False

In [None]:
# Paths used later for the model file, the figures and the evaluation report.
model_path, vis_dir, reports_dir = (
    'models/transformer.model',
    'reports/figures',
    'reports',
)

In [None]:
# --- dataset ---
N_IDS = 5000  # number of leading sales_train rows kept; alternative: 30490
max_train_size = 5 * 364

test_size = 28
d_col = 'd'

# --- target transformations ---
diff_trans = False

pow_trans = False
season_diff_interval = 0  # alternative: 28
std_trans = True
minmax_trans = False

# --- column groups ---
base_cols = [
    'date', 'weekday', 'month', 'year',
    'event_name_1', 'event_type_1',
    'event_name_2', 'event_type_2',
    'snap_CA', 'snap_TX', 'snap_WI',
]

num_cols = ['sell_price', 'is_snap']
sales_cat_cols = [
    'id', 'item_id', 'dept_id', 'cat_id', 'store_id', 'state_id',
]
cat_cols = [
    'quarter', 'is_weekend', 'part_of_month', 'event_name_1', 'event_type_1',
]

all_cat_cols = [*sales_cat_cols, *cat_cols]

# Embedding size per categorical column; a size of 0 means one-hot encoding.
cat_emb = dict(
    id=80,
    item_id=30,
    dept_id=0,
    cat_id=0,
    store_id=0,
    state_id=0,
    quarter=0,
    is_weekend=0,
    part_of_month=0,
    weekday=3,
    month=4,
    event_name_1=10,
    event_type_1=0,
    event_name_2=10,
    event_type_2=0,
)



In [None]:
# --- training loop ---
epochs = 30
batch_size = 1024 // 4
clipping_value = 0.5
log_interval = 1

# --- sequence windows ---
bptt_x = 3 * 28
bptt_y = test_size
lags = [(1, 6), (7, 3 * 28)]  # (tau, period) pairs

# For each pair, every multiple of tau up to and including period.
all_lags = [
    step * tau
    for tau, period in lags
    for step in range(1, period // tau + 1)
]
# Number of lag values contributed by all pairs together.
t_emb = sum(period // tau for tau, period in lags)
max_tau, max_lag = lags[-1]

# --- architecture ---
scheduled_sampling = True
src_mask = False
memory_mask = False
nhid = 2048 // 2  # feedforward dimension inside nn.TransformerEncoder
nlayers = 6 // 2  # number of nn.TransformerEncoderLayer in the encoder
nhead = 8 // 2  # number of heads in the multi-head attention
dropout = 0.2  # dropout value
fc_dims = []  # alternative: [128, 64]
activation = 'relu'

lr = 1e-3  # learning rate



# Read

In [None]:
# Load the four input objects through the project helper; private_sub is
# forwarded unchanged (its effect is defined in src.data.make_dataset).
sell_prices, sample_submission, calendar, sales_train = read_dataset(private_sub)

In [None]:
# Derive the column bookkeeping used throughout the notebook: level_cols,
# the training/test day columns, and d2F_map (later passed to to_submission).
# NOTE(review): the exact contents are defined by get_date_cols in src —
# confirm there.
level_cols, train_d_cols, test_d_cols, d2F_map = get_date_cols(
    sales_train, 
    sample_submission, 
    max_train_size, 
    max_lag, 
    private_sub=private_sub
)
    

# Feature engineering

In [None]:
# Keep only the first N_IDS rows of sales_train (positional slice).
sales_train = sales_train.iloc[:N_IDS, :]

# Combine the sales, calendar and price data into X through the project
# helper, passing the column lists prepared above.
X = merge_dataset(
    sales_train, 
    calendar, 
    sell_prices, 
    base_cols,
    level_cols,
    train_d_cols,
    test_d_cols
)



In [None]:
# Three project-defined preprocessing steps applied to X in sequence; each
# result feeds the next call.
# NOTE(review): judging by their names they filter items, fill gaps and add
# basic features — confirm against src.features.build_features.
X = select_activate_items(X, train_d_cols, test_size, bptt_x, max_lag)
X = complement_missing(X)
X = add_base_features(X)


In [None]:
# Plot from the per-'d' total of 'sales' (summed over all rows sharing a 'd'
# value), the raw 'sales' column and the training day columns.
plot_sales(
    X.groupby('d')['sales'].sum(), 
    X['sales'], 
    train_d_cols
)

In [None]:
# Run the project's feature-engineering step. It returns the updated X, the
# categorical and numeric column lists consumed by the loaders and the model
# setup below, list_dtrans (its first element is later used when inverting
# the predictions) and id_enc (its classes_ are later passed along with them).
# The second argument skips the first max_lag + 1 training day columns and
# the last test_size ones.
# The 'sales' entry of dtrans_map is ordered
# [pow_trans, std_trans, minmax_trans, season_diff_interval]; the 'sell_price'
# entry presumably follows the same order — confirm in src.
X, all_cat_cols, all_num_cols, list_dtrans, id_enc = feature_enginearing(
    X, 
    train_d_cols[max_lag+1:-test_size], 
    test_d_cols, 
    lags, 
    max_lag,
    sales_cat_cols,
    cat_cols,
    num_cols, 
    diff_trans,
    dtrans_map={'sales':[pow_trans, std_trans, minmax_trans, season_diff_interval], 
                'sell_price':[False, True, False, 0]}, 
    clipping_range={'sales': (0.0, 1.0), 
                    'sell_price': (0.0, 1.0)}
)

# Build the four loaders (train, validation, masked validation, test) from
# the engineered X with the window sizes configured above.
trainloader, validloader, validmaskloader, testloader = setting_dataloader(
    X, 
    train_d_cols,
    test_d_cols,
    all_num_cols,
    all_cat_cols,
    bptt_x,
    bptt_y,
    max_lag,
    test_size,
    batch_size,
)


In [None]:
# Per-day totals after feature engineering: sum 'sales' per (id, d), reshape
# to one row per id and one column per d, then add up each day column.
plot_sales(
    X.groupby(['id', 'd'])['sales']
    .sum()
    .reset_index()
    .pivot(index='id', columns='d', values='sales')
    .sum(axis=0),
    X['sales'],
    train_d_cols,
)


# Modeling

In [None]:
# Architecture settings handed to setting_model.
model_params = {
    'src_seq_len': bptt_x,
    'd_model': len(all_num_cols),
    'nhead': nhead,
    'nhid': nhid,
    'nlayers': nlayers,
    'dropout': dropout,
    'fc_dims': fc_dims,
    'activation': activation,
    'use_src_mask': src_mask,
    'use_memory_mask': memory_mask,
}

# Optimizer settings.
opt_params = {'lr': lr, 'weight_decay': 1e-4, 'amsgrad': False}

# The first tenth of the epochs is the warm-up span; the learning-rate
# schedule gets the remaining epochs as T_max.
warmup_epochs = epochs // 10
lr_params = {'T_max': epochs - warmup_epochs, 'eta_min': 1e-5}
warmup_params = {'multiplier': 1, 'total_epoch': warmup_epochs}

# Scheduled-sampling rate settings.
sr_params = {
    'decay_schedules': "inverse_sigmoid_decay",
    'k': epochs,
    'start': 1.0,
    'end': 0.01,
    'slope': 0.3,
}


In [None]:
# Build an optimizer and schedulers only to preview the schedules: the first
# and third return values are discarded here. The components that are
# actually trained come from a separate setting_model call in the next cell.
_, optimizer, _, lr_scheduler, sr_scheduler = setting_model(
    X,
    cat_emb,
    all_cat_cols,
    all_num_cols,
    model_params,
    opt_params,
    lr_params,
    warmup_params,
    sr_params,
)

# Plot the learning-rate and sampling-rate schedules over all epochs.
plot_lr_and_sr(epochs, optimizer, lr_scheduler, sr_scheduler)

In [None]:
# Wrapper that is trained and used for prediction in the following cells.
model = RecursiveModel(bptt_y, all_lags)

# Fresh components, created with the same arguments as the schedule preview,
# are unpacked positionally into the wrapper.
model_components = setting_model(
    X,
    cat_emb,
    all_cat_cols,
    all_num_cols,
    model_params,
    opt_params,
    lr_params,
    warmup_params,
    sr_params,
)
model.setting_model(*model_components)

In [None]:
# Echo the wrapper's `model` attribute as the cell output.
model.model

In [None]:
print('Training...')
# Train for `epochs` epochs on the train/validation loaders. model_path is
# handed to the trainer (presumably where the model is saved — confirm in
# src.models.train_model).
losses = model.train(model_path, trainloader, validloader, epochs)

# trainloader is not referenced again in this notebook; drop it and run a
# garbage-collection pass.
del trainloader
gc.collect()

plot_losses(model, losses)

In [None]:
# Row order applied to every prediction array below: argsort of
# en_cat_i[:, 0, 0] from the test dataset (presumably the encoded id of each
# sample — TODO confirm en_cat_i's layout).
pred_idx = testloader.dataset.en_cat_i[:, 0, 0].argsort()
# Day columns of the two evaluated horizons: the test_size columns just
# before the last test_size training columns, and the last test_size ones.
train_pred_d_cols = train_d_cols[-test_size*2:-test_size]
valid_pred_d_cols = train_d_cols[-test_size:]

# Masked dataset over the window that starts 2 * test_size + max_lag columns
# before the end of the training columns and stops test_size columns before
# the end.
trainmaskset = TSdatasets(
    X, 
    train_d_cols[-((test_size * 2) + max_lag):-test_size], 
    all_num_cols, 
    all_cat_cols, 
    bptt_x, 
    bptt_y, 
    max_lag,
    mask=True
)
# shuffle=False keeps the dataset's sample order.
trainmaskloader = torch.utils.data.DataLoader(
    trainmaskset, batch_size=batch_size, shuffle=False
)

def _predict_sales(loader, pred_d_cols, last_obs_col):
    """Predict with ``model`` on ``loader`` and pass the result to ``output_inverse``.

    Args:
        loader: Data loader handed to ``model.predict``.
        pred_d_cols: Day columns labelling the predicted horizon.
        last_obs_col: Day column of ``sales_train`` immediately preceding the
            horizon. Its values are passed as the final argument of
            ``output_inverse`` (presumably the base level for undoing
            differencing — confirm in ``src.models.predict_model``).

    Returns:
        Whatever ``output_inverse`` returns for these inputs.
    """
    return output_inverse(
        model.predict(loader)[pred_idx],
        list_dtrans[0],
        id_enc.classes_,
        pred_d_cols,
        diff_trans,
        # ``.loc`` slices by label and includes the end label, so the first
        # N_IDS rows are labels 0 .. N_IDS - 1 (assuming the default integer
        # index). The previous ``:N_IDS`` would select one row too many
        # whenever sales_train holds more than N_IDS rows; this matches the
        # slice used in the plot_prediction cell.
        sales_train.loc[:N_IDS - 1, last_obs_col].values,
    )


print('Predicting...')

# Each horizon is paired with the day column right before its first day.
output_train = _predict_sales(
    trainmaskloader, train_pred_d_cols, train_d_cols[-test_size * 2 - 1]
)

output_valid = _predict_sales(
    validmaskloader, valid_pred_d_cols, train_d_cols[-test_size - 1]
)

output_test = _predict_sales(testloader, test_d_cols, train_d_cols[-1])


# Level columns indexed by 'id', placed in front of the prediction columns
# of both the train-horizon and validation-horizon frames.
level_info = sales_train[level_cols].set_index('id')
output_train = pd.concat([level_info, output_train], axis=1)
output_valid = pd.concat([level_info, output_valid], axis=1)

# Observed sales for the same two horizons, in the same layout.
train_truth_cols = level_cols + train_pred_d_cols[-test_size:]
valid_truth_cols = level_cols + valid_pred_d_cols
true_train = sales_train[train_truth_cols].set_index('id')
true_valid = sales_train[valid_truth_cols].set_index('id')


# Evaluation

In [None]:
# Compare predictions with observations on the train horizon ...
train_eval_cols = train_pred_d_cols[-test_size:]
plot_eval(
    Path(vis_dir, 'eval_train'),
    output_train[train_eval_cols],
    true_train[train_eval_cols],
)
# ... and on the validation horizon.
predicted_valid = output_valid[valid_pred_d_cols]
observed_valid = true_valid[valid_pred_d_cols]
plot_eval(Path(vis_dir, 'eval_valid'), predicted_valid, observed_valid)

In [None]:
# Validation-horizon evaluation, grouped first by department, then by store.
# NOTE(review): both calls receive the same path — confirm that
# plot_eval_per_group derives distinct output names per group column.
valid_fig_path = Path(vis_dir, 'eval_valid')
plot_eval_per_group(
    valid_fig_path, output_valid, true_valid, 'dept_id', valid_pred_d_cols
)
plot_eval_per_group(
    valid_fig_path, output_valid, true_valid, 'store_id', valid_pred_d_cols
)

In [None]:
# Plot the test-horizon predictions together with the training day columns
# of the rows labelled 0 .. N_IDS - 1 (.loc is label-based and includes the
# end label, hence N_IDS - 1).
plot_prediction(
    Path(vis_dir, 'pred_test'), 
    sales_train.loc[: N_IDS - 1, train_d_cols], 
    output_test
)

In [None]:
# Evaluate the validation-horizon predictions with the project helper. The
# first argument is a JSON path under reports_dir (presumably where the
# results are written — confirm in src.models.eval_model).
eval_quantity(
    Path(reports_dir, 'eval_results.json'),
    output_valid,
    sales_train,
    calendar,
    sell_prices,
    valid_pred_d_cols,
)


# Submission

In [None]:
# Convert the test-horizon predictions into the submission layout through
# the project helper.
my_submission = to_submission(
    output_test,
    sales_train,
    sample_submission,
    test_d_cols,
    d2F_map,
    private_sub=private_sub,

)

# NOTE(review): to_csv writes the index as the first column by default;
# confirm that to_submission returns the ids as the index.
my_submission.to_csv("submission.csv")

In [None]:
# Echo the submission object as the cell output.
my_submission