In [1]:
import os

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset

from main import save_train_test_datasets
from train_model import train
from TSForecasting.data_provider.data_factory import data_provider
from TSForecasting.models.ConvTimeNet import Model
from TSForecasting.utils.timefeatures import time_features


In [2]:
# Single configuration dictionary read by every later cell (data loading,
# model construction and the `train` call). Keys are grouped by purpose; the
# insertion order is the same as before.
args = {
    # --- Locations -------------------------------------------------------
    'root_dir': '.',
    'data_path': 'data/',

    # --- Data splits -----------------------------------------------------
    # The train/val/test entries all point at the same processed CSV and
    # differ only in their flag value.
    'train_data_path': 'data/processed/train_set_processed.csv',
    'train_flag': 'train',
    'val_data_path': 'data/processed/train_set_processed.csv',
    'val_flag': 'val',
    'test_data_path': 'data/processed/train_set_processed.csv',
    'test_flag': 'test',

    # Hold-out file without real targets, loaded with the 'pred' flag.
    'unseen_data_path': 'data/processed/test_set_processed.csv',
    'unseen_data_flag': 'pred',

    # --- Dataset / windowing options passed to data_provider ---------------
    'data': 'custom',
    'features': 'MS',
    'target': 'orders',
    'batch_size': 16,
    'freq': 'd',
    'seq_len': 14,
    'label_len': 14,
    'pred_len': 1,
    'embed': 'timeF',

    # --- Training params ---------------------------------------------------
    'checkpoints': './checkpoints/',
    'patience': 5,
    'use_amp': True,
    'train_epochs': 2,
    'learning_rate': 1e-05,

    # --- Model hyper-parameters -------------------------------------------
    'enc_in': 14,
    'e_layers': 6,
    'd_model': 128,
    'd_ff': 256,
    'dropout': 0.05,
    'head_dropout': 0.0,
    'patch_ks': 16,
    'patch_sd': 3,
    'padding_patch': 'end',
    'revin': 1,
    'affine': 0,
    'subtract_last': 0,
    'dw_ks': [11, 15, 21, 29, 39, 51],
    're_param': 1,
    're_param_kernel': 3,
    'enable_res_param': 1,
    # NOTE(review): 'norm', 'act' and 'head_type' are not forwarded to Model
    # in this notebook -- presumably consumed elsewhere or unused; confirm.
    'norm': 'batch',
    'act': "gelu",
    'head_type': 'flatten',

    # --- Test ----------------------------------------------------------------
    'test_flop': False,
    'do_predict': True,
}

In [3]:
# Load the processed training CSV for inspection. The path comes from `args`
# so this cell cannot drift from the file the training loader reads.
# NOTE(review): this file is (re)written by the save_train_test_datasets cell
# further down, so on a fresh checkout that cell must have run at least once
# before this one can succeed.
original_train_set = pd.read_csv(args['train_data_path'])
original_train_set

Unnamed: 0,date,warehouse,holiday,shops_closed,winter_school_holidays,school_holidays,day,month,year,day_of_week,week_of_year,is_weekend,is_holiday,quarter,orders
0,2020-12-05,4,0,0,0,0,5,12,2020,5,49,1,0,4,6895.0
1,2020-12-06,4,0,0,0,0,6,12,2020,6,49,1,0,4,6584.0
2,2020-12-07,4,0,0,0,0,7,12,2020,0,50,0,0,4,7030.0
3,2020-12-08,4,0,0,0,0,8,12,2020,1,50,0,0,4,6550.0
4,2020-12-09,4,0,0,0,0,9,12,2020,2,50,0,0,4,6910.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7335,2024-03-10,1,0,0,0,0,10,3,2024,6,10,1,0,1,6733.0
7336,2024-03-11,1,0,0,0,0,11,3,2024,0,11,0,0,1,6492.0
7337,2024-03-12,1,0,0,0,0,12,3,2024,1,11,0,0,1,6661.0
7338,2024-03-13,1,0,0,0,0,13,3,2024,2,11,0,0,1,6843.0


In [4]:
# Load the processed hold-out CSV for inspection, using the path declared in
# `args` instead of repeating the literal. In the recorded output every
# displayed row has orders == 5000 (looks like a placeholder target -- confirm).
original_test_set = pd.read_csv(args['unseen_data_path'])
original_test_set

Unnamed: 0,date,warehouse,holiday,shops_closed,winter_school_holidays,school_holidays,day,month,year,day_of_week,week_of_year,is_weekend,is_holiday,quarter,orders
0,2024-03-16,4,0,0,0,0,16,3,2024,5,11,1,0,1,5000
1,2024-03-17,4,0,0,0,0,17,3,2024,6,11,1,0,1,5000
2,2024-03-18,4,0,0,0,0,18,3,2024,0,12,0,0,1,5000
3,2024-03-19,4,0,0,0,0,19,3,2024,1,12,0,0,1,5000
4,2024-03-20,4,0,0,0,0,20,3,2024,2,12,0,0,1,5000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
392,2024-05-11,1,0,0,0,0,11,5,2024,5,19,1,0,2,5000
393,2024-05-12,1,0,0,0,0,12,5,2024,6,19,1,0,2,5000
394,2024-05-13,1,0,0,0,0,13,5,2024,0,20,0,0,2,5000
395,2024-05-14,1,0,0,0,0,14,5,2024,1,20,0,0,2,5000


In [5]:
# Run the project's preprocessing step with saving enabled. The recorded
# output reports the train/test shapes and "Saving processed train and test
# files...". The function itself is imported in the notebook's first cell
# alongside the other project imports.
save_train_test_datasets(
    root_dir=args['root_dir'],
    data_path=args['data_path'],
    preprocessing=True,
    save_data=True,
)


Length of train set: (7340, 15)
Length of test set: (397, 14)
Train set columns: Index(['date', 'warehouse', 'holiday', 'shops_closed',
       'winter_school_holidays', 'school_holidays', 'day', 'month', 'year',
       'day_of_week', 'week_of_year', 'is_weekend', 'is_holiday', 'quarter',
       'orders'],
      dtype='object')
Test set columns: Index(['date', 'warehouse', 'holiday', 'shops_closed',
       'winter_school_holidays', 'school_holidays', 'day', 'month', 'year',
       'day_of_week', 'week_of_year', 'is_weekend', 'is_holiday', 'quarter'],
      dtype='object')
Saving processed train and test files...


In [6]:
# Re-read both processed CSVs from disk and print their (rows, columns) shape,
# train first and test second.
for processed_csv in ('data/processed/train_set_processed.csv',
                      'data/processed/test_set_processed.csv'):
    print(pd.read_csv(processed_csv).shape)


(7340, 15)
(397, 15)


In [7]:
# Options that every data_provider call in this cell shares; only the CSV
# path and the split flag vary between loaders. The key order mirrors the
# keyword order of the data_provider call.
shared_loader_kwargs = {
    'features': args['features'],
    'target': args['target'],
    'data': args['data'],
    'batch_size': args['batch_size'],
    'freq': args['freq'],
    'seq_len': args['seq_len'],
    'label_len': args['label_len'],
    'pred_len': args['pred_len'],
    'embed': args['embed'],
}

train_dataset, train_loader = data_provider(
    root_path=args['root_dir'],
    data_path=args['train_data_path'],
    flag=args['train_flag'],
    **shared_loader_kwargs,
)

# Validation and test loaders are currently disabled.
# NOTE(review): the disabled validation call passes args['test_flag'] rather
# than args['val_flag'] -- confirm which is intended before re-enabling.
# val_dataset, val_loader = data_provider(root_path=args['root_dir'], data_path=args['val_data_path'],
#                                           flag=args['test_flag'], **shared_loader_kwargs)
#
# test_dataset, test_loader = data_provider(root_path=args['root_dir'], data_path=args['test_data_path'],
#                                           flag=args['test_flag'], **shared_loader_kwargs)

# Names of the `args` entries forwarded verbatim to the Model constructor as
# keyword arguments of the same name.
model_arg_names = (
    'enc_in', 'seq_len', 'pred_len',
    'e_layers', 'd_model', 'd_ff',
    'dropout', 'head_dropout',
    'patch_ks', 'patch_sd',
    'padding_patch', 'revin',
    'affine', 'subtract_last',
    'dw_ks', 're_param',
    're_param_kernel',
    'enable_res_param',
)
model = Model(**{arg_name: args[arg_name] for arg_name in model_arg_names})

# `train` returns the model object that is used from here on.
model = train(args, model, train_dataset, train_loader)


Mode: train; datapath: data/processed/train_set_processed.csv, flag: train; features: MS, target: orders, data: custom, batch_size: 16, freq: d, seq_len: 14, label_len: 14, pred_len: 1, embed: timeF
train 7326
	iters: 100, epoch: 1 | loss: 0.1037775
	speed: 0.0128s/iter; left time: 10.4707s
	iters: 200, epoch: 1 | loss: 0.0239570
	speed: 0.0058s/iter; left time: 4.1420s
	iters: 300, epoch: 1 | loss: 0.0092034
	speed: 0.0056s/iter; left time: 3.4341s
	iters: 400, epoch: 1 | loss: 0.0023925
	speed: 0.0055s/iter; left time: 2.8273s
Epoch: 1 cost time: 2.963646173477173
Epoch: 1, Steps: 457 | Train Loss: 0.0734864
Updating learning rate to 1e-05
	iters: 100, epoch: 2 | loss: 0.0899464
	speed: 0.0095s/iter; left time: 3.4013s
	iters: 200, epoch: 2 | loss: 0.0180243
	speed: 0.0053s/iter; left time: 1.3600s
	iters: 300, epoch: 2 | loss: 0.0085974
	speed: 0.0060s/iter; left time: 0.9448s
	iters: 400, epoch: 2 | loss: 0.0021133
	speed: 0.0054s/iter; left time: 0.3108s
Epoch: 2 cost time: 2.6332

In [8]:
# Iterate over the whole training loader once and discard every batch.
# NOTE(review): nothing is computed or displayed here -- presumably a leftover
# smoke check that the loader can be exhausted without raising; consider
# removing this cell if it is no longer needed.
for i in train_loader:
    pass

In [9]:
# Build the prediction input: the final `seq_len` rows of the training frame
# followed by the full test frame, re-indexed from 0, then written to disk.
# NOTE(review): per the displayed frames the tail of the training set is
# warehouse 1 while the test set starts with warehouse 4, so these context
# rows are not from the same warehouse as the first test rows -- confirm this
# is the intended history.
context_rows = original_train_set.iloc[-args['seq_len']:, :]
test_set = pd.concat([context_rows, original_test_set], ignore_index=True)
test_set.to_csv('data/processed/test_set_processed_modified.csv', index=False)


In [10]:
# Read the combined context + test CSV back from disk to inspect what was
# actually written by the previous cell.
pd.read_csv('data/processed/test_set_processed_modified.csv')

Unnamed: 0,date,warehouse,holiday,shops_closed,winter_school_holidays,school_holidays,day,month,year,day_of_week,week_of_year,is_weekend,is_holiday,quarter,orders
0,2024-03-01,1,0,0,0,0,1,3,2024,4,9,0,0,1,7249.0
1,2024-03-02,1,0,0,0,0,2,3,2024,5,9,1,0,1,7103.0
2,2024-03-03,1,0,0,0,0,3,3,2024,6,9,1,0,1,6258.0
3,2024-03-04,1,0,0,0,0,4,3,2024,0,10,0,0,1,6450.0
4,2024-03-05,1,0,0,0,0,5,3,2024,1,10,0,0,1,6575.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
406,2024-05-11,1,0,0,0,0,11,5,2024,5,19,1,0,2,5000.0
407,2024-05-12,1,0,0,0,0,12,5,2024,6,19,1,0,2,5000.0
408,2024-05-13,1,0,0,0,0,13,5,2024,0,20,0,0,2,5000.0
409,2024-05-14,1,0,0,0,0,14,5,2024,1,20,0,0,2,5000.0


In [11]:
# Loader over the modified test CSV in 'pred' mode. It takes the same options
# as the training loader except for the data file, the flag and a batch size
# of 1.
unseen_loader_kwargs = dict(
    root_path=args['root_dir'],
    data_path='data/processed/test_set_processed_modified.csv',
    flag=args['unseen_data_flag'],
    features=args['features'],
    target=args['target'],
    data=args['data'],
    batch_size=1,
    freq=args['freq'],
    seq_len=args['seq_len'],
    label_len=args['label_len'],
    pred_len=args['pred_len'],
    embed=args['embed'],
)
unseen_data, unseen_loader = data_provider(**unseen_loader_kwargs)


Mode: pred; datapath: data/processed/test_set_processed_modified.csv, flag: pred; features: MS, target: orders, data: custom, batch_size: 1, freq: d, seq_len: 14, label_len: 14, pred_len: 1, embed: timeF
pred 1


In [12]:
# Number of samples exposed by the 'pred'-mode dataset.
# NOTE(review): the recorded value is 1, i.e. a single sample even though the
# source CSV covers many test dates -- confirm this is the intended behaviour
# of the 'pred' flag before relying on it for the whole test period.
len(unseen_data)

1

In [13]:
# Number of samples in the training dataset; the recorded value (7326) matches
# the "train 7326" line logged when the loader was built.
len(train_dataset)

7326

# Inverse Transforming Predictions

In [15]:
# np.load('prediction/orders_forecasting/real_prediction.npy')

In [16]:
# preds = np.load('prediction/orders_forecasting/real_prediction.npy')
# unseen_data.inverse_transform(preds.reshape(-1, preds.shape[-1])).reshape(preds.shape)[0]
