In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Subset
from typing import Dict, Any
from tqdm import tqdm
import warnings
from sklearn.preprocessing import StandardScaler
warnings.filterwarnings('ignore')

import sys
sys.path.append('/home/guest/DemandForecasting')

from data_utils.load_data import load_and_preprocess_data
from model.baseline_models import get_baseline_model

In [None]:
# args
data_path = "/home/guest/DemandForecasting/data/imputed_data.csv"
time_encoded = True
horizon = 90  # rows per series; the flat table is reshaped to (series_num, horizon, ...)

# Window of the 24 hourly values that is kept per day (indices 6..21 -> 16 hours).
# Defined once so the slice and the hour encoding below cannot drift apart.
hour_start, hour_end = 6, 22
hours_per_day = hour_end - hour_start

# process data
data = pd.read_csv(data_path)
# Parse dates BEFORE sorting so rows are ordered chronologically rather than
# lexicographically on whatever string format the CSV happens to use.
data['dt'] = pd.to_datetime(data['dt'])
data = data.sort_values(by=['store_id', 'product_id', 'dt'])

# File name without its 4-character extension. Named `data_kind` instead of
# `type` so the builtin `type()` is not shadowed for every later cell.
data_kind = data_path.split('/')[-1][:-4]
if data_kind == 'imputed_data':
    # "[a, b, c]" -> ['a', 'b', 'c'] (comma-separated list repr)
    data['hours_sale'] = data['hours_sale'].map(lambda x: x[1:-1].split(', '))
else:
    # "[a b\n c]" -> ['a', 'b', 'c'] (whitespace-separated array repr, possibly multi-line)
    data['hours_sale'] = data['hours_sale'].map(lambda x: x[1:-1].replace('\n', '').split())

data['dayofweek'] = data['dt'].dt.dayofweek
data['day'] = data['dt'].dt.day

numerical_features = ['discount', 'precpt', 'avg_temperature', 'avg_humidity', 'avg_wind_level']
binary_features = ['holiday_flag', 'activity_flag']
time_features = ['dayofweek', 'day'] if time_encoded else []

# Every series must contribute exactly `horizon` rows, otherwise the reshapes
# below would either fail with an opaque message or mix rows of different series.
if data.shape[0] % horizon != 0:
    raise ValueError(
        f"Row count {data.shape[0]} is not a multiple of horizon={horizon}; "
        "cannot reshape into (series, horizon, ...)"
    )
series_num = data.shape[0] // horizon

# (rows, 24) -> (series_num, horizon, 24), then keep only the selected hours.
hours_sale = np.array(data['hours_sale'].tolist(), dtype=float)
hours_sale = hours_sale.reshape(series_num, horizon, 24)[..., hour_start:hour_end]

# Standardise the continuous covariates (statistics are fit on the full table).
numerical_data = data[numerical_features].values.astype(float)
scaler = StandardScaler()
numerical_normalized = scaler.fit_transform(numerical_data)

if time_encoded:
    time_data = data[time_features].values.astype(float)
    time_data[:, 0] = time_data[:, 0] / 6          # dayofweek 0..6 -> [0, 1]
    time_data[:, 1] = (time_data[:, 1] - 1) / 30   # day of month 1..31 -> [0, 1]
else:
    # Zero-width placeholder so the concatenate below works unchanged.
    time_data = np.empty((numerical_data.shape[0], 0))

binary_data = data[binary_features].values.astype(float)
features_combined = np.concatenate([numerical_normalized, binary_data, time_data], axis=1)
features = features_combined.reshape(series_num, horizon, -1)

# Repeat the per-day features for every kept hour and append a normalised
# hour-of-window index, so each (day, hour) pair becomes one time step.
hours_sale = np.expand_dims(hours_sale, axis=-1)
features = np.expand_dims(features, axis=2)
features = np.broadcast_to(features, (series_num, horizon, hours_sale.shape[2], features.shape[-1]))
hour_encoding = np.broadcast_to(
    np.arange(hours_per_day)[None, None, :, None] / (hours_per_day - 1),
    (series_num, horizon, hours_per_day, 1),
)

# Column order along the last axis: daily features, hour encoding, hours_sale (target, last).
ds = np.concatenate([features, hour_encoding, hours_sale], axis=-1)
ds = ds.reshape(series_num, horizon * hours_per_day, -1)  # needs at least 17GB of RAM to run this cell

In [4]:
# Sanity check on the assembled array: (series, horizon * hours kept per day, feature columns + hours_sale).
ds.shape

(50000, 1440, 11)

In [None]:
# The input window is the most recent 30 days (window_size = 480) and the
# forecast covers the following 7 days (pred_len = 112).
window_size, pred_len = 480, 112
total_len = window_size + pred_len
n_features = ds.shape[-1] - 1  # number of features (excluding hours_sale in the last column)

# Time-step ranges, counted from the end of the series.
history = slice(-total_len, -pred_len)
future = slice(-pred_len, None)

# Encoder input: every column over the history window.
x = torch.tensor(ds[0, history], dtype=torch.float32)
# Decoder input: covariates only over the forecast window.
x_dec = torch.tensor(ds[0, future, :n_features], dtype=torch.float32)
# Target: hours_sale (last column) over the forecast window.
y = torch.tensor(ds[0, future, -1:], dtype=torch.float32)

In [18]:
# Report the shapes of the encoder input, decoder input and target, one per line.
print(
    f"x shape: {x.shape}",
    f"x_dec shape: {x_dec.shape}",
    f"y shape: {y.shape}",
    sep="\n",
)

x shape: torch.Size([480, 11])
x_dec shape: torch.Size([112, 10])
y shape: torch.Size([112, 1])


In [11]:
class Config:
    """Hyper-parameter bundle handed to the forecasting model and training loop.

    Args:
        data_type: Label of the dataset variant the run uses (e.g. 'imputed').
            Stored on the instance so it is no longer silently discarded.
        use_decoder: Whether the model is given decoder-side covariates; the
            inference code also uses this flag to decide whether the output
            must be reduced to its last channel.
    """

    def __init__(self, data_type='imputed', use_decoder=True):
        # dataset variant this configuration was created for
        self.data_type = data_type

        # model parameters
        self.model = 'dlinear'
        self.patience = 5               # presumably early-stopping patience — TODO confirm against the trainer
        self.enable_scheduler = True
        self.seq_len = 480              # input window length in time steps
        self.pred_len = 112             # forecast length in time steps
        self.enc_in = 11                # encoder input width (all columns of ds)
        self.dec_in = 10                # decoder input width (columns of ds without hours_sale)
        self.use_decoder = use_decoder
        self.individual = True          # model-specific switch; semantics defined in model.dlinear

        # training parameters
        self.batch_size = 1024
        self.lr = 0.001
        self.epochs = 20
        self.train_ratio = 0.99

configs = Config(data_type='imputed', use_decoder=True)

In [12]:
from model.dlinear import Model
import os

# Build the network from the shared configuration and place it on the GPU when one is available.
model = Model(configs)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

ckpt_path = '/home/guest/DemandForecasting/demand_forecasting/checkpoints/imputed_decoder/best_dlinear_model.pth'

# The checkpoint is a plain state_dict, so restrict unpickling to tensors and
# primitive containers: a tampered .pth file then cannot execute arbitrary code.
state_dict = torch.load(ckpt_path, map_location=device, weights_only=True)
# Left as the last expression so the notebook displays the key-matching report.
model.load_state_dict(state_dict)

<All keys matched successfully>

In [None]:
# Run a single forward pass in inference mode and keep the prediction as a NumPy array.
model.eval()
with torch.no_grad():
    # Add the leading batch dimension only when it is still missing. The tensors
    # are rebound to the same names, so an unconditional unsqueeze would stack
    # another dimension each time this cell is re-run and break the model call.
    if x.dim() == 2:
        x = x.unsqueeze(0)
    if x_dec.dim() == 2:
        x_dec = x_dec.unsqueeze(0)
    if y.dim() == 2:
        y = y.unsqueeze(0)

    # .to(device) is a no-op for tensors that already live on the target device.
    x = x.to(device)
    x_dec = x_dec.to(device)
    y = y.to(device)

    output = model(x, x_dec)
    if not configs.use_decoder:
        # Keep only the last output channel; presumably the hours_sale target,
        # which is the last column of the input — TODO confirm against model.dlinear.
        output = output[:, :, -1:]

    # Drop the batch dimension and move the result back to host memory.
    y_pred_np = output.squeeze(0).cpu().numpy()

x shape: torch.Size([1, 480, 11])
x_dec shape: torch.Size([1, 112, 10])
y shape: torch.Size([1, 112, 1])


In [30]:
# Collapse every size-1 axis of the prediction array.
y_pred_np = np.squeeze(y_pred_np)

In [34]:
# Assume the idx of the (store_id, product_id) pair is 0;
# the idx will be taken from the website.
# NOTE(review): the series was already selected as ds[0, ...] when the inputs were
# built; indexing the squeezed prediction with [0] reads its first element rather
# than choosing a series — confirm this is the intended lookup.
y_pred_np[0]

np.float32(-0.031625584)

In [None]:
# threshold: wape = 0.3
# the website will run a test function that prints the WAPE metric; if the WAPE does not meet the requirement, it recommends retraining
# nếu người dùng đồng ý train lại thì sẽ chuyển sang trang train quy trình 2, sau khi quy trình 2 train
# xong thì chạy tiếp hàm test, nếu chỉ số wape lại không đạt yêu cầu thì thông bá