In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from tqdm import tqdm
import warnings
# Suppresses every Python warning for the rest of the session, including
# deprecation notices from the libraries imported above.
warnings.filterwarnings('ignore')

import sys
# Extend the import search path before the project-local imports below
# (data_utils, model) — presumably those packages live under this directory.
sys.path.append('/home/guest/DemandForecasting')

from data_utils.load_data import load_and_preprocess_data
from model.baseline_models import get_baseline_model

In [2]:
# Fraction of samples assigned to training. The remainder is halved: the first
# half is the validation range (skipped here), the second half is the test set.
TRAIN_RATIO = 0.99

dataset, scaler = load_and_preprocess_data(
    data_path='/home/guest/DemandForecasting/data/imputed_data.csv',
    time_encoded=True,
    input_len=480,
    target_len=112
)

print(f'Total dataset size: {len(dataset)}')

# split train/val/test
num_train = int(len(dataset) * TRAIN_RATIO)
num_val = int(len(dataset) * (1 - TRAIN_RATIO) / 2)

# Subset only indexes into `indices` and takes its length, so a lazy range is
# enough; wrapping it in list() would materialise tens of millions of Python
# ints for a dataset of this size.
train_dataset = torch.utils.data.Subset(dataset, range(0, num_train))
test_dataset = torch.utils.data.Subset(dataset, range(num_train + num_val, len(dataset)))
# Only drops the notebook-level name; both subsets keep their own reference
# to the underlying dataset.
del dataset

Total dataset size: 42450000


In [3]:
# Sanity check: report the size of the training split and the tensor shapes
# of a single sample.
print(f"Length of Dataset: {len(train_dataset)}")
# Each item unpacks into three tensors; the evaluation loop further down feeds
# (x, x_dec) to the model and uses y as the target.
x, x_dec, y = train_dataset[0]
print(f"Shapes of components of each sample: {x.shape, x_dec.shape, y.shape}")

Length of Dataset: 42025500
Shapes of components of each sample: (torch.Size([480, 11]), torch.Size([112, 10]), torch.Size([112, 1]))


In [4]:
def compute_daily_wape(
    y_true,
    y_pred,
    hours_per_day=16,
    eps=1e-8
):
    """
    Weighted absolute percentage error (in percent) on per-day totals.

    Hourly values are first summed into days, so errors that cancel out
    within the same day do not contribute to the score.

    Args:
        y_true, y_pred: array-likes of shape (N, T) or (N, T, C); for 3-D
            input only the last channel is scored.
        hours_per_day: number of consecutive time steps forming one day;
            T must be divisible by it.
        eps: small constant added to the denominator to avoid division by zero.

    Returns:
        100 * sum(|true_daily - pred_daily|) / (sum(|true_daily|) + eps)
    """
    actual = np.array(y_true)
    forecast = np.array(y_pred)

    # A trailing channel axis is collapsed by keeping its last channel.
    if actual.ndim == 3:
        actual, forecast = actual[..., -1], forecast[..., -1]

    assert actual.shape == forecast.shape
    horizon = actual.shape[1]
    assert horizon % hours_per_day == 0

    def _daily_totals(hourly):
        # (N, T) -> (N, days, hours_per_day) -> sum over the hour axis
        return hourly.reshape(-1, horizon // hours_per_day, hours_per_day).sum(axis=2)

    actual_daily = _daily_totals(actual)
    forecast_daily = _daily_totals(forecast)

    total_abs_error = np.sum(np.abs(actual_daily - forecast_daily))
    total_abs_actual = np.sum(np.abs(actual_daily))
    return 100 * total_abs_error / (total_abs_actual + eps)


def evaluate_dlinear_model_dataset(
    model,
    test_dataset,
    use_decoder=True,
    device='cuda',
    max_samples=5000,
    hours_per_day=16
):
    """
    Run the model over the leading samples of ``test_dataset`` one at a time
    and return the daily WAPE (in percent) of its predictions.

    Args:
        model: module called as ``model(x, x_dec)``. It is switched to eval
            mode here but not moved, so it must already be on ``device``.
        test_dataset: indexable dataset whose items unpack as ``(x, x_dec, y)``.
        use_decoder: when False, only the last channel of the model output is
            kept before scoring.
        device: device the model inputs are moved to.
        max_samples: upper bound on the number of samples evaluated, counted
            from index 0; ``None`` evaluates the whole dataset.
        hours_per_day: forwarded to ``compute_daily_wape``.

    Returns:
        The value returned by ``compute_daily_wape`` for the stacked targets
        and predictions.

    Raises:
        ValueError: if there is nothing to evaluate (empty dataset or
            ``max_samples <= 0``).
    """
    num_samples = len(test_dataset)
    if max_samples is not None:
        num_samples = min(num_samples, max_samples)
    if num_samples <= 0:
        # Fail here with a clear message instead of an IndexError from inside
        # the metric when it receives empty arrays.
        raise ValueError(
            "No samples to evaluate: test_dataset is empty or max_samples <= 0"
        )

    all_predictions = []
    all_targets = []

    model.eval()
    with torch.no_grad():
        for i in tqdm(range(num_samples), desc="Testing DLinear"):
            x, x_dec, y = test_dataset[i]

            # Add the batch axis the model expects.
            x = x.unsqueeze(0).to(device)
            x_dec = x_dec.unsqueeze(0).to(device)

            output = model(x, x_dec)

            if not use_decoder:
                output = output[:, :, -1:]

            all_predictions.append(output.squeeze(0).cpu().numpy())
            # Targets are only consumed by the NumPy metric, so they never
            # need a round trip through the device.
            all_targets.append(y.cpu().numpy())

    all_predictions = np.array(all_predictions)
    all_targets = np.array(all_targets)

    return compute_daily_wape(
            all_targets,
            all_predictions,
            hours_per_day=hours_per_day
        )

In [5]:
class Config:
    """
    Plain settings container for the DLinear run: file locations plus model
    and training hyper-parameters, all exposed as instance attributes.
    """

    def __init__(self, data_type='imputed', use_decoder=True):
        # Suffix that separates output directories of decoder / no-decoder runs.
        variant = "decoder" if use_decoder else "no_decoder"

        # paths
        self.data_type = data_type
        self.data_path = f'/home/guest/DemandForecasting/data/{data_type}_data.csv'
        self.save_path = f'/home/guest/DemandForecasting/demand_forecasting/checkpoints/{data_type}_{variant}/'
        self.log_path = f'/home/guest/DemandForecasting/demand_forecasting/logs/{data_type}_{variant}/'

        # model parameters
        self.model = 'dlinear'
        self.patience = 5
        self.enable_scheduler = True
        self.seq_len = 480      # matches input_len used when building the dataset
        self.pred_len = 112     # matches target_len used when building the dataset
        self.enc_in = 11
        self.dec_in = 10
        self.use_decoder = use_decoder
        self.individual = True

        # training parameters
        self.batch_size = 1024
        self.lr = 0.001
        self.epochs = 20
        self.train_ratio = 0.99


# Settings used by the rest of the notebook.
configs = Config(data_type='imputed', use_decoder=True)

In [6]:
from model.dlinear import Model
import os

model = Model(configs)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Build the checkpoint path from the config so the weights always match the
# data_type / use_decoder variant the model was constructed with. For the
# current configs this resolves to the same file as the previous literal.
ckpt_path = os.path.join(configs.save_path, 'best_dlinear_model.pth')

# Only a state dict is needed, so restrict unpickling to tensors and plain
# containers rather than allowing arbitrary objects from the file.
state_dict = torch.load(ckpt_path, map_location=device, weights_only=True)
model.load_state_dict(state_dict)

# Inference mode; as the last expression this also displays the module tree.
model.eval()

Model(
  (decompsition): series_decomp(
    (moving_avg): moving_avg(
      (avg): AvgPool1d(kernel_size=(25,), stride=(1,), padding=(0,))
    )
  )
  (dec_projection): Linear(in_features=10, out_features=1, bias=True)
  (Linear_Seasonal): ModuleList(
    (0-10): 11 x Linear(in_features=480, out_features=112, bias=True)
  )
  (Linear_Trend): ModuleList(
    (0-10): 11 x Linear(in_features=480, out_features=112, bias=True)
  )
  (Linear_Decoder): ModuleList(
    (0-10): 11 x Linear(in_features=480, out_features=112, bias=True)
  )
)

In [7]:
# Score the loaded model on at most the first 5000 samples of the test split.
# NOTE(review): the `scaler` returned by load_and_preprocess_data is not applied
# anywhere in the visible cells, so this WAPE is computed on whatever scale the
# dataset yields — confirm that is the scale intended for reporting.
wape = evaluate_dlinear_model_dataset(
    model,
    test_dataset,
    use_decoder=configs.use_decoder,
    device=device,
    max_samples=5000
)

Testing DLinear: 100%|██████████| 5000/5000 [00:02<00:00, 1820.96it/s]


In [8]:
# Daily WAPE of the evaluated samples, expressed in percent.
wape

np.float32(31.939238)