In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Subset
from typing import Dict, Any
from tqdm import tqdm
import warnings
from sklearn.preprocessing import StandardScaler
warnings.filterwarnings('ignore')

import sys
sys.path.append('/home/quang_ai/DemandForecasting')

from data_utils.load_data import load_and_preprocess_data
from model.baseline_models import get_baseline_model

In [35]:
# Read the imputed demand CSV into `data`; get_forecast_data is later called on this frame.
data_path = "/home/quang_ai/DemandForecasting/data/imputed_data.csv"
data = pd.read_csv(filepath_or_buffer=data_path)

In [63]:
def get_forecast_data(data: pd.DataFrame, store_id: int, product_id: int, start_day: int, month: int, year: int, days_ahead: int = 7, lookback_days: int = 30) -> "tuple[pd.DataFrame, list[str]]":
    """Select the history + forecast window for one (store, product) series.

    Args:
        data: Frame with at least `store_id`, `product_id` and `dt` columns.
            `dt` may hold anything `pd.to_datetime` can parse.
        store_id: Value matched against the `store_id` column.
        product_id: Value matched against the `product_id` column.
        start_day: Day of month of the first forecast date.
        month: Month of the first forecast date.
        year: Year of the first forecast date.
        days_ahead: Number of forecast days, starting at the forecast date.
        lookback_days: Number of history days kept before the forecast date.

    Returns:
        A `(forecast_data, range_forecast_date)` tuple:
        * `forecast_data` - rows of the selected series whose `dt` lies in
          `[start - lookback_days, start + days_ahead)`, with `dt` converted
          to datetime. It is an independent copy, so callers may add columns
          to it without touching `data`.
        * `range_forecast_date` - the `days_ahead` forecast dates formatted
          as `MM/DD/YYYY` strings.
    """
    start_date = pd.Timestamp(year=year, month=month, day=start_day)
    history_start = start_date - pd.Timedelta(days=lookback_days)
    end_date = start_date + pd.Timedelta(days=days_ahead)  # exclusive bound

    series_mask = (data['store_id'] == store_id) & (data['product_id'] == product_id)
    forecast_data = data[series_mask].copy()
    forecast_data['dt'] = pd.to_datetime(forecast_data['dt'])

    # end_date is exclusive, so the last labelled day is end_date - 1 day.
    range_forecast_date = pd.date_range(
        start=start_date,
        end=end_date - pd.Timedelta(days=1)
    ).strftime('%m/%d/%Y').tolist()

    in_window = (forecast_data['dt'] >= history_start) & (forecast_data['dt'] < end_date)
    # Copy after filtering so downstream column assignments act on a real
    # frame rather than on a slice of the intermediate one.
    forecast_data = forecast_data.loc[in_window].copy()
    return forecast_data, range_forecast_date

In [48]:
def get_torch_forecast_data(
    forecast_data: pd.DataFrame,
    store_id: int,
    product_id: int,
    start_day: int,
    month: int,
    year: int,
    lookback_days: int = 30,
    days_ahead: int = 7,
):
    """Turn one series' daily rows into encoder / decoder / target tensors.

    Each daily row is expanded into 16 hourly steps (entries 6..21 of the 24
    values stored in `hours_sale`). Every step carries the day-level
    features, an hour-of-window encoding, and the hourly sale as last column.

    Args:
        forecast_data: One row per day with a datetime `dt` column, an
            `hours_sale` column holding strings such as "[v0, v1, ..., v23]",
            and the numerical / binary feature columns listed below. It must
            contain exactly `lookback_days + days_ahead` rows. The frame is
            not modified.
        store_id, product_id, start_day, month, year: Not used by the body;
            kept so existing call sites keep working.
        lookback_days: Number of leading days that form the encoder input.
        days_ahead: Number of trailing days that form the decoder / target.

    Returns:
        `(x, x_dec, y)` float32 tensors:
        * `x`     - `(lookback_days * 16, n_features + 1)`, features plus sale.
        * `x_dec` - `(days_ahead * 16, n_features)`, features only.
        * `y`     - `(days_ahead * 16, 1)`, hourly sales of the forecast days.

    Raises:
        ValueError: If the number of rows differs from
            `lookback_days + days_ahead`.
    """
    # arguments
    series_num = 1
    first_hour, last_hour = 6, 22  # slice bounds into the 24 hourly values
    hours_per_day = last_hour - first_hour
    horizon = lookback_days + days_ahead
    window_size = lookback_days * hours_per_day

    if len(forecast_data) != horizon:
        raise ValueError(
            f"expected {horizon} daily rows ({lookback_days} history + "
            f"{days_ahead} forecast), got {len(forecast_data)}"
        )

    # The encoder/decoder split below is positional, so rows must be in date
    # order. sort_values returns a new frame; the caller's frame is only read.
    frame = forecast_data.sort_values('dt', kind='stable')

    numerical_features = [
        'discount', 'precpt',
        'avg_temperature', 'avg_humidity', 'avg_wind_level'
    ]
    binary_features = ['holiday_flag', 'activity_flag']

    # "[a, b, ...]" -> ['a', 'b', ...]; numpy converts the strings to float.
    hours_sale = np.array(
        [raw[1:-1].split(', ') for raw in frame['hours_sale']],
        dtype=float
    )
    hours_sale = hours_sale.reshape(series_num, horizon, 24)[..., first_hour:last_hour]

    # NOTE(review): the scaler is fitted on this window only - confirm this
    # matches the normalisation the checkpoint was trained with.
    numerical_data = frame[numerical_features].values.astype(float)
    numerical_normalized = StandardScaler().fit_transform(numerical_data)

    # Scale day-of-week (0..6) and day-of-month (1..31) into [0, 1].
    day_stamp = frame['dt'].dt
    time_data = np.column_stack([
        day_stamp.dayofweek.to_numpy(dtype=float) / 6,
        (day_stamp.day.to_numpy(dtype=float) - 1) / 30,
    ])

    binary_data = frame[binary_features].values.astype(float)

    features_combined = np.concatenate(
        [numerical_normalized, binary_data, time_data],
        axis=1
    )
    features = features_combined.reshape(series_num, horizon, -1)

    # Repeat each day's feature vector for every kept hour of that day.
    hours_sale = np.expand_dims(hours_sale, axis=-1)
    features = np.expand_dims(features, axis=2)
    features = np.broadcast_to(
        features,
        (series_num, horizon, hours_per_day, features.shape[-1])
    )

    # Position of the hour inside the kept window, scaled to [0, 1].
    hour_encoding = np.broadcast_to(
        np.arange(hours_per_day)[None, None, :, None] / (hours_per_day - 1),
        (series_num, horizon, hours_per_day, 1)
    )

    ds = np.concatenate(
        [features, hour_encoding, hours_sale],
        axis=-1
    )
    ds = ds.reshape(series_num, horizon * hours_per_day, -1)
    ds = ds.squeeze(0)

    n_features = ds.shape[-1] - 1  # minus the hours_sale column

    x = torch.tensor(ds[:window_size, :], dtype=torch.float32)
    x_dec = torch.tensor(ds[window_size:, :n_features], dtype=torch.float32)
    y = torch.tensor(ds[window_size:, -1:], dtype=torch.float32)

    return x, x_dec, y


In [64]:
# Slice the store 11 / product 267 series around 2024-05-01, then build the
# encoder input, decoder input and target tensors from that slice.
forecast_data, range_forecast_date = get_forecast_data(
    data,
    store_id=11,
    product_id=267,
    start_day=1,
    month=5,
    year=2024,
    days_ahead=7,
)
x, x_dec, y = get_torch_forecast_data(
    forecast_data,
    store_id=11,
    product_id=267,
    start_day=1,
    month=5,
    year=2024,
)

In [52]:
class Config:
    """Plain attribute container passed to the model constructor.

    Args:
        data_type: Label of the dataset variant; stored as `self.data_type`.
        use_decoder: Stored as `self.use_decoder`; the inference cell keeps
            only the last output channel when this is False.
    """

    def __init__(self, data_type='imputed', use_decoder=True):
        # Keep the dataset label instead of silently discarding the argument.
        self.data_type = data_type

        # model parameters
        self.model = 'dlinear'
        self.patience = 5
        self.enable_scheduler = True
        self.seq_len = 480   # 30 days x 16 hourly steps (window_size in get_torch_forecast_data)
        self.pred_len = 112  # 7 days x 16 hourly steps
        self.enc_in = 11     # column count of `x` (features + hour encoding + sale)
        self.dec_in = 10     # column count of `x_dec` (same, without the sale column)
        self.use_decoder = use_decoder
        self.individual = True  # NOTE(review): meaning is defined by model.dlinear - confirm there

        # training parameters
        self.batch_size = 1024
        self.lr = 0.001
        self.epochs = 20
        self.train_ratio = 0.99


configs = Config(data_type='imputed', use_decoder=True)

In [53]:
from model.dlinear import Model
import os

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Build the network from `configs` and place it on the chosen device.
model = Model(configs)
model.to(device)

# NOTE(review): torch.load unpickles the file - only load checkpoints from a trusted source.
ckpt_path = '/home/quang_ai/DemandForecasting/demand_forecasting/checkpoints/imputed_decoder/best_dlinear_model.pth'
state_dict = torch.load(ckpt_path, map_location=device)

# Left as the final expression so the key-matching report is displayed.
model.load_state_dict(state_dict)

<All keys matched successfully>

In [54]:
# Run one forward pass in evaluation mode without tracking gradients.
model.eval()
with torch.no_grad():
    # Batch under new names: rebinding x / x_dec / y to their own unsqueezed
    # versions made every re-run of this cell add another leading dimension.
    x_batch = x.unsqueeze(0).to(device)
    x_dec_batch = x_dec.unsqueeze(0).to(device)
    # Not consumed below; kept next to the inputs for comparison with `output`.
    y_batch = y.unsqueeze(0).to(device)

    output = model(x_batch, x_dec_batch)
    if not configs.use_decoder:
        # Keep only the last output channel.
        output = output[:, :, -1:]

    # Drop the batch dimension and move the prediction to host memory.
    y_pred_np = output.squeeze(0).cpu().numpy()

In [55]:
# Remove every length-1 axis from the prediction array.
y_pred_np = np.squeeze(y_pred_np)

In [66]:
# Group the hourly predictions into rows of 16 (one row per day) and total each row.
y_pred_daily = np.reshape(y_pred_np, (-1, 16)).sum(axis=1).tolist()

In [67]:
# Display the per-day totals computed from the hourly predictions.
y_pred_daily

[33.59075164794922,
 32.440765380859375,
 32.749595642089844,
 36.566734313964844,
 34.44625473022461,
 31.871763229370117,
 33.124961853027344]

In [68]:
# Pair each forecast date label with its daily predicted quantity.
final_forecast = pd.DataFrame(
    dict(date=range_forecast_date, qty=y_pred_daily)
)

In [70]:
# Write the forecast table to the working directory without the index column.
final_forecast.to_csv(path_or_buf='final_forecast.csv', index=False)