In [None]:
# Mount Google Drive so the project folder is reachable from the Colab VM.
from google.colab import drive
drive.mount('/content/drive')
# Make the project folder the working directory; the relative 'data' path used
# later in the notebook is resolved from here.
%cd /content/drive/MyDrive/ML_FInal_Project

# Install the packages imported below that are pulled in via pip here
# (-q keeps pip's own output quiet).
!pip install -q neuralforecast wandb

import pandas as pd
import numpy as np
import joblib
import wandb
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.base import BaseEstimator, RegressorMixin
from datetime import datetime
import torch
from torch.utils.data import DataLoader, Dataset
from neuralforecast.models import DLinear

Mounted at /content/drive
/content/drive/.shortcut-targets-by-id/1h3JmMNvF7pLor34P-qm2FEkIev93euuf/ML_FInal_Project
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m261.0/261.0 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m285.8/285.8 kB[0m [31m19.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m825.4/825.4 kB[0m [31m20.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m68.9/68.9 MB[0m [31m11.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m102.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m81.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import os, joblib
from sklearn.base import BaseEstimator, TransformerMixin, RegressorMixin
from neuralforecast.core import NeuralForecast
from neuralforecast.models import DLinear



# Hyperparameters of this run. They are handed to W&B and read back through
# `config` further down (horizon, input_size, lr, epochs, batch_size).
_run_hparams = {
    "model": "DLinear",
    "horizon": 28,
    "input_size": 400,
    "epochs": 500,
    "batch_size": 64,
    "lr": 1e-3,
}

# Open the tracked run and keep a handle on its config object.
wandb.init(
    config=_run_hparams,
    name="DLinear_pipeline",
    entity="lkata22-free-university-of-tbilisi-",
    project="walmart-sales-forecasting",
)
config = wandb.config


# Folder holding the CSV inputs, relative to the current working directory.
DATA_PATH = 'data'

# Raw tables; 'Date' is parsed to datetime where the table has that column.
_df_train = pd.read_csv(f"{DATA_PATH}/train.csv", parse_dates=['Date'])
_df_feat = pd.read_csv(f"{DATA_PATH}/features.csv", parse_dates=['Date'])
_df_store = pd.read_csv(f"{DATA_PATH}/stores.csv")

# Left-join the side tables onto the training rows so every training row is kept.
df = pd.merge(_df_train, _df_feat, how='left', on=['Store', 'Date', 'IsHoliday'])
df = pd.merge(df, _df_store, how='left', on='Store')

# Target: weekly sales with negative values clipped to zero.
# Series key: "<Store>_<Dept>".
df = df.assign(
    y=df['Weekly_Sales'].clip(lower=0),
    unique_id=df['Store'].astype(str) + '_' + df['Dept'].astype(str),
)

# Long-format frame (unique_id, ds, y) that is fed to the pipeline below.
df_nf = df.rename(columns={'Date': 'ds'})[['unique_id', 'ds', 'y']]


class TrainTestDF2NF(BaseEstimator, TransformerMixin):
    """Convert a raw Store/Dept/Date frame into a (unique_id, ds, y) frame.

    Frames that already contain the columns ``unique_id``, ``ds`` and ``y``
    are returned unchanged.  Any other frame must provide ``Store``, ``Dept``
    and ``Date``; the result then has ``unique_id`` = "<Store>_<Dept>",
    ``ds`` = ``Date`` and ``y`` = NaN.

    Parameters
    ----------
    features_df, stores_df : pandas.DataFrame
        Side tables stored on the instance.  They are kept so existing
        constructor calls keep working, but ``transform`` does not read them:
        none of their columns are part of the returned frame.
    """

    def __init__(self, features_df, stores_df):
        self.features_df = features_df
        self.stores_df = stores_df

    def fit(self, X, y=None):
        """Nothing is learned; return ``self`` so the step can sit in a Pipeline."""
        return self

    def transform(self, X):
        """Return ``X`` in (unique_id, ds, y) layout.

        Raises
        ------
        KeyError
            If ``X`` is not already in that layout and lacks one of
            ``Store``, ``Dept`` or ``Date``.
        """
        if {'unique_id', 'ds', 'y'}.issubset(X.columns):
            return X

        required = ('Store', 'Dept', 'Date')
        missing = [col for col in required if col not in X.columns]
        if missing:
            raise KeyError(f"transform() input is missing column(s): {missing}")

        # The previous implementation left-merged features_df and stores_df
        # here and then discarded every merged column.  Besides the wasted
        # work, that required an 'IsHoliday' column and would duplicate rows
        # if a side table had repeated keys.  Building the frame directly
        # avoids all three; the index is reset to 0..n-1, as merge() did.
        base = X[list(required)].reset_index(drop=True)
        return pd.DataFrame({
            'unique_id': base['Store'].astype(str) + '_' + base['Dept'].astype(str),
            'ds': base['Date'],
            'y': np.nan,
        })


class DLinearNF(BaseEstimator, RegressorMixin):
    """Estimator-style wrapper that trains a DLinear model through NeuralForecast.

    Parameters
    ----------
    h : forwarded to ``DLinear`` as ``h``.
    input_size : forwarded to ``DLinear`` as ``input_size``.
    freq : frequency string handed to ``NeuralForecast``.
    lr : forwarded to ``DLinear`` as ``learning_rate``.
    epochs : forwarded to ``DLinear`` as ``max_steps`` (note the differing name).
    batch_size : forwarded to ``DLinear`` as ``batch_size``.
    """

    def __init__(self, h, input_size, freq='W-FRI', lr=1e-3, epochs=500, batch_size=64):
        self.h, self.input_size = h, input_size
        self.freq = freq
        self.lr, self.epochs, self.batch_size = lr, epochs, batch_size

    def fit(self, X, y=None):
        """Build a fresh DLinear model, fit it on ``X`` and keep it on ``self``.

        ``y`` is unused.  After a successful fit the ``NeuralForecast``
        object is available as ``self.nf`` and the model as ``self.model``.
        """
        dlinear_kwargs = {
            'h': self.h,
            'input_size': self.input_size,
            'learning_rate': self.lr,
            'max_steps': self.epochs,
            'batch_size': self.batch_size,
            'random_seed': 42,  # fixed seed, not configurable from __init__
            'start_padding_enabled': True,
        }
        network = DLinear(**dlinear_kwargs)
        forecaster = NeuralForecast(freq=self.freq, models=[network])
        forecaster.fit(df=X)

        # Only publish the objects once fitting has succeeded.
        self.model, self.nf = network, forecaster
        return self

    def predict(self, X=None):
        """Return the fitted forecaster's predictions, with 'DLinear' renamed to 'y_pred'.

        ``X`` is accepted but ignored: ``self.nf.predict()`` is called with
        no arguments.
        """
        forecast = self.nf.predict()
        return forecast.rename(columns={'DLinear': 'y_pred'})


# Step 1: bring the input frame into (unique_id, ds, y) layout.
_to_nf_step = TrainTestDF2NF(_df_feat, _df_store)

# Step 2: DLinear wrapper, configured from the W&B run config.
_dlinear_step = DLinearNF(
    h=config.horizon,
    input_size=config.input_size,
    freq='W-FRI',
    lr=config.lr,
    epochs=config.epochs,
    batch_size=config.batch_size,
)

pipeline = Pipeline(steps=[
    ('to_df_nf', _to_nf_step),
    ('dlinear', _dlinear_step),
])


# Hold out the last `config.horizon` weeks for validation.
# The model forecasts `h = config.horizon` steps at freq='W-FRI', i.e. weeks.
# The previous fixed 28-day cutoff kept only 4 weekly rows per series, so the
# inner merge below scored just the first 4 of the 28 forecast steps.
# NOTE(review): assumes one row per week per series; if a 4-week horizon was
# intended instead, lower `horizon` in the run config rather than this cutoff.
cutoff = df_nf['ds'].max() - pd.Timedelta(weeks=int(config.horizon))
train_nf = df_nf[df_nf['ds'] <= cutoff]
val_nf = df_nf[df_nf['ds'] > cutoff]

pipeline.fit(train_nf)
val_preds = pipeline.predict(val_nf)

# Inner join: only (unique_id, ds) pairs present in both frames are scored.
val_eval = val_nf.merge(val_preds, on=['unique_id', 'ds'])
if val_eval.empty:
    raise ValueError(
        "No overlap between validation rows and forecasts; "
        "check the cutoff, horizon and frequency."
    )

# WMAPE = 100 * sum|y - y_pred| / sum|y|; undefined when all actuals are zero.
_abs_err = np.sum(np.abs(val_eval['y'] - val_eval['y_pred']))
_abs_actual = np.sum(np.abs(val_eval['y']))
wm = 100 * _abs_err / _abs_actual if _abs_actual > 0 else float('nan')
print(f"Validation WMAPE: {wm:.3f}%")
wandb.log({"WMAPE": wm})


# Refit on the complete frame (training + validation rows) before saving.
pipeline.fit(df_nf)

# Serialise the fitted pipeline into a local artifact folder.
ART_DIR = 'dlinear_nf_artifact'
os.makedirs(ART_DIR, exist_ok=True)
_pipeline_path = f"{ART_DIR}/dlinear_pipeline.pkl"
joblib.dump(pipeline, _pipeline_path)

# Attach the pickle to the W&B run as a model artifact, then close the run.
artifact = wandb.Artifact("walmart_dlinear_pipeline", type="model")
artifact.add_file(_pipeline_path)
wandb.log_artifact(artifact)
wandb.finish()


INFO:lightning_fabric.utilities.seed:Seed set to 42
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.callbacks.model_summary:
  | Name          | Type          | Params | Mode 
--------------------------------------------------------
0 | loss          | MAE           | 0      | train
1 | padder_train  | ConstantPad1d | 0      | train
2 | scaler        | TemporalNorm  | 0      | train
3 | decomp        | SeriesDecomp  | 0      | train
4 | linear_trend  | Linear        | 11.2 K | train
5 | linear_season | Linear        | 11.2 K | train
--------------------------------------------------------
22.5 K    Trainable params
0         Non-trainable params
22.5 K    Total params
0.090     Total estimated model params size (MB)
8         Modules in train mode
0         Modules in eval mode

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_steps=500` reached.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

INFO:lightning_fabric.utilities.seed:Seed set to 42
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Validation WMAPE: 12.171%


INFO:pytorch_lightning.callbacks.model_summary:
  | Name          | Type          | Params | Mode 
--------------------------------------------------------
0 | loss          | MAE           | 0      | train
1 | padder_train  | ConstantPad1d | 0      | train
2 | scaler        | TemporalNorm  | 0      | train
3 | decomp        | SeriesDecomp  | 0      | train
4 | linear_trend  | Linear        | 11.2 K | train
5 | linear_season | Linear        | 11.2 K | train
--------------------------------------------------------
22.5 K    Trainable params
0         Non-trainable params
22.5 K    Total params
0.090     Total estimated model params size (MB)
8         Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_steps=500` reached.


0,1
WMAPE,▁

0,1
WMAPE,12.17109
