In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive (force_remount=True is passed so the
# mount is redone even when a previous mount exists)
drive.mount('/content/drive', force_remount=True)
# Switch the working directory to the project folder on Drive; the relative
# 'data/...' paths used further down are resolved from here
%cd /content/drive/MyDrive/ML_FInal_Project

# Install dependencies that are not imported from the base image
# (-q keeps pip's output short)
!pip install -q neuralforecast wandb

import pandas as pd
import numpy as np
import torch
import wandb
import os, joblib
from neuralforecast.core import NeuralForecast
from neuralforecast.models import PatchTST
from neuralforecast.losses.numpy import smape as np_smape, mae as np_mae
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator, TransformerMixin, RegressorMixin


Mounted at /content/drive
/content/drive/.shortcut-targets-by-id/1h3JmMNvF7pLor34P-qm2FEkIev93euuf/ML_FInal_Project
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m261.0/261.0 kB[0m [31m11.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m285.8/285.8 kB[0m [31m25.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m825.4/825.4 kB[0m [31m46.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m68.9/68.9 MB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m126.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m94.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import pandas as pd
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin, RegressorMixin
from sklearn.pipeline import Pipeline
from neuralforecast import NeuralForecast
from neuralforecast.models import PatchTST
import wandb
import os, joblib

# Authenticate with Weights & Biases, then open the run that tracks this
# experiment. The hyper-parameters are stored as the run's config so the
# rest of the notebook reads them from one place.
wandb.login()
run = wandb.init(
    entity="lkata22-free-university-of-tbilisi-",
    project="walmart-sales-forecasting",
    name="PatchTST_with_more_features",
    config=dict(
        input_size=180,
        horizon=28,
        max_steps=1000,
        batch_size=128,
        learning_rate=1e-3,
        scaler_type="standard",
        start_padding=True,
    ),
)
# Shorthand for attribute-style access below (config.horizon, ...).
config = run.config

# Raw input tables, read relative to the current working directory.
# train.csv and features.csv get their 'Date' column parsed to datetimes;
# stores.csv is read as-is.
DATA_PATH = 'data'
_df_train = pd.read_csv(
    f"{DATA_PATH}/train.csv",
    parse_dates=['Date'],
)
_df_features = pd.read_csv(
    f"{DATA_PATH}/features.csv",
    parse_dates=['Date'],
)
_df_stores = pd.read_csv(f"{DATA_PATH}/stores.csv")

# Covariate columns kept next to the target in the NeuralForecast frame.
# 'Type' is left out; it could be added after encoding it numerically.
COVARIATES = [
    'IsHoliday',
    'Temperature',
    'Fuel_Price',
    'MarkDown1',
    'MarkDown2',
    'MarkDown3',
    'MarkDown4',
    'MarkDown5',
    'CPI',
    'Unemployment',
    'Size',
]

# 2) Transformer: merges raw → NF-format with covariates
class TrainTestDF2NF(BaseEstimator, TransformerMixin):
    """Convert a raw train/test frame into NeuralForecast long format.

    The raw frame is left-joined with the features table (on Store, Date and
    IsHoliday) and with the stores table (on Store). The result is reduced to
    ``unique_id``, ``ds``, ``y`` followed by the configured covariates.

    Parameters
    ----------
    features_df : DataFrame
        Table merged on ``['Store', 'Date', 'IsHoliday']``.
    stores_df : DataFrame
        Table merged on ``'Store'``.
    covariate_cols : list of str
        Covariate columns to keep; missing values in them become 0.
    """

    def __init__(self, features_df, stores_df, covariate_cols):
        self.features_df = features_df
        self.stores_df = stores_df
        self.covariate_cols = covariate_cols

    def fit(self, X, y=None):
        """Stateless transformer: nothing is learned, ``self`` is returned."""
        return self

    def transform(self, X):
        """Return ``X`` as ``[unique_id, ds, y, *covariates]``.

        A frame that already carries every one of those columns is handed
        back untouched (no merge, no clipping, no imputation).
        """
        covariates = self.covariate_cols

        # Already in NF format with all covariates -> pass through as-is.
        required = set(covariates) | {'unique_id', 'ds', 'y'}
        if required.issubset(X.columns):
            return X

        # Attach per-store/date features first, then per-store attributes.
        with_features = X.merge(
            self.features_df,
            on=['Store', 'Date', 'IsHoliday'],
            how='left',
        )
        merged = with_features.merge(self.stores_df, on='Store', how='left')

        # Target: sales floored at zero when present (train), NaN otherwise.
        has_target = 'Weekly_Sales' in merged.columns
        merged['y'] = (
            merged['Weekly_Sales'].clip(lower=0) if has_target else np.nan
        )

        # One series per store/department pair.
        store_part = merged['Store'].astype(str)
        dept_part = merged['Dept'].astype(str)
        merged['unique_id'] = store_part + '_' + dept_part
        merged = merged.rename(columns={'Date': 'ds'})

        # Missing covariate values are replaced with 0 in a single pass.
        merged[covariates] = merged[covariates].fillna(0)

        output_cols = ['unique_id', 'ds', 'y'] + covariates
        return merged[output_cols]

# 3) Wrapper: exposes a NeuralForecast PatchTST model through the sklearn
#    estimator interface so it can be the last step of a Pipeline.
class PatchTSTNF(BaseEstimator, RegressorMixin):
    """sklearn-style wrapper around ``NeuralForecast([PatchTST])``.

    Parameters
    ----------
    input_size, horizon, scaler_type, max_steps, learning_rate, batch_size,
    val_check_steps, random_seed, start_padding_enabled
        Forwarded unchanged to ``PatchTST`` (``horizon`` is passed as ``h``).
    lags_past_covariates, lags_future_covariates, covariate_cols
        Stored on the estimator only. They are NOT passed to ``PatchTST``:
        the exogenous lists were removed from the constructor call because
        the model does not support them directly, so covariate columns in
        the training frame do not influence the fit. Kept as parameters
        (now optional) for backward compatibility.

    Attributes
    ----------
    nf : NeuralForecast
        Fitted forecaster, set by ``fit``.
    model : PatchTST
        The model instance handed to ``nf``, set by ``fit``.
    """

    def __init__(self,
                 input_size,
                 horizon,
                 scaler_type,
                 max_steps,
                 learning_rate,
                 batch_size,
                 val_check_steps,
                 random_seed,
                 start_padding_enabled,
                 lags_past_covariates=None,
                 lags_future_covariates=None,
                 covariate_cols=None):
        self.input_size            = input_size
        self.horizon               = horizon
        self.scaler_type           = scaler_type
        self.max_steps             = max_steps
        self.learning_rate         = learning_rate
        self.batch_size            = batch_size
        self.val_check_steps       = val_check_steps
        self.random_seed           = random_seed
        self.start_padding_enabled = start_padding_enabled
        self.lags_past_covariates  = lags_past_covariates
        self.lags_future_covariates= lags_future_covariates
        self.covariate_cols        = covariate_cols

    def fit(self, X, y=None):
        """Train PatchTST on ``X`` and keep the fitted forecaster.

        Parameters
        ----------
        X : DataFrame
            Long-format frame with ``unique_id``, ``ds``, ``y`` (extra
            covariate columns may be present but are not wired into the
            model, see the class docstring).
        y : ignored
            Present for sklearn API compatibility.

        Returns
        -------
        self
        """
        model = PatchTST(
            input_size=self.input_size,
            h=self.horizon,
            scaler_type=self.scaler_type,
            max_steps=self.max_steps,
            learning_rate=self.learning_rate,
            batch_size=self.batch_size,
            val_check_steps=self.val_check_steps,
            random_seed=self.random_seed,
            start_padding_enabled=self.start_padding_enabled,
        )
        nf = NeuralForecast(
            models=[model],
            freq='W-FRI'
        )
        nf.fit(df=X)
        self.nf    = nf
        self.model = model
        return self

    def predict(self, X=None):
        """Forecast with the fitted model.

        ``X`` is accepted for Pipeline compatibility but is not used: the
        forecast comes from ``self.nf.predict()`` with no arguments, i.e. it
        is based on the data seen in ``fit``.

        Returns
        -------
        DataFrame
            Forecast frame with ``unique_id`` and ``ds`` as columns and the
            model output renamed from ``PatchTST`` to ``y_pred``.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If called before ``fit``. This subclasses both ``ValueError``
            and ``AttributeError``, so callers catching the former
            ``AttributeError`` keep working.
        """
        nf = getattr(self, 'nf', None)
        if nf is None:
            from sklearn.exceptions import NotFittedError
            raise NotFittedError(
                "PatchTSTNF is not fitted yet; call fit() before predict()."
            )

        preds = nf.predict()
        # Older neuralforecast releases return unique_id as the index;
        # expose it as a regular column so downstream merges are uniform.
        if 'unique_id' not in preds.columns and preds.index.name == 'unique_id':
            preds = preds.reset_index()
        return preds.rename(columns={'PatchTST': 'y_pred'})

# 4) Build the full pipeline
#   LAGS_PAST / LAGS_FUTURE are stored on the PatchTSTNF step as plain
#   parameters; PatchTSTNF.fit does not forward them to PatchTST, so they
#   currently have no effect on training.
LAGS_PAST = list(range(1, 14))                 # 1, 2, ..., 13
LAGS_FUTURE = list(range(config.horizon + 1))  # 0, 1, ..., horizon

patchtst_pipeline = Pipeline(steps=[
    # raw Store/Dept/Date rows -> [unique_id, ds, y, covariates]
    (
        'to_df_nf',
        TrainTestDF2NF(
            features_df=_df_features,
            stores_df=_df_stores,
            covariate_cols=COVARIATES,
        ),
    ),
    # PatchTST forecaster configured from the wandb run config
    (
        'patchtst',
        PatchTSTNF(
            input_size=config.input_size,
            horizon=config.horizon,
            scaler_type=config.scaler_type,
            max_steps=config.max_steps,
            learning_rate=config.learning_rate,
            batch_size=config.batch_size,
            val_check_steps=50,
            random_seed=42,
            start_padding_enabled=config.start_padding,
            lags_past_covariates=LAGS_PAST,
            lags_future_covariates=LAGS_FUTURE,
            covariate_cols=COVARIATES,
        ),
    ),
])

# 5) Hold out the last 4 weeks of the raw training data for validation.
#    Both slices stay in raw form; the pipeline does the conversion itself.
cutoff = _df_train['Date'].max() - pd.DateOffset(weeks=4)
train_raw = _df_train.loc[_df_train['Date'] <= cutoff]
val_raw = _df_train.loc[_df_train['Date'] > cutoff]

# Train on the earlier slice, then ask the pipeline for its forecast.
patchtst_pipeline.fit(train_raw)
val_preds = patchtst_pipeline.predict(val_raw)  # unique_id, ds, y_pred

# Ground truth in the same (unique_id, ds) keying as the forecast,
# with sales floored at zero like the training target.
val_truth = pd.DataFrame({
    'unique_id': val_raw['Store'].astype(str) + '_' + val_raw['Dept'].astype(str),
    'ds': val_raw['Date'],
    'y': val_raw['Weekly_Sales'].clip(0),
})

# Inner join keeps only rows that have both a truth value and a forecast;
# WMAPE = 100 * sum|y - y_pred| / sum|y| over those rows.
val_eval = pd.merge(val_truth, val_preds, on=['unique_id', 'ds'])
wm = 100 * (val_eval['y'] - val_eval['y_pred']).abs().sum() / val_eval['y'].abs().sum()
print(f"Validation WMAPE: {wm:.3f}%")
run.log({"overall_WMAPE": float(wm)})

# 6) Refit on the **entire** raw train, forecast, and write the submission
patchtst_pipeline.fit(_df_train)

# Final forecasting on raw test.csv
_df_test   = pd.read_csv(f"{DATA_PATH}/test.csv", parse_dates=['Date'])
test_preds = patchtst_pipeline.predict(_df_test)
# test_preds → [unique_id, ds, y_pred]

# Attach forecasts to every test row. The left merge keeps ALL test rows,
# so rows without a matching forecast (a Store/Dept series that was not in
# the training data, or a date the forecast does not cover) get NaN y_pred.
_test_with_preds = (
    _df_test
    .assign(
        unique_id   = lambda d: d['Store'].astype(str) + '_'
                                + d['Dept'].astype(str),
        ds          = lambda d: d['Date']
    )
    .merge(test_preds, on=['unique_id','ds'], how='left')
)

# Surface the gap instead of silently writing empty cells to the CSV.
_n_missing = int(_test_with_preds['y_pred'].isna().sum())
if _n_missing:
    print(f"Warning: {_n_missing} of {len(_test_with_preds)} test rows have "
          f"no forecast; filling Weekly_Sales with 0 for those rows.")

# Build submission: missing forecasts become 0, negatives are floored at 0
# (clip alone would leave NaN untouched).
submission = (
    _test_with_preds
    .assign(
        Id           = lambda d: d['Store'].astype(str) + '_'
                                + d['Dept'].astype(str) + '_'
                                + d['Date'].dt.strftime('%Y-%m-%d'),
        Weekly_Sales = lambda d: d['y_pred'].fillna(0).clip(lower=0)
    )
    [['Id','Weekly_Sales']]
)
submission.to_csv('submission_patchtst_pipeline.csv', index=False)

# Persist the fitted pipeline locally, then upload the pickle to the
# wandb run as a "model" artifact and close the run.
MODEL_DIR = "walmart_patchtst_pipeline"
os.makedirs(MODEL_DIR, exist_ok=True)
joblib.dump(
    value=patchtst_pipeline,
    filename=f"{MODEL_DIR}/patchtst_pipeline.pkl",
)

artifact = wandb.Artifact(name="walmart_patchtst_pipeline", type="model")
artifact.add_file(local_path=f"{MODEL_DIR}/patchtst_pipeline.pkl")
run.log_artifact(artifact)

run.finish()



INFO:lightning_fabric.utilities.seed:Seed set to 42
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name         | Type              | Params | Mode 
-----------------------------------------------------------
0 | loss         | MAE               | 0      | train
1 | padder_train | ConstantPad1d     | 0      | train
2 | scaler       | TemporalNorm      | 0      | train
3 | model        | PatchTST_backbone | 481 K  | train
-----------------------------------------------------------
481 K     Trainable params
3         Non-trainable params
481 K     Total params
1.925     Total estimated model params size (MB)
90        Modules in train mode
0         Modules in

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_steps=1000` reached.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]



Validation WMAPE: 21.018%


INFO:lightning_fabric.utilities.seed:Seed set to 42
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name         | Type              | Params | Mode 
-----------------------------------------------------------
0 | loss         | MAE               | 0      | train
1 | padder_train | ConstantPad1d     | 0      | train
2 | scaler       | TemporalNorm      | 0      | train
3 | model        | PatchTST_backbone | 481 K  | train
-----------------------------------------------------------
481 K     Trainable params
3         Non-trainable params
481 K     Total params
1.925     Total estimated model params size (MB)
90        Modules in train mode
0         Modules in

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]