# **Deep Learning Models**
   Goal: Capture complex temporal patterns
## Models
 - LSTM
 - GRU
 - Temporal CNN
 - Seq2Seq
 - Transformer (advanced)

## Techniques
 - Sliding windows
 - Multivariate sequences
 - Early stopping
 - GPU acceleration (if available)

  Output: Best DL model


## DL Foundations & Data Preparation
 - Load feature-engineered dataset
 - Select DL-safe features
 - Normalize numeric features (fit on train only)
 - Encode categoricals (embeddings-ready)
 - Create sliding windows
 - Define sequence tensors
### Concepts
 - Multivariate time series
 - Sliding window formulation
 - Sequence-to-one forecasting

### Output
 - X_train_seq, y_train_seq
 - X_valid_seq, y_valid_seq
 - scaler.pkl
 - feature_index.json

## Load Feature-Engineered Dataset

In [2]:
# Clone GitHub Repository
!git clone https://github.com/sabin74/Enterprise-Intelligent-Demand-Forecasting-Decision-Optimization-Platform.git

fatal: destination path 'Enterprise-Intelligent-Demand-Forecasting-Decision-Optimization-Platform' already exists and is not an empty directory.


In [21]:
# Import Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import os
from pathlib import Path
import gc
import warnings
warnings.filterwarnings('ignore')

In [4]:
# Work from inside the cloned repository so the relative data/ and models/ paths
# used by later cells resolve.
PROJECT_ROOT = '/content/Enterprise-Intelligent-Demand-Forecasting-Decision-Optimization-Platform'
os.chdir(PROJECT_ROOT)
print("Current Directory: ", os.getcwd())

Current Directory:  /content/Enterprise-Intelligent-Demand-Forecasting-Decision-Optimization-Platform


In [5]:
# Load Feature Engineered Data
df = pd.read_parquet('data/features/train_features.parquet')

# NOTE(review): this stores the parsed timestamps in a NEW column named 'data' and
# leaves 'date' untouched — it looks like a typo for df['date']. Any later cell that
# reads df['data'] depends on this column, so rename with care.
df['data'] = pd.to_datetime(df['date'])
# Sort rows by date within each (store_nbr, family) pair and renumber the index.
df = df.sort_values(['store_nbr', 'family', 'date']).reset_index(drop=True)
df.head()

Unnamed: 0,id,date,store_nbr,family,sales,onpromotion,city,state,store_type,cluster,...,is_pre_holiday,is_post_holiday,family_freq,store_freq,city_freq,state_freq,sales_log,family_te,store_te,data
0,0,2013-01-01,1,AUTOMOTIVE,0.0,0,Quito,Pichincha,D,13,...,0,0,0.030303,0.018519,0.333333,0.351852,0.0,1.612195,3.148359,2013-01-01
1,1782,2013-01-02,1,AUTOMOTIVE,2.0,0,Quito,Pichincha,D,13,...,0,1,0.030303,0.018519,0.333333,0.351852,1.098612,1.612195,3.148359,2013-01-02
2,3564,2013-01-03,1,AUTOMOTIVE,3.0,0,Quito,Pichincha,D,13,...,0,0,0.030303,0.018519,0.333333,0.351852,1.386294,1.612195,3.148359,2013-01-03
3,5346,2013-01-04,1,AUTOMOTIVE,3.0,0,Quito,Pichincha,D,13,...,0,0,0.030303,0.018519,0.333333,0.351852,1.386294,1.612195,3.148359,2013-01-04
4,7128,2013-01-05,1,AUTOMOTIVE,5.0,0,Quito,Pichincha,D,13,...,0,0,0.030303,0.018519,0.333333,0.351852,1.791759,1.612195,3.148359,2013-01-05


In [6]:
# Train / Validation Split
# Rows dated before split_date form the training set; rows on or after it are held out.
split_date = '2017-01-01'

# Filter on the real 'date' column rather than the accidentally named 'data' copy.
# Parsing here keeps the comparison valid even if 'date' was loaded as text.
row_dates = pd.to_datetime(df['date'])
train_df = df[row_dates < split_date].copy()
valid_df = df[row_dates >= split_date].copy()

print(df.shape, train_df.shape, valid_df.shape)

(3054348, 63) (2642706, 63) (411642, 63)


In [7]:
# Feature Selection
# Name of the label column (the window generators below read the same "sales_log" column).
TARGET = "sales_log"

# Columns that are standardized with StandardScaler and used as continuous inputs.
# Group labels below are read off the column names — presumably built in the upstream
# feature-engineering step; confirm there.
NUMERIC_FEATURES = [
    # Promotion count, store cluster, oil price and calendar flags.
    # NOTE(review): "cluster" looks like an identifier rather than a magnitude —
    # confirm it belongs with the scaled numerics.
    "onpromotion",
    "cluster",
    "dcoilwtico",
    "is_holiday",
    "is_workday",
    "earthquake",
    "is_payday",
    "week_of_year",
    "is_weekend",
    "is_month_end",

    # Sales lag columns
    "sales_lag_1",
    "sales_lag_7",
    "sales_lag_14",
    "sales_lag_28",

    # Promotion lag columns
    "promo_lag_1",
    "promo_lag_7",

    # Oil price lag columns
    "oil_lag_7",
    "oil_lag_14",
    "oil_lag_28",

    # Rolling sales mean columns
    "sales_roll_mean_7",
    "sales_roll_mean_14",
    "sales_roll_mean_28",

    # Rolling sales standard-deviation columns
    "sales_roll_std_7",
    "sales_roll_std_14",
    "sales_roll_std_28",

    # Rolling promotion sum columns
    "promo_roll_sum_7",
    "promo_roll_sum_14",
    "promo_roll_sum_28",

    # Promotion flag / frequency columns
    "promo_flag",
    "promo_freq_7",
    "promo_freq_14",
    "promo_freq_28",

    # Holiday-type flags
    "is_national_holiday",
    "is_regional_holiday",
    "is_local_holiday",
    "is_bridge",
    "is_comp_workday",
    "is_pre_holiday",
    "is_post_holiday",

    # Frequency-encoded identifiers
    "family_freq",
    "store_freq",
    "city_freq",
    "state_freq",
]


# Columns that are converted to integer ids (for embedding layers) further down.
CATEGORICAL_FEATURES = [
    "store_nbr",
    "family",
    "city",
    "state",
    "store_type",
    "holiday_type",
    "locale",
]

In [8]:
# NOTE(review): this import looks like an accidental editor auto-import; nothing in this
# cell uses it. Left in place in case a later cell relies on the name.
from pandas.core.indexes import category
# Encode Categoricals (Integer IDs for Embeddings)
# Build one {category string -> integer id} lookup per categorical column, using the
# training split only. Ids start at 1, in order of first appearance.
category_maps = {}

for col in CATEGORICAL_FEATURES:
    labels = train_df[col].astype(str).unique()
    category_maps[col] = dict(zip(labels, range(1, len(labels) + 1)))

In [9]:
# Apply Encoding
def encode_categoreis(df, category_maps, *, unknown_id=0):
    """Return a copy of ``df`` with categorical columns replaced by integer ids.

    Args:
        df: DataFrame containing every column named in ``category_maps``.
        category_maps: ``{column: {category string: integer id}}``. Column values
            are converted to ``str`` before the lookup. Ids are expected to start
            at 1 (as produced when the maps are built from the training split),
            which leaves 0 free for unseen values.
        unknown_id: Id assigned to values that are missing from a column's
            mapping. Defaults to 0 rather than a negative number, because a
            negative id is not a valid index into an embedding table.

    Returns:
        A new DataFrame; the input frame is not modified.
    """
    encoded = df.copy()
    for col, mapping in category_maps.items():
        # Series.map leaves NaN for keys absent from the mapping.
        ids = encoded[col].astype(str).map(mapping)
        encoded[col] = ids.fillna(unknown_id).astype(int)
    return encoded

# Encode both splits with the lookup tables built from the training data.
train_df, valid_df = (
    encode_categoreis(split, category_maps) for split in (train_df, valid_df)
)

In [10]:
# Normalize Numeric Features
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply the same scaling to both splits
# so that no validation statistics influence the fit.
scaler = StandardScaler().fit(train_df[NUMERIC_FEATURES])
train_df[NUMERIC_FEATURES] = scaler.transform(train_df[NUMERIC_FEATURES])
valid_df[NUMERIC_FEATURES] = scaler.transform(valid_df[NUMERIC_FEATURES])

In [25]:
# Memory Optimization (reduce memory usage)
def reduce_mem_usage(df):
    """Downcast 64-bit numeric columns of ``df`` to 32-bit, in place.

    ``float64`` columns become ``float32``. ``int64`` columns become ``int32``
    only when every value fits in the int32 range; a column holding larger
    values is left unchanged so the cast cannot silently wrap around.
    Columns of any other dtype are not touched.

    Args:
        df: DataFrame to shrink. It is modified in place.

    Returns:
        The same DataFrame object that was passed in.
    """
    int32_limits = np.iinfo(np.int32)
    for col in df.columns:
        column = df[col]
        if column.dtype == "float64":
            df[col] = column.astype("float32")
        elif column.dtype == "int64":
            fits_int32 = column.empty or (
                column.min() >= int32_limits.min
                and column.max() <= int32_limits.max
            )
            if fits_int32:
                df[col] = column.astype("int32")
    return df

train_df = reduce_mem_usage(train_df)
# The validation frame was previously rebound under the misleading name `test_df`.
# Bind it to `valid_df`, the name the rest of the notebook uses.
valid_df = reduce_mem_usage(valid_df)
# TODO: drop this alias once no later cell refers to `test_df`.
test_df = valid_df
gc.collect()

344

In [15]:
# Define Final Feature Order
# Column order used for the feature axis of every window: scaled numerics first,
# then the integer-encoded categoricals.
DL_FEATURES = [*NUMERIC_FEATURES, *CATEGORICAL_FEATURES]
len(DL_FEATURES)

50

## Sliding Window Generator (Reusable Engine)

All DL models will reuse this logic.

### Build window generator:
 - window_size = 28
 - horizon = 1
 - Ensure time continuity per store-family
 - Batch-safe generator (tf / torch compatible)

### Output
 -  Sliding window function
 -  Memory-efficient batching
 -  Tested on small subset

In [17]:
# Sliding Window Function
def sliding_window_generator(
    df,
    window_size=28,
    horizon=1,
    feature_cols=None,
    target_col="sales_log",
    group_cols=("store_nbr", "family"),
    batch_size=256,
    time_col="date",
):
    """Yield batches of sliding windows for sequence-to-one training.

    Each group defined by ``group_cols`` is sorted by ``time_col`` and cut into
    overlapping windows of ``window_size`` consecutive rows. The label of a
    window is the ``target_col`` value ``horizon`` rows after the window's last
    row. Windows never span two groups, but one batch may contain windows from
    several groups. Only ``batch_size`` windows are held in memory at a time.

    Args:
        df: DataFrame holding the feature, target, group and time columns.
        window_size: Number of rows per input window (>= 1).
        horizon: Offset, in rows, from the end of the window to the label (>= 1).
        feature_cols: Columns forming the last axis of ``X``, in this order.
            Required; the ``None`` default exists only for signature
            compatibility.
        target_col: Column the labels are read from.
        group_cols: Columns identifying one independent series.
        batch_size: Number of windows per yielded batch (>= 1). The final
            batch may be smaller.
        time_col: Column used to order rows within a group.

    Returns:
        A single-pass generator of ``(X, y)`` float32 arrays with shapes
        ``(batch, window_size, len(feature_cols))`` and ``(batch,)``.
        Groups with fewer than ``window_size + horizon`` rows are skipped.

    Raises:
        ValueError: If ``feature_cols`` is missing or empty, or if
            ``window_size``, ``horizon`` or ``batch_size`` is below 1. Raised
            when the function is called, not at the first ``next()``.
    """
    if feature_cols is None or len(feature_cols) == 0:
        raise ValueError("feature_cols must list at least one column")
    for label, value in (
        ("window_size", window_size),
        ("horizon", horizon),
        ("batch_size", batch_size),
    ):
        if value < 1:
            raise ValueError(f"{label} must be a positive integer, got {value!r}")

    return _iter_window_batches(
        df,
        window_size,
        horizon,
        list(feature_cols),
        target_col,
        list(group_cols),
        batch_size,
        time_col,
    )


def _iter_window_batches(
    df, window_size, horizon, feature_cols, target_col, group_cols, batch_size, time_col
):
    """Generate the (X, y) batches for already-validated window settings."""
    X_batch, y_batch = [], []
    min_rows = window_size + horizon

    for _, gdf in df.groupby(group_cols):
        n_rows = len(gdf)
        # Skip short series before sorting or copying anything.
        if n_rows < min_rows:
            continue

        gdf = gdf.sort_values(time_col)
        X_values = gdf[feature_cols].values.astype("float32")
        y_values = gdf[target_col].values.astype("float32")

        for start in range(n_rows - min_rows + 1):
            stop = start + window_size
            X_batch.append(X_values[start:stop])
            y_batch.append(y_values[stop + horizon - 1])

            if len(X_batch) == batch_size:
                yield np.array(X_batch), np.array(y_batch)
                X_batch, y_batch = [], []

    # Flush the final, possibly smaller, batch.
    if X_batch:
        yield np.array(X_batch), np.array(y_batch)


In [18]:
# Create Train / Validation Generator
WINDOW_SIZE = 28
HORIZON = 1
BATCH_SIZE = 256

# Both splits are windowed with identical settings; only the source frame differs.
_window_settings = dict(
    window_size=WINDOW_SIZE,
    horizon=HORIZON,
    feature_cols=DL_FEATURES,
    target_col="sales_log",
    batch_size=BATCH_SIZE,
)

# Each generator can be iterated once; build a new one for another pass over the data.
train_gen = sliding_window_generator(df=train_df, **_window_settings)
valid_gen = sliding_window_generator(df=valid_df, **_window_settings)


In [19]:
# Sanity Check
# Pull one batch from each generator and print the array shapes.
# NOTE: next() consumes that batch, so these generator objects will not yield it again.
Xb, yb = next(train_gen)
print("Train batch:", Xb.shape, yb.shape)

Xv, yv = next(valid_gen)
print("Valid batch:", Xv.shape, yv.shape)

Train batch: (256, 28, 50) (256,)
Valid batch: (256, 28, 50) (256,)


In [20]:
# Save DL Scaler
import joblib
import json

# Make sure the output folder exists so the first save does not fail on a fresh checkout.
os.makedirs('models', exist_ok=True)

# Persist the scaler fitted on the training split so the same scaling can be reapplied later.
joblib.dump(scaler, 'models/scaler.pkl')

# Save Feature Map
# Everything needed to rebuild model inputs: the column lists, their order in the
# tensor, the category -> id lookups and the window geometry.
feature_map = {
    "numeric_features": NUMERIC_FEATURES,
    "categorical_features": CATEGORICAL_FEATURES,
    "dl_features_order": DL_FEATURES,
    "category_maps": category_maps,
    "window_size": WINDOW_SIZE,
    # Stored next to window_size because both define how a window maps to its label.
    "horizon": HORIZON,
}

with open("models/dl_feature_map.json", "w") as f:
    json.dump(feature_map, f, indent=4)


## Model 1: LSTM (Baseline DL)
 - Handles long dependencies
 - Strong baseline
 - Easy to interpret
### What we do
 - Build LSTM architecture
 - Sequence → Dense output
 - Early stopping
 - GPU detection

### Output
 - LSTM RMSLE
 - Training curves
 - Saved model

In [23]:
# Print the index range, column names and dtypes of the prepared training frame.
train_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2642706 entries, 0 to 3054116
Data columns (total 63 columns):
 #   Column               Dtype         
---  ------               -----         
 0   id                   int64         
 1   date                 datetime64[ns]
 2   store_nbr            int64         
 3   family               int64         
 4   sales                float64       
 5   onpromotion          float64       
 6   city                 int64         
 7   state                int64         
 8   store_type           int64         
 9   cluster              float64       
 10  dcoilwtico           float64       
 11  holiday_type         int64         
 12  locale               int64         
 13  locale_name          category      
 14  description          category      
 15  is_holiday           float64       
 16  is_workday           float64       
 17  earthquake           float64       
 18  is_payday            float64       
 19  year                 int32