# **Deep Learning Models**
   Goal: Capture complex temporal patterns
## Models
 - LSTM
 - GRU
 - Temporal CNN
 - Seq2Seq
 - Transformer (advanced)

## Techniques
 - Sliding windows
 - Multivariate sequences
 - Early stopping
 - GPU acceleration (if available)

  Output: Best DL model


## DL Foundations & Data Preparation
 - Load feature-engineered dataset
 - Select DL-safe features
 - Normalize numeric features (fit on train only)
 - Encode categoricals (embeddings-ready)
 - Create sliding windows
 - Define sequence tensors
### Concepts
 - Multivariate time series
 - Sliding window formulation
 - Sequence-to-one forecasting

### Output
 - X_train_seq, y_train_seq
 - X_valid_seq, y_valid_seq
 - scaler.pkl
 - feature_index.json (written by this notebook as `models/dl_feature_map.json`)

## Load Feature-Engineered Dataset

In [3]:
# Clone GitHub Repository
# The leading "!" is a notebook shell escape: the line is handed to the system
# shell, so it only works inside IPython/Jupyter/Colab, not plain Python.
!git clone https://github.com/sabin74/Enterprise-Intelligent-Demand-Forecasting-Decision-Optimization-Platform.git

Cloning into 'Enterprise-Intelligent-Demand-Forecasting-Decision-Optimization-Platform'...
remote: Enumerating objects: 224, done.
remote: Counting objects: 100% (8/8), done.
remote: Compressing objects: 100% (8/8), done.
remote: Total 224 (delta 1), reused 0 (delta 0), pack-reused 216 (from 1)
Receiving objects: 100% (224/224), 27.49 MiB | 17.86 MiB/s, done.
Resolving deltas: 100% (113/113), done.
Filtering content: 100% (18/18), 348.57 MiB | 64.98 MiB/s, done.


In [4]:
# Import Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import os
import warnings
warnings.filterwarnings('ignore')

In [5]:
# Set Project Root
# Every relative path used below (data/..., models/...) is resolved against
# the cloned repository, so make it the working directory first.
PROJECT_ROOT = '/content/Enterprise-Intelligent-Demand-Forecasting-Decision-Optimization-Platform'
os.chdir(PROJECT_ROOT)
print("Current Directory: ", os.getcwd())

Current Directory:  /content/Enterprise-Intelligent-Demand-Forecasting-Decision-Optimization-Platform


In [6]:
# Load Feature Engineered Data
df = pd.read_parquet('data/features/train_features.parquet')

# Parse the real 'date' column in place so the chronological sort below (and
# any later sort on 'date') works on datetimes even if the file stores strings.
df['date'] = pd.to_datetime(df['date'])
# 'data' (sic) is a misspelled duplicate of 'date'. It is kept on purpose:
# the train/validation split cell filters on df['data'].
df['data'] = df['date']

# Order rows as one contiguous, time-ordered series per (store, family).
df = df.sort_values(['store_nbr', 'family', 'date']).reset_index(drop=True)
df.head()

Unnamed: 0,id,date,store_nbr,family,sales,onpromotion,city,state,store_type,cluster,...,is_pre_holiday,is_post_holiday,family_freq,store_freq,city_freq,state_freq,sales_log,family_te,store_te,data
0,0,2013-01-01,1,AUTOMOTIVE,0.0,0,Quito,Pichincha,D,13,...,0,0,0.030303,0.018519,0.333333,0.351852,0.0,1.612195,3.148359,2013-01-01
1,1782,2013-01-02,1,AUTOMOTIVE,2.0,0,Quito,Pichincha,D,13,...,0,1,0.030303,0.018519,0.333333,0.351852,1.098612,1.612195,3.148359,2013-01-02
2,3564,2013-01-03,1,AUTOMOTIVE,3.0,0,Quito,Pichincha,D,13,...,0,0,0.030303,0.018519,0.333333,0.351852,1.386294,1.612195,3.148359,2013-01-03
3,5346,2013-01-04,1,AUTOMOTIVE,3.0,0,Quito,Pichincha,D,13,...,0,0,0.030303,0.018519,0.333333,0.351852,1.386294,1.612195,3.148359,2013-01-04
4,7128,2013-01-05,1,AUTOMOTIVE,5.0,0,Quito,Pichincha,D,13,...,0,0,0.030303,0.018519,0.333333,0.351852,1.791759,1.612195,3.148359,2013-01-05


In [7]:
# Train Validation Split
# Time-based holdout: rows strictly before split_date train the model, rows on
# or after it are used for validation. The parsed dates live in the 'data'
# column created by the loading cell.
split_date = '2017-01-01'

is_train_row = df['data'] < split_date
is_valid_row = df['data'] >= split_date

train_df = df.loc[is_train_row].copy()
valid_df = df.loc[is_valid_row].copy()

print(df.shape, train_df.shape, valid_df.shape)

(3054348, 63) (2642706, 63) (411642, 63)


In [8]:
# Feature Selection
# Column the sliding-window generator reads the label from.
TARGET = "sales"

# Columns standardized with StandardScaler in the normalization cell
# (fit on train_df, applied to train_df and valid_df).
NUMERIC_FEATURES = [
    "onpromotion",
    "cluster",
    "dcoilwtico",
    "is_holiday",
    "is_workday",
    "earthquake",
    "is_payday",
    "week_of_year",
    "is_weekend",
    "is_month_end",

    # sales_lag_* columns
    "sales_lag_1",
    "sales_lag_7",
    "sales_lag_14",
    "sales_lag_28",

    # promo_lag_* columns
    "promo_lag_1",
    "promo_lag_7",

    # oil_lag_* columns
    "oil_lag_7",
    "oil_lag_14",
    "oil_lag_28",

    # sales_roll_mean_* columns
    "sales_roll_mean_7",
    "sales_roll_mean_14",
    "sales_roll_mean_28",

    # sales_roll_std_* columns
    "sales_roll_std_7",
    "sales_roll_std_14",
    "sales_roll_std_28",

    # promo_roll_sum_* columns
    "promo_roll_sum_7",
    "promo_roll_sum_14",
    "promo_roll_sum_28",

    # promo flag / promo_freq_* columns
    "promo_flag",
    "promo_freq_7",
    "promo_freq_14",
    "promo_freq_28",

    # holiday indicator columns
    "is_national_holiday",
    "is_regional_holiday",
    "is_local_holiday",
    "is_bridge",
    "is_comp_workday",
    "is_pre_holiday",
    "is_post_holiday",

    # *_freq columns
    "family_freq",
    "store_freq",
    "city_freq",
    "state_freq",
]


# Columns converted to integer ids through category_maps (built from train_df)
# before being appended after the numeric columns in DL_FEATURES.
CATEGORICAL_FEATURES = [
    "store_nbr",
    "family",
    "city",
    "state",
    "store_type",
    "holiday_type",
    "locale",
]

In [9]:
from pandas.core.indexes import category
# NOTE(review): nothing in this cell references the import above; it looks
# like an accidental editor auto-import - confirm before removing it.

# Encode Categoricals (Integer IDs for Embeddings)
# Build one {label: integer id} lookup per categorical column using the
# training split only. Ids are handed out in order of first appearance and
# start at 1, so 0 is never assigned to a label seen in training.
category_maps = {}

for col in CATEGORICAL_FEATURES:
    train_labels = train_df[col].astype(str).unique()
    category_maps[col] = {
        label: code
        for code, label in enumerate(train_labels, start=1)
    }

In [10]:
# Apply Encoding
def encode_categoreis(df, category_maps, unknown_id=0):
    """Return a copy of ``df`` with categorical columns replaced by integer ids.

    Each column named in ``category_maps`` is cast to ``str`` and looked up in
    its ``{label: id}`` mapping. Labels missing from the mapping (for example
    categories that only occur in the validation split) receive ``unknown_id``.

    The default ``unknown_id`` is 0 because the maps built in this notebook
    start at 1, which leaves 0 free as a dedicated "unknown" slot. The previous
    fallback of -1 is not a valid row index for an embedding table.

    Args:
        df: DataFrame containing every column named in ``category_maps``.
        category_maps: ``{column: {label_as_str: int_id}}``.
        unknown_id: Integer id assigned to labels absent from the mapping.

    Returns:
        A new DataFrame; the input frame is not modified.
    """
    encoded = df.copy()
    for col, mapping in category_maps.items():
        # map() leaves NaN for unmapped labels; fill before the int cast.
        ids = encoded[col].astype(str).map(mapping)
        encoded[col] = ids.fillna(unknown_id).astype(int)
    return encoded

# Encode both splits with the same training-derived lookup tables.
train_df, valid_df = (
    encode_categoreis(split, category_maps) for split in (train_df, valid_df)
)

In [11]:
# Normalize Numeric Features
from sklearn.preprocessing import StandardScaler

# Learn the per-column statistics from the training split only, then apply
# that same fitted transform to both splits so no validation information
# leaks into the scaling.
scaler = StandardScaler()
scaler.fit(train_df[NUMERIC_FEATURES])

train_df[NUMERIC_FEATURES] = scaler.transform(train_df[NUMERIC_FEATURES])
valid_df[NUMERIC_FEATURES] = scaler.transform(valid_df[NUMERIC_FEATURES])

In [12]:
# Create Window Sequence
# Window Generator Function
def window_generator(df, window_size, features, target, batch_size=256, horizon=1):
    """Yield mini-batches of sliding windows, one series per (store_nbr, family).

    ``df`` is grouped by ``['store_nbr', 'family']`` and each group is sorted
    by ``'date'``. For every start position in a group, the sample input is
    the ``window_size`` consecutive rows of ``features`` and the label is the
    ``target`` value ``horizon`` rows after the last input row. Windows never
    span two groups, and groups too short to produce a sample are skipped.
    Batches may mix samples from consecutive groups.

    Args:
        df: DataFrame with ``store_nbr``, ``family``, ``date``, every column
            in ``features`` and the ``target`` column.
        window_size: Number of consecutive rows per input window (>= 1).
        features: Ordered list of input column names.
        target: Name of the label column.
        batch_size: Samples per yielded batch (>= 1); the final batch may be
            smaller.
        horizon: Steps ahead of the window's last row to read the label from
            (>= 1). The default of 1 is next-step forecasting.

    Yields:
        ``(X, y)`` NumPy arrays, where ``X`` has shape
        ``(batch, window_size, len(features))`` and ``y`` has shape
        ``(batch,)``.

    Raises:
        ValueError: If ``window_size``, ``batch_size`` or ``horizon`` is less
            than 1. Because this is a generator, the error surfaces on the
            first ``next()`` call, not when the generator is created.
    """
    if window_size < 1:
        raise ValueError("window_size must be >= 1")
    if batch_size < 1:
        raise ValueError("batch_size must be >= 1")
    if horizon < 1:
        raise ValueError("horizon must be >= 1")

    # Distance from a window's first row to the row holding its label.
    target_offset = window_size + horizon - 1

    X_batch, y_batch = [], []

    for _, group in df.groupby(['store_nbr', 'family']):
        group = group.sort_values('date')
        X_vals = group[features].values
        y_vals = group[target].values

        # An empty range (group too short) simply contributes no samples.
        for start in range(len(group) - target_offset):
            X_batch.append(X_vals[start:start + window_size])
            y_batch.append(y_vals[start + target_offset])

            if len(X_batch) == batch_size:
                yield np.array(X_batch), np.array(y_batch)
                X_batch, y_batch = [], []

    # Flush the last, possibly partial, batch.
    if X_batch:
        yield np.array(X_batch), np.array(y_batch)


In [13]:
# Define Final Feature Order
# Numeric columns come first, categorical id columns last; this order fixes
# the meaning of the last axis of every window tensor.
DL_FEATURES = [*NUMERIC_FEATURES, *CATEGORICAL_FEATURES]
DL_FEATURES

['onpromotion',
 'cluster',
 'dcoilwtico',
 'is_holiday',
 'is_workday',
 'earthquake',
 'is_payday',
 'week_of_year',
 'is_weekend',
 'is_month_end',
 'sales_lag_1',
 'sales_lag_7',
 'sales_lag_14',
 'sales_lag_28',
 'promo_lag_1',
 'promo_lag_7',
 'oil_lag_7',
 'oil_lag_14',
 'oil_lag_28',
 'sales_roll_mean_7',
 'sales_roll_mean_14',
 'sales_roll_mean_28',
 'sales_roll_std_7',
 'sales_roll_std_14',
 'sales_roll_std_28',
 'promo_roll_sum_7',
 'promo_roll_sum_14',
 'promo_roll_sum_28',
 'promo_flag',
 'promo_freq_7',
 'promo_freq_14',
 'promo_freq_28',
 'is_national_holiday',
 'is_regional_holiday',
 'is_local_holiday',
 'is_bridge',
 'is_comp_workday',
 'is_pre_holiday',
 'is_post_holiday',
 'family_freq',
 'store_freq',
 'city_freq',
 'state_freq',
 'store_nbr',
 'family',
 'city',
 'state',
 'store_type',
 'holiday_type',
 'locale']

In [18]:
# Generate Train Validation Sequence
WINDOW_SIZE = 28


def _make_window_gen(frame):
    """Return a fresh batch generator over ``frame`` with the shared settings."""
    return window_generator(
        frame,
        WINDOW_SIZE,
        DL_FEATURES,
        TARGET,
        batch_size=256
    )


train_gen = _make_window_gen(train_df)
# NOTE: despite its name, test_gen iterates the validation split (valid_df).
test_gen = _make_window_gen(valid_df)

# Peek at one batch from throwaway generators. Calling next() on train_gen /
# test_gen here would silently drop batches from them, and calling it twice
# per print would report the X and y shapes of two different batches.
X_peek, y_peek = next(_make_window_gen(train_df))
print("Train Sequence: ", X_peek.shape, y_peek.shape)

X_peek, y_peek = next(_make_window_gen(valid_df))
print("Test Sequence: ", X_peek.shape, y_peek.shape)

Train Sequence:  (256, 28, 50) (256,)
Test Sequence:  (256, 28, 50) (256,)


In [23]:
# Save DL Scaler
import os
import joblib
import json

# Create the output folder if it is missing so both writes below cannot fail
# on a fresh checkout.
os.makedirs('models', exist_ok=True)

joblib.dump(scaler, 'models/scaler.pkl')

# Save Feature Map
# Everything needed to rebuild identical model inputs later: the column lists,
# their final order, the label -> id tables and the window length.
feature_map = {
    "numeric_features": NUMERIC_FEATURES,
    "categorical_features": CATEGORICAL_FEATURES,
    "dl_features_order": DL_FEATURES,
    "category_maps": category_maps,
    "window_size": WINDOW_SIZE,
}

with open("models/dl_feature_map.json", "w", encoding="utf-8") as f:
    json.dump(feature_map, f, indent=4)


## Sliding Window Generator (Reusable Engine)

All DL models will reuse this logic.

### Build window generator:
 - window_size = 28
 - horizon = 1
 - Ensure time continuity per store-family
 - Batch-safe generator (tf / torch compatible)

### Output
 -  Sliding window function
 -  Memory-efficient batching
 -  Tested on small subset

In [22]:
# Display the current working directory (the relative data/ and models/ paths
# used in this notebook are resolved against it).
os.getcwd()

'/content/Enterprise-Intelligent-Demand-Forecasting-Decision-Optimization-Platform'