# **Deep Learning Models**
   Goal: Capture complex temporal patterns
## Models
 - LSTM
 - GRU
 - Temporal CNN
 - Seq2Seq
 - Transformer (advanced)

## Techniques
 - Sliding windows
 - Multivariate sequences
 - Early stopping
 - GPU acceleration (if available)

  Output: Best DL model


## DL Foundations & Data Preparation
 - Load feature-engineered dataset
 - Select DL-safe features
 - Normalize numeric features (fit on train only)
 - Encode categoricals (embeddings-ready)
 - Create sliding windows
 - Define sequence tensors
### Concepts
 - Multivariate time series
 - Sliding window formulation
 - Sequence-to-one forecasting

### Output
 - X_train_seq, y_train_seq
 - X_valid_seq, y_valid_seq
 - scaler.pkl
 - feature_index.json

## Load Feature-Engineered Dataset

In [None]:
# Clone GitHub Repository
# Fetches the project repository into the current working directory; a later
# cell changes into the cloned folder and reads data relative to it.
# NOTE(review): `git clone` refuses to write into an existing non-empty
# directory, so re-running this cell on the same runtime will report an error.
!git clone https://github.com/sabin74/Enterprise-Intelligent-Demand-Forecasting-Decision-Optimization-Platform.git

In [None]:
# Import Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import os
from pathlib import Path
import gc
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Seed the NumPy and TensorFlow random generators with one shared value so
# repeated runs of the notebook are comparable.
import tensorflow as tf

SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [None]:
# Set Project Root
# Relative paths used below (data/..., models/...) resolve against this folder.
PROJECT_ROOT = '/content/Enterprise-Intelligent-Demand-Forecasting-Decision-Optimization-Platform'
os.chdir(PROJECT_ROOT)
print("Current Directory: ", os.getcwd())

In [None]:
# Load Feature Engineered Data
df = pd.read_parquet('data/features/train_features.parquet')

df['data'] = pd.to_datetime(df['date'])
df = df.sort_values(['store_nbr', 'family', 'date']).reset_index(drop=True)
df.head()

In [None]:
# Train Validatoin Split
split_date = '2017-01-01'
train_df = df[df['data'] < split_date].copy()
valid_df = df[df['data'] >= split_date].copy()

print(df.shape, train_df.shape, valid_df.shape)

In [None]:
# Feature Selection
# Column the models learn to predict; the window generator reads it as y.
TARGET = "sales_log"

# Columns that are standardized with StandardScaler (fit on the training
# split only) in the normalization cell below.
# The order of this list matters: NUMERIC_FEATURES + CATEGORICAL_FEATURES
# becomes the column order of the model input tensor and is persisted in the
# saved feature map.
# NOTE(review): group labels below are inferred from the column names only —
# confirm against the feature-engineering step.
NUMERIC_FEATURES = [
    # base / calendar-style columns
    # NOTE(review): "cluster" and the is_* flags are scaled like continuous
    # values here — confirm that is intended.
    "onpromotion",
    "cluster",
    "dcoilwtico",
    "is_holiday",
    "is_workday",
    "earthquake",
    "is_payday",
    "week_of_year",
    "is_weekend",
    "is_month_end",

    # sales lags
    "sales_lag_1",
    "sales_lag_7",
    "sales_lag_14",
    "sales_lag_28",

    # promotion lags
    "promo_lag_1",
    "promo_lag_7",

    # oil lags
    "oil_lag_7",
    "oil_lag_14",
    "oil_lag_28",

    # rolling sales means
    "sales_roll_mean_7",
    "sales_roll_mean_14",
    "sales_roll_mean_28",

    # rolling sales standard deviations
    "sales_roll_std_7",
    "sales_roll_std_14",
    "sales_roll_std_28",

    # rolling promotion sums
    "promo_roll_sum_7",
    "promo_roll_sum_14",
    "promo_roll_sum_28",

    # promotion flag / frequencies
    "promo_flag",
    "promo_freq_7",
    "promo_freq_14",
    "promo_freq_28",

    # holiday-related flags
    "is_national_holiday",
    "is_regional_holiday",
    "is_local_holiday",
    "is_bridge",
    "is_comp_workday",
    "is_pre_holiday",
    "is_post_holiday",

    # frequency-style encodings
    "family_freq",
    "store_freq",
    "city_freq",
    "state_freq",
]


# Columns that are converted to integer IDs (built from the training split)
# in the encoding cells below. "store_nbr" and "family" are also the default
# grouping keys of the sliding window generator.
CATEGORICAL_FEATURES = [
    "store_nbr",
    "family",
    "city",
    "state",
    "store_type",
    "holiday_type",
    "locale",
]

In [None]:
# Encode Categoricals (Integer IDs for Embeddings)
# Build one {category string -> integer ID} lookup per categorical column.
# IDs start at 1 and follow first-appearance order in the training split, so
# the validation split never contributes to the vocabulary.
category_maps = {}

for col in CATEGORICAL_FEATURES:
    seen_values = train_df[col].astype(str).unique()
    category_maps[col] = {
        value: code for code, value in enumerate(seen_values, start=1)
    }

In [None]:
# Apply Encoding
def encode_categoreis(df, category_maps):
    """Return a copy of ``df`` with categorical columns replaced by integer IDs.

    Args:
        df: DataFrame containing every column named in ``category_maps``.
        category_maps: ``{column: {category_string: integer_id}}`` lookup
            tables. Values are converted to ``str`` before the lookup.

    Returns:
        A new DataFrame; the input frame is left untouched.

    Categories missing from a lookup table are encoded as 0. The tables built
    in this notebook start their IDs at 1, so 0 is a free "unknown" slot and,
    unlike a negative value, is a valid embedding index.
    """
    encoded = df.copy()
    for col, mapping in category_maps.items():
        encoded[col] = (
            encoded[col].astype(str).map(mapping).fillna(0).astype(int)
        )
    return encoded

# Encode both splits with the lookup tables built from the training data.
train_df, valid_df = (
    encode_categoreis(split, category_maps) for split in (train_df, valid_df)
)

In [None]:
# Normalize Numeric Features
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same fitted transform to both splits so no validation statistics leak in.
scaler = StandardScaler().fit(train_df[NUMERIC_FEATURES])
train_df[NUMERIC_FEATURES] = scaler.transform(train_df[NUMERIC_FEATURES])
valid_df[NUMERIC_FEATURES] = scaler.transform(valid_df[NUMERIC_FEATURES])

In [None]:
# Memory Optimization (reduce memory usage)
def reduce_mem_usage(df):
    """Downcast 64-bit numeric columns of ``df`` to 32-bit, in place.

    Args:
        df: DataFrame to shrink. It is modified in place.

    Returns:
        The same DataFrame object, to allow ``df = reduce_mem_usage(df)``.

    ``float64`` columns become ``float32``. ``int64`` columns become ``int32``
    only when every value fits in the int32 range; otherwise they are left as
    ``int64``, because a blind cast would silently wrap around.
    Columns of any other dtype are not touched.
    """
    int32_limits = np.iinfo(np.int32)
    for col in df.columns:
        col_dtype = df[col].dtype
        if col_dtype == "float64":
            df[col] = df[col].astype("float32")
        elif col_dtype == "int64":
            values = df[col]
            fits_int32 = values.empty or (
                values.min() >= int32_limits.min
                and values.max() <= int32_limits.max
            )
            if fits_int32:
                df[col] = values.astype("int32")
    return df

# reduce_mem_usage casts columns in place and returns the same frame, so bind
# each result back to its own split (the validation result was previously
# bound to an unrelated `test_df` name).
train_df = reduce_mem_usage(train_df)
valid_df = reduce_mem_usage(valid_df)
gc.collect()

In [None]:
# Define Final Feature Order
# Numeric columns first, categorical IDs last; this is the column order of the
# model input tensor.
DL_FEATURES = NUMERIC_FEATURES + CATEGORICAL_FEATURES
print("Number of DL features:", len(DL_FEATURES))

# Keep "date" alongside the model columns: sliding_window_generator sorts each
# store/family group by it, so dropping it here makes the generator fail.
# It is not part of DL_FEATURES and therefore never reaches the model.
MODEL_COLUMNS = ["date"] + DL_FEATURES + [TARGET]
train_df = train_df[MODEL_COLUMNS]
valid_df = valid_df[MODEL_COLUMNS]

## Sliding Window Generator (Reusable Engine)

All DL models will reuse this logic.

### Build window generator:
 - window_size = 28
 - horizon = 1
 - Ensure time continuity per store-family
 - Batch-safe generator (tf / torch compatible)

### Output
 -  Sliding window function
 -  Memory-efficient batching
 -  Tested on small subset

In [None]:
# Sliding Window Function
def sliding_window_generator(
    df,
    window_size=28,
    horizon=1,
    feature_cols=None,
    target_col="sales_log",
    group_cols=("store_nbr", "family"),
    batch_size=256,
):
    """
    Memory-safe sliding window generator.
    Yields batches for DL training.

    Windows never cross group boundaries: each ``group_cols`` group is
    processed on its own, and groups shorter than ``window_size + horizon``
    rows are skipped.

    Args:
        df: DataFrame holding ``feature_cols``, ``target_col`` and
            ``group_cols``. If it has a ``"date"`` column, each group is
            sorted by it; otherwise the existing row order within each group
            is used as the time order.
        window_size: Number of consecutive rows in each input window.
        horizon: How many rows after the end of the window the target is
            taken from (1 = the row right after the window).
        feature_cols: Columns used as model inputs, in tensor column order.
            Required.
        target_col: Column the target value is read from.
        group_cols: Columns identifying one independent series.
        batch_size: Number of windows per yielded batch; the final batch may
            be smaller.

    Yields:
        ``(X, y)`` tuples of float32 arrays with shapes
        ``(batch, window_size, len(feature_cols))`` and ``(batch,)``.

    Raises:
        ValueError: If ``feature_cols`` is not provided (raised when the
            generator is first advanced).
    """
    if feature_cols is None:
        raise ValueError("feature_cols must list the model input columns")
    feature_cols = list(feature_cols)

    # The date column is only needed for ordering; tolerate frames where it
    # has already been dropped instead of failing with a KeyError.
    has_date = "date" in df.columns

    X_batch, y_batch = [], []

    for _, gdf in df.groupby(list(group_cols)):
        n_windows = len(gdf) - window_size - horizon + 1
        if n_windows <= 0:
            # Too short for even one window; skip before doing any conversion.
            continue

        if has_date:
            gdf = gdf.sort_values("date")

        X_values = gdf[feature_cols].values.astype("float32")
        y_values = gdf[target_col].values.astype("float32")

        for start in range(n_windows):
            stop = start + window_size
            X_batch.append(X_values[start:stop])
            y_batch.append(y_values[stop + horizon - 1])

            if len(X_batch) == batch_size:
                yield np.array(X_batch), np.array(y_batch)
                X_batch, y_batch = [], []

    # Flush the final, possibly smaller, batch.
    if X_batch:
        yield np.array(X_batch), np.array(y_batch)


In [None]:
# Create Train / Validation Generator
WINDOW_SIZE = 28
HORIZON = 1
BATCH_SIZE = 256

# Both generators share every setting; only the source frame differs.
_window_settings = dict(
    window_size=WINDOW_SIZE,
    horizon=HORIZON,
    feature_cols=DL_FEATURES,
    target_col="sales_log",
    batch_size=BATCH_SIZE,
)

train_gen = sliding_window_generator(df=train_df, **_window_settings)
valid_gen = sliding_window_generator(df=valid_df, **_window_settings)


In [None]:
# Sanity Check
# Pull the first batch from each generator and print its shapes.
# NOTE: next() consumes that batch from train_gen / valid_gen, and the
# X_train / y_train / X_valid / y_valid arrays bound here are the ones the
# LSTM cells further down build, train and evaluate on — i.e. a single batch
# per split, not the full dataset.
X_train, y_train = next(train_gen)
print("Train batch:", X_train.shape, y_train.shape)

X_valid, y_valid = next(valid_gen)
print("Valid batch:", X_valid.shape, y_valid.shape)

In [None]:

# Save DL Scaler
import joblib
import json

# Create the output folder first: joblib.dump and open(..., "w") both fail
# with FileNotFoundError when the parent directory does not exist.
os.makedirs("models", exist_ok=True)

joblib.dump(scaler, 'models/scaler.pkl')

# Save Feature Map
# Everything needed to rebuild model inputs at inference time: feature lists,
# tensor column order, category -> ID lookups and the window length.
feature_map = {
    "numeric_features": NUMERIC_FEATURES,
    "categorical_features": CATEGORICAL_FEATURES,
    "dl_features_order": DL_FEATURES,
    "category_maps": category_maps,
    "window_size": WINDOW_SIZE,
}

with open("models/dl_feature_map.json", "w") as f:
    json.dump(feature_map, f, indent=4)


## Model 1: LSTM (Baseline DL)
 - Handles long dependencies
 - Strong baseline
 - Easy to interpret
### What we do
 - Build LSTM architecture
 - Sequence → Dense output
 - Early stopping
 - GPU detection

### Output
 - LSTM RMSLE
 - Training curves
 - Saved model

In [None]:
# Import Libraries
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# Evaluation Metrics
def rmsle(y_true, y_pred):
    """Root-mean-squared error after clipping both inputs at zero.

    No log transform is applied here, so the result is an RMSLE only when the
    inputs are already on a log scale.

    Args:
        y_true: Array-like of reference values.
        y_pred: Array-like of predicted values.

    Returns:
        The square root of the mean squared difference of the clipped inputs.
    """
    clipped_true = np.maximum(y_true, 0)
    clipped_pred = np.maximum(y_pred, 0)
    squared_error = (clipped_true - clipped_pred) ** 2
    return np.sqrt(np.mean(squared_error))

In [None]:
# Build LSTM Model - Baseline Architecture
# Feature count is read from the sample batch: (batch, window, features).
num_features = X_train.shape[2]

lstm_model = Sequential([
    # return_sequences=False makes the LSTM emit only its final state, so the
    # network produces one value per window (sequence-to-one) and matches the
    # 1-D target. With True, Dense(1) would output one value per timestep.
    LSTM(units=64, return_sequences=False, input_shape=(WINDOW_SIZE, num_features)),
    Dropout(0.2),
    Dense(1)
])

lstm_model.compile(optimizer='adam', loss='mse')
lstm_model.summary()

In [None]:
# Early Stopping
# Stop once validation loss has gone 3 epochs without improving, then restore
# the weights from the best epoch.
early_stopping = EarlyStopping(
    restore_best_weights=True,
    patience=3,
    monitor='val_loss',
)

In [None]:
# Train model
# Validate on the held-out batch. The previous code paired X_train with
# y_valid, i.e. training inputs scored against unrelated validation targets,
# which made val_loss (and therefore early stopping) meaningless.
history = lstm_model.fit(
    X_train, y_train,
    validation_data=(X_valid, y_valid),
    epochs=30,
    batch_size=BATCH_SIZE,
    callbacks=[early_stopping],
    verbose=1
)

In [None]:
# RMSLE Function
def rmsle(y_true, y_pred):
    """Root-mean-squared log error for inputs given on the log1p scale.

    Both inputs are mapped back with ``expm1``, predictions are clipped at
    zero, and the error is then computed between the ``log1p`` values.

    Args:
        y_true: Array-like of log1p-scale reference values.
        y_pred: Array-like of log1p-scale predictions.

    Returns:
        The RMSLE as a NumPy float.
    """
    actual = np.expm1(y_true)
    predicted = np.maximum(np.expm1(y_pred), 0)
    log_gap = np.log1p(actual) - np.log1p(predicted)
    return np.sqrt(np.mean(log_gap ** 2))


In [None]:
# Predict and Evaluate
# Flatten the model output to 1-D so it lines up with y_valid.
raw_predictions = lstm_model.predict(X_valid)
y_valid_pred = raw_predictions.reshape(-1)

lstm_rmsle = rmsle(y_valid, y_valid_pred)
print(f"LSTM RMSLE: {lstm_rmsle:.4f}")


In [None]:
def tf_dataset_from_generator(generator_fn):
    """Wrap a batch-yielding generator factory in a prefetching tf.data.Dataset.

    Args:
        generator_fn: Zero-argument callable returning a generator of
            ``(X, y)`` batches.

    Returns:
        A ``tf.data.Dataset`` whose elements are float32 ``(X, y)`` pairs with
        shapes ``(None, WINDOW_SIZE, len(DL_FEATURES))`` and ``(None,)``, with
        prefetching set to ``tf.data.AUTOTUNE``.
    """
    # The leading None leaves the batch dimension unconstrained.
    features_spec = tf.TensorSpec(
        shape=(None, WINDOW_SIZE, len(DL_FEATURES)), dtype=tf.float32
    )
    targets_spec = tf.TensorSpec(shape=(None,), dtype=tf.float32)

    dataset = tf.data.Dataset.from_generator(
        generator_fn,
        output_signature=(features_spec, targets_spec),
    )
    return dataset.prefetch(tf.data.AUTOTUNE)

In [None]:
# Pass a factory (lambda) rather than a generator object so a fresh generator
# can be created each time the dataset is iterated.
# Use the shared BATCH_SIZE constant instead of repeating the literal 256, so
# the datasets stay in sync with the rest of the notebook.
train_ds = tf_dataset_from_generator(lambda: sliding_window_generator(
    train_df, WINDOW_SIZE, HORIZON, DL_FEATURES, "sales_log", batch_size=BATCH_SIZE
))

valid_ds = tf_dataset_from_generator(lambda: sliding_window_generator(
    valid_df, WINDOW_SIZE, HORIZON, DL_FEATURES, "sales_log", batch_size=BATCH_SIZE
))
