In [None]:
# Advanced Time Series Forecasting with Deep Learning: LSTM with Attention
# This notebook implements the full project:
# - Load/generate a univariate time series
# - Preprocess & create lookback windows
# - Baseline LSTM model
# - Attention-based LSTM model
# - Train, evaluate, and compare using RMSE/MAE

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error

import tensorflow as tf
from tensorflow.keras import layers, models, callbacks, optimizers

from statsmodels.datasets import airpassengers

print('TensorFlow version:', tf.__version__)

# Seed NumPy and TensorFlow so weight initialisation, dropout masks and batch
# shuffling are repeatable across "Restart Kernel -> Run All" executions.
# (Some GPU kernels may still introduce small run-to-run differences.)
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [None]:
# Load the AirPassengers dataset (monthly airline passenger counts).
# NOTE(review): confirm the installed statsmodels really provides
# `statsmodels.datasets.airpassengers`; if it does not,
# `statsmodels.datasets.get_rdataset('AirPassengers').data` is presumably the
# intended source (it exposes a `value` column) - verify before relying on it.
data = airpassengers.load_pandas().data

# Show both ends of the frame, then report how many observations it holds.
display(data.head(), data.tail())
print('Length of series:', len(data))

In [None]:
# Prepare univariate time series as a float32 column vector of shape (N, 1)
ts = data['value'].astype('float32').values.reshape(-1, 1)

# Create a monthly DatetimeIndex for plotting.
# 'MS' (month start) is used instead of 'M': the 'M' alias is deprecated in
# recent pandas releases and emits a FutureWarning, while 'MS' is accepted by
# old and new versions alike and labels each observation with its month.
time_index = pd.date_range(start='1949-01', periods=len(ts), freq='MS')

plt.figure(figsize=(8, 3))
plt.plot(time_index, ts)
plt.title('AirPassengers Time Series')
plt.xlabel('Time')
plt.ylabel('Passengers')
plt.tight_layout()
plt.show()

In [None]:
# Scaling the data
# Fit the scaler on the leading (training) part of the series only. Fitting on
# the whole series would leak the min/max of the validation and test periods
# into the model inputs. The first 70% of the raw series always lies inside the
# training span of the chronological 70/15/15 window split performed below.
SCALER_FIT_FRAC = 0.7
n_scaler_fit = int(SCALER_FIT_FRAC * len(ts))

scaler = MinMaxScaler()
scaler.fit(ts[:n_scaler_fit])

# Later observations may map outside [0, 1]; that is expected for a trending
# series and is inverted correctly by scaler.inverse_transform.
ts_scaled = scaler.transform(ts)

def create_sequences(series, lookback=24):
    """Slice a series into overlapping lookback windows and next-step targets.

    Parameters
    ----------
    series : array-like, shape (N, 1) (any trailing feature shape is kept)
        The (scaled) observations in chronological order.
    lookback : int
        Number of consecutive past steps in each input window; must be >= 1.

    Returns
    -------
    X : ndarray, shape (N - lookback, lookback, 1)
        Window ``i`` holds ``series[i:i + lookback]``.
    y : ndarray, shape (N - lookback, 1)
        Target ``i`` is ``series[i + lookback]``, the step right after window ``i``.

    When the series has ``lookback`` or fewer points, correctly shaped empty
    arrays (zero rows) are returned instead of shapeless ``(0,)`` arrays.

    Raises
    ------
    ValueError
        If ``lookback`` is smaller than 1.
    """
    series = np.asarray(series)
    if lookback < 1:
        raise ValueError('lookback must be a positive integer')

    n_windows = len(series) - lookback
    feature_shape = series.shape[1:]
    if n_windows <= 0:
        # Not enough history for a single window: keep the documented rank.
        empty_X = np.empty((0, lookback) + feature_shape, dtype=series.dtype)
        empty_y = np.empty((0,) + feature_shape, dtype=series.dtype)
        return empty_X, empty_y

    # Row i of `offsets` is [i, i+1, ..., i+lookback-1]; fancy indexing then
    # gathers every window in one vectorized step (and returns a copy).
    offsets = np.arange(n_windows)[:, None] + np.arange(lookback)[None, :]
    X = series[offsets]
    y = series[lookback:].copy()
    return X, y

# LOOKBACK is a tunable hyperparameter: how many past months feed each prediction
LOOKBACK = 24  # 24 months (2 years)

# Build every (window, next value) pair from the scaled series
X_all, y_all = create_sequences(ts_scaled, LOOKBACK)

print('X_all shape:', X_all.shape)
print('y_all shape:', y_all.shape)

In [None]:
# Chronological train / validation / test split: 70% / 15% / remainder
n_total = len(X_all)
n_train = int(0.7 * n_total)
n_val = int(0.15 * n_total)
val_end = n_train + n_val  # first index belonging to the test portion

# Slice inputs and targets with the same boundaries so they stay aligned
X_train, X_val, X_test = X_all[:n_train], X_all[n_train:val_end], X_all[val_end:]
y_train, y_val, y_test = y_all[:n_train], y_all[n_train:val_end], y_all[val_end:]

print('Train shape:', X_train.shape, y_train.shape)
print('Val shape   :', X_val.shape, y_val.shape)
print('Test shape  :', X_test.shape, y_test.shape)

In [None]:
# Helper to compute RMSE and MAE in original scale
def compute_metrics(y_true_scaled, y_pred_scaled, scaler):
    """Score predictions after mapping them back to the original units.

    Both arrays are passed through ``scaler.inverse_transform`` before the
    errors are computed, so the metrics are expressed in the unscaled units.

    Returns a ``(rmse, mae)`` tuple.
    """
    actual, predicted = (
        scaler.inverse_transform(arr) for arr in (y_true_scaled, y_pred_scaled)
    )
    mae = mean_absolute_error(actual, predicted)
    rmse = np.sqrt(mean_squared_error(actual, predicted))
    return rmse, mae

In [None]:
# Baseline LSTM model
def build_baseline_lstm(units=64, dropout=0.2, lr=1e-3):
    """Build and compile the single-layer LSTM baseline.

    units:   size of the LSTM hidden state
    dropout: dropout rate applied to the LSTM's final output
    lr:      Adam learning rate

    The input shape is (LOOKBACK, 1), read from the notebook-level LOOKBACK.
    Returns a compiled Keras model trained with MSE loss.
    """
    layer_stack = [
        layers.Input(shape=(LOOKBACK, 1)),
        # Only the last time step's output is passed on to the head
        layers.LSTM(units, return_sequences=False),
        layers.Dropout(dropout),
        layers.Dense(1),
    ]
    model = models.Sequential(layer_stack)
    model.compile(optimizer=optimizers.Adam(learning_rate=lr), loss='mse')
    return model

baseline_model = build_baseline_lstm()
baseline_model.summary()

In [None]:
# Train baseline LSTM
# Stop once validation loss has not improved for 10 epochs and roll the
# weights back to the best epoch seen.
es = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

history_base = baseline_model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=32,
    callbacks=[es],
    verbose=1,
)

In [None]:
# Evaluate baseline on test set (metrics are reported in original units)
y_test_pred_base = baseline_model.predict(X_test)

rmse_base, mae_base = compute_metrics(
    y_true_scaled=y_test,
    y_pred_scaled=y_test_pred_base,
    scaler=scaler,
)
print(f'Baseline LSTM - RMSE: {rmse_base:.3f}, MAE: {mae_base:.3f}')

In [None]:
# Plot baseline predictions vs actual (both mapped back to original units)
y_true_inv = scaler.inverse_transform(y_test)
y_pred_inv = scaler.inverse_transform(y_test_pred_base)

fig, ax = plt.subplots(figsize=(8, 3))
ax.plot(y_true_inv, label='Actual')
ax.plot(y_pred_inv, label='Baseline LSTM Pred')
ax.set_title('Baseline LSTM - Test Set')
ax.set_xlabel('Time index')
ax.set_ylabel('Passengers')
ax.legend()
fig.tight_layout()
plt.show()

In [None]:
# Custom Bahdanau-style Attention layer
class BahdanauAttention(layers.Layer):
    """Additive attention that pools a sequence of LSTM outputs over time.

    A score is computed for every time step as ``V(tanh(W1(values)))``, the
    scores are normalised with a softmax along the time axis, and the inputs
    are summed over time using those weights.

    Parameters
    ----------
    units : int
        Width of the hidden projection ``W1`` used to compute the scores.
    **kwargs
        Standard Keras ``Layer`` arguments (e.g. ``name``), forwarded to the
        base class.
    """

    def __init__(self, units, **kwargs):
        super().__init__(**kwargs)
        # Kept so get_config() can rebuild the layer when a model is saved/cloned
        self.units = units
        self.W1 = layers.Dense(units)
        self.V = layers.Dense(1)

    def call(self, values):
        """values: LSTM outputs for all time steps, shape (batch, timesteps, hidden)

        Returns ``(context_vector, attention_weights)`` where the context
        vector is the weighted sum over the time axis and the weights have
        shape (batch, timesteps, 1).
        """
        score = self.V(tf.nn.tanh(self.W1(values)))  # (batch, timesteps, 1)
        attention_weights = tf.nn.softmax(score, axis=1)  # softmax over time
        # Weights broadcast across the hidden dimension before the reduction
        context_vector = attention_weights * values
        context_vector = tf.reduce_sum(context_vector, axis=1)  # sum over time
        return context_vector, attention_weights

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized."""
        config = super().get_config()
        config.update({'units': self.units})
        return config

In [None]:
# Attention-based LSTM model (stacked LSTM + attention)
def build_attention_lstm(units=64, att_units=32, dropout=0.2, lr=1e-3):
    """Build and compile the two-layer LSTM with attention pooling.

    units:     hidden size of each LSTM layer
    att_units: width of the attention scoring projection
    dropout:   dropout rate used between the LSTMs and before the output
    lr:        Adam learning rate

    The input shape is (LOOKBACK, 1), read from the notebook-level LOOKBACK.
    Returns a compiled Keras model trained with MSE loss.
    """
    inputs = layers.Input(shape=(LOOKBACK, 1))

    # Both LSTMs emit the full sequence so attention can weigh every time step
    first_seq = layers.LSTM(units, return_sequences=True)(inputs)
    first_seq = layers.Dropout(dropout)(first_seq)
    second_seq = layers.LSTM(units, return_sequences=True)(first_seq)  # stacked LSTM

    # The attention weights are returned by the layer but not used by the model
    context_vector, _attention_weights = BahdanauAttention(att_units)(second_seq)

    head_input = layers.Dropout(dropout)(context_vector)
    outputs = layers.Dense(1)(head_input)

    model = models.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer=optimizers.Adam(learning_rate=lr), loss='mse')
    return model

att_model = build_attention_lstm()
att_model.summary()

In [None]:
# Train Attention LSTM
# Same early-stopping policy as the baseline: wait 10 epochs without a better
# validation loss, then restore the best weights.
es2 = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

history_att = att_model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=32,
    callbacks=[es2],
    verbose=1,
)

In [None]:
# Evaluate Attention LSTM on test set (metrics are reported in original units)
y_test_pred_att = att_model.predict(X_test)

rmse_att, mae_att = compute_metrics(
    y_true_scaled=y_test,
    y_pred_scaled=y_test_pred_att,
    scaler=scaler,
)
print(f'Attention LSTM - RMSE: {rmse_att:.3f}, MAE: {mae_att:.3f}')

In [None]:
# Plot Attention predictions vs actual
# y_true_inv (unscaled test targets) comes from the baseline plotting cell.
y_pred_inv_att = scaler.inverse_transform(y_test_pred_att)

fig, ax = plt.subplots(figsize=(8, 3))
ax.plot(y_true_inv, label='Actual')
ax.plot(y_pred_inv_att, label='Attention LSTM Pred')
ax.set_title('Attention LSTM - Test Set')
ax.set_xlabel('Time index')
ax.set_ylabel('Passengers')
ax.legend()
fig.tight_layout()
plt.show()

In [None]:
# Simple hyperparameter experiment for baseline LSTM (units)
def train_and_eval_baseline(units):
    """Train a fresh baseline LSTM with `units` hidden units and score it.

    The model is fitted on the training split with early stopping
    (patience 7, at most 60 epochs) and evaluated on the validation split.

    Returns (rmse, mae) in original units.
    """
    model = build_baseline_lstm(units=units)
    es_local = callbacks.EarlyStopping(
        monitor='val_loss',
        patience=7,
        restore_best_weights=True,
    )
    model.fit(
        x=X_train,
        y=y_train,
        validation_data=(X_val, y_val),
        epochs=60,
        batch_size=32,
        callbacks=[es_local],
        verbose=0,
    )
    val_preds = model.predict(X_val)
    return compute_metrics(y_val, val_preds, scaler)

# Sweep the hidden size and collect (units, rmse, mae) for each setting
units_list = [32, 64, 128]
results = []
for u in units_list:
    rmse_u, mae_u = train_and_eval_baseline(u)
    print(f'Baseline LSTM (units={u}) - RMSE: {rmse_u:.3f}, MAE: {mae_u:.3f}')
    results.append((u, rmse_u, mae_u))

results