# Module 11 — Time Series & Signal Modeling (Expanded)

This notebook covers:

- creating a synthetic NDVI-like time series dataset
- feature engineering (lags, rolling stats)
- sliding-window data preparation
- LSTM forecasting (Keras)
- simple Transformer-style forecaster using Keras `MultiHeadAttention`
- evaluation metrics (MAE, MAPE) and baseline comparison
- saving models

Designed for classroom demos: the notebook uses a small synthetic dataset and short training runs.

## 1 — Setup (install packages and imports)

In [None]:
!pip -q install -U tensorflow==2.12.0 numpy pandas matplotlib scikit-learn --quiet

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error
print('TF version:', tf.__version__)

# Seed every random number generator the notebook relies on so that
# Restart & Run All gives repeatable data, weight initialisation and shuffling.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
# Seeds Python's `random`, NumPy and TensorFlow in one call (NumPy is re-seeded
# with the same value, so the synthetic data below is unchanged).
tf.keras.utils.set_random_seed(RANDOM_SEED)


## 2 — Create synthetic NDVI-like time series and visualize

In [None]:
# Synthetic monthly NDVI-like series: baseline + linear trend + annual cycle + Gaussian noise
periods = 60  # months (5 years)
# An explicit MonthEnd offset produces the same month-end timestamps as the 'M'
# alias, without depending on an alias that newer pandas releases deprecate.
dates = pd.date_range('2018-01-01', periods=periods, freq=pd.offsets.MonthEnd())
trend = 0.01 * np.arange(periods)
# One full sine cycle every 12 samples, so the seasonality is annual and lines up
# with the 12-month lag / 12-step window used later in the notebook.
seasonal = 0.2 * np.sin(2 * np.pi * np.arange(periods) / 12)
noise = np.random.normal(0, 0.03, periods)
ndvi = 0.4 + trend + seasonal + noise

# Build the frame in one expression (no in-place mutation), indexed by date
df = pd.DataFrame({'date': dates, 'ndvi': ndvi}).set_index('date')
# Quick look at the generated series; grid lines make the trend easier to read
ndvi_ax = df['ndvi'].plot(title='Synthetic NDVI time series', figsize=(10, 3))
ndvi_ax.grid(True)
plt.show()


## 3 — Feature engineering: lags and rolling statistics

In [None]:
# Work on a copy so the raw series in `df` stays untouched
df_feat = df.copy()
# Lagged copies of the series: value 1, 2, 3, 6 and 12 rows earlier
for lag in [1, 2, 3, 6, 12]:
    df_feat[f'lag_{lag}'] = df_feat['ndvi'].shift(lag)
# Rolling mean of the three PREVIOUS rows: shifting by one first keeps the
# current value (the prediction target) out of its own feature.
df_feat['roll_mean_3'] = df_feat['ndvi'].shift(1).rolling(3).mean()
# The first rows have no complete history (NaN lags), so drop them
df_feat = df_feat.dropna()
print('Feature frame shape:', df_feat.shape)
df_feat.head()


## 4 — Prepare sliding-window arrays for supervised learning

In [None]:
# Supervised framing: the previous `window_size` observations predict the next one
values = df['ndvi'].values
window_size = 12
if len(values) <= window_size:
    raise ValueError(f'Need more than {window_size} observations, got {len(values)}')
X = np.array([values[i:i + window_size] for i in range(len(values) - window_size)])  # shape (samples, timesteps)
y = values[window_size:].copy()  # the observation that follows each window
print('X shape', X.shape, 'y shape', y.shape)

# train/val/test split (no shuffle - time series)
train_size = int(0.7 * len(X))
val_size = int(0.15 * len(X))
X_train, y_train = X[:train_size], y[:train_size]
X_val, y_val = X[train_size:train_size + val_size], y[train_size:train_size + val_size]
X_test, y_test = X[train_size + val_size:], y[train_size + val_size:]

# Fit ONE scaler on the observations covered by the training windows and their
# targets. Fitting MinMaxScaler on the (samples, timesteps) matrix would give
# every window position its own min/max and put inputs and targets on different
# scales; a single 1-feature scaler keeps the whole series on one common scale.
train_end = train_size + window_size  # exclusive index of the last training target in `values`
scaler = MinMaxScaler()
scaler.fit(values[:train_end].reshape(-1, 1))
# Targets share the input scale; the separate name is kept for the later cells
# that call `y_scaler.inverse_transform` on model predictions.
y_scaler = scaler


def _scale_windows(windows):
    """Scale a (samples, timesteps) array with `scaler` and return it as (samples, timesteps, 1)."""
    n_samples, n_steps = windows.shape
    return scaler.transform(windows.reshape(-1, 1)).reshape(n_samples, n_steps, 1)


X_train = _scale_windows(X_train)
X_val = _scale_windows(X_val)
X_test = _scale_windows(X_test)

y_train_scaled = y_scaler.transform(y_train.reshape(-1, 1)).flatten()
y_val_scaled = y_scaler.transform(y_val.reshape(-1, 1)).flatten()
y_test_scaled = y_scaler.transform(y_test.reshape(-1, 1)).flatten()

print('Shapes after reshape:', X_train.shape, X_val.shape, X_test.shape)


## 5 — Baseline: persistence model and metrics

In [None]:
def mape(true, pred, eps=1e-8):
    """Mean absolute percentage error, returned in percent.

    Parameters
    ----------
    true : array-like
        Observed values.
    pred : array-like
        Predicted values, broadcastable against `true`.
    eps : float, optional
        Lower bound applied to ``abs(true)`` in the denominator so that zero
        (or near-zero) observations do not cause a division by zero.

    Returns
    -------
    float
        ``mean(|true - pred| / max(|true|, eps)) * 100``.
    """
    true = np.asarray(true, dtype=float)
    pred = np.asarray(pred, dtype=float)
    # Guard on the magnitude: adding eps to the signed value would not protect
    # negative observations and could even create a zero denominator.
    denom = np.maximum(np.abs(true), eps)
    return np.mean(np.abs(true - pred) / denom) * 100

# Persistence baseline: forecast each test target with the observation just before it.
# The forecast is read from the unscaled target series `y` (y[i-1] is the last raw
# value of window i), because `X_test` holds min-max-scaled inputs and would not be
# comparable with the unscaled `y_test`.
persistence_pred = y[train_size + val_size - 1:-1]
assert persistence_pred.shape == y_test.shape, 'baseline and targets must align one-to-one'
print('Persistence MAPE:', mape(y_test, persistence_pred))
print('Persistence MAE:', mean_absolute_error(y_test, persistence_pred))


## 6 — LSTM forecasting model (Keras)

In [None]:
from tensorflow.keras import layers, models
# Single-layer LSTM regressor: one window of scaled values in, one scaled value out
lstm_stack = [
    layers.Input(shape=(window_size, 1)),
    layers.LSTM(64, return_sequences=False),
    layers.Dropout(0.2),
    layers.Dense(1),
]
model_lstm = models.Sequential(lstm_stack)
model_lstm.compile(loss='mse', optimizer='adam')
model_lstm.summary()

# Short silent training run; the validation split is only used for monitoring
history = model_lstm.fit(
    X_train,
    y_train_scaled,
    validation_data=(X_val, y_val_scaled),
    epochs=40,
    batch_size=8,
    verbose=0,
)

# Training vs. validation loss per epoch
plt.figure(figsize=(8, 3))
for curve_key, curve_label in (('loss', 'train_loss'), ('val_loss', 'val_loss')):
    plt.plot(history.history[curve_key], label=curve_label)
plt.legend()
plt.title('LSTM training loss')
plt.show()

# Forecast the test windows, then map the scaled outputs back to the original units
pred_lstm_scaled = model_lstm.predict(X_test).flatten()
pred_lstm_column = pred_lstm_scaled.reshape(-1, 1)
pred_lstm = y_scaler.inverse_transform(pred_lstm_column).flatten()
print('LSTM Test MAPE:', mape(y_test, pred_lstm))
print('LSTM Test MAE:', mean_absolute_error(y_test, pred_lstm))


## 7 — Simple Transformer-style model (Keras MultiHeadAttention)

In [None]:
from tensorflow.keras import layers, models

# Positional encoding
def add_positional_encoding(x):
    """Add a small, linearly increasing per-timestep bias to `x`.

    Parameters
    ----------
    x : tensor
        Input whose axis 1 is the time axis; the comment in the original code
        gives the layout as (batch, timesteps, features).

    Returns
    -------
    tensor
        `x` plus ``position * 1e-3``, where position is 0, 1, ..., timesteps - 1.
        The bias has shape (timesteps, 1) and is broadcast over the other axes.
    """
    # x shape: (batch, timesteps, features)
    timesteps = tf.shape(x)[1]
    # Cast to x's own dtype (not a hard-coded float32) so the addition below
    # does not hit a dtype mismatch when x is, e.g., float16.
    pos = tf.cast(tf.range(timesteps), x.dtype)[..., tf.newaxis]
    return x + pos * 1e-3  # small positional bias (simple)

# One attention block: embed -> positional bias -> self-attention -> residual + norm -> pool -> regress
inputs = layers.Input(shape=(window_size, 1))
# Lift the single input channel to a 64-dimensional representation
embedded = layers.Dense(64)(inputs)
embedded = add_positional_encoding(embedded)
# Self-attention: the sequence serves as both query and value
self_attention = layers.MultiHeadAttention(num_heads=4, key_dim=16)
attn_output = self_attention(embedded, embedded)
# Residual connection followed by layer normalisation
residual = layers.Add()([embedded, attn_output])
normalised = layers.LayerNormalization()(residual)
# Average over the time axis, then map to a single forecast value
pooled = layers.GlobalAveragePooling1D()(normalised)
outputs = layers.Dense(1)(pooled)
model_trans = models.Model(inputs, outputs)
model_trans.compile(loss='mse', optimizer='adam')
model_trans.summary()

# Short silent training run on the same scaled windows as the LSTM
history_t = model_trans.fit(
    X_train,
    y_train_scaled,
    validation_data=(X_val, y_val_scaled),
    epochs=30,
    batch_size=8,
    verbose=0,
)

# Forecast the test windows, then map the scaled outputs back to the original units
pred_t_scaled = model_trans.predict(X_test).flatten()
pred_t_column = pred_t_scaled.reshape(-1, 1)
pred_t = y_scaler.inverse_transform(pred_t_column).flatten()
print('Transformer-style Test MAPE:', mape(y_test, pred_t))
print('Transformer-style Test MAE:', mean_absolute_error(y_test, pred_t))


## 8 — Visualize forecasts vs actuals

In [None]:
# Overlay the true test values and both model forecasts on a shared step axis
plt.figure(figsize=(10, 4))
for series, series_label in (
    (y_test, 'actual'),
    (pred_lstm, 'LSTM_pred'),
    (pred_t, 'Transformer_pred'),
):
    plt.plot(range(len(series)), series, label=series_label)
plt.legend()
plt.title('Forecasts vs Actuals (Test set)')
plt.show()


## 9 — Save models and instructor notes

In [None]:
# Persist both trained models in HDF5 format
for trained_model, target_path in (
    (model_lstm, '/mnt/data/lstm_forecast.h5'),
    (model_trans, '/mnt/data/transformer_forecast.h5'),
):
    trained_model.save(target_path)
print('Saved models to /mnt/data')

# Notes for instructors:
# - Use real NDVI or other time series for better demonstration.
# - For longer sequences, consider using sliding windows with stride and batching.
# - Discuss proper cross-validation for time series (walk-forward validation).
