# Capstone: LSTM + CNN-GRU Time Series Notebook

This notebook contains a complete pipeline that loads `smart_mobility_dataset.csv` (expected in the notebook's working directory), preprocesses it, builds sliding windows, trains an LSTM and a CNN-GRU hybrid to predict `Road_Occupancy_%` (configurable), and evaluates RMSE, MAE, and NRMSE.

It also includes guidance on target scaling and reporting RMSE in original units.

----
⚠️ **Note:** The setup cell contains commented-out `pip install` commands. Uncomment and run them if any package is missing from your environment (e.g., `pip install tensorflow pandas scikit-learn matplotlib`).


In [None]:
# Setup: uncomment and run these if packages missing
# !pip install --upgrade pip
# !pip install tensorflow pandas scikit-learn matplotlib seaborn

import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import json

import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import LSTM, Dense, Dropout, Conv1D, MaxPooling1D, GRU, Input
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

print('TensorFlow version:', tf.__version__)


In [None]:
# Read the raw CSV from the working directory into `df`, failing fast with a
# clear message when the file is missing, then preview the data.
csv_path = 'smart_mobility_dataset.csv'
assert os.path.exists(csv_path), (
    f'Dataset not found at {csv_path}. Please upload there.'
)
df = pd.read_csv(csv_path)
print(f'Shape: {df.shape}')
# First rows and summary statistics, rendered as rich tables.
display(df.head(), df.describe())


In [None]:
# Preprocessing: parse timestamp, add cyclical time features, encode categories, fill NA if any
def preprocess_df(df):
    """Return a cleaned, feature-engineered copy of the raw frame.

    Steps, in order:

    1. If a ``Timestamp`` column exists, parse it to datetime, sort the rows
       chronologically, and add ``hour``, ``dow`` (Monday=0), ``is_weekend``
       (1 for Saturday/Sunday) and the cyclical ``hour_sin`` / ``hour_cos``.
    2. One-hot encode whichever of ``Traffic_Light_State``,
       ``Weather_Condition`` and ``Traffic_Condition`` are present, dropping
       the first level of each.
    3. Fill missing values in numeric columns by forward fill, then fill any
       remaining gaps (e.g. leading NaNs) with the column median.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw data. It is not modified; all work happens on a copy.

    Returns
    -------
    pandas.DataFrame
        The processed copy.
    """
    df = df.copy()

    # Time handling is optional so the function also works on frames without
    # a timestamp column.
    if 'Timestamp' in df.columns:
        df['Timestamp'] = pd.to_datetime(df['Timestamp'])
        # Chronological order matters: the forward fill below and the sliding
        # windows built later both assume rows are sorted in time.
        df = df.sort_values('Timestamp').reset_index(drop=True)
        df['hour'] = df['Timestamp'].dt.hour
        df['dow'] = df['Timestamp'].dt.dayofweek
        df['is_weekend'] = df['dow'].isin([5, 6]).astype(int)
        # Cyclical encoding keeps 23:00 and 00:00 close in feature space.
        df['hour_sin'] = np.sin(2 * np.pi * df['hour'] / 24)
        df['hour_cos'] = np.cos(2 * np.pi * df['hour'] / 24)

    # One-hot encode the known low-cardinality categorical columns, if any.
    cat_cols = [
        c for c in ['Traffic_Light_State', 'Weather_Condition', 'Traffic_Condition']
        if c in df.columns
    ]
    if cat_cols:
        df = pd.get_dummies(df, columns=cat_cols, drop_first=True)

    # Forward fill, then median for whatever is still missing.
    # `.ffill()` replaces `fillna(method='ffill')`, which pandas has deprecated.
    num_cols = df.select_dtypes(include=[np.number]).columns.tolist()
    df[num_cols] = df[num_cols].ffill().fillna(df[num_cols].median())
    return df

# Run the preprocessing on the raw frame and preview the engineered result.
df_p = preprocess_df(df)
print(f'After preprocess shape: {df_p.shape}')
display(df_p.head())


In [None]:
# Sliding window maker: creates (X, y) for seq-to-one forecasting
def make_windows(df, feature_cols, target_col, seq_len=24, horizon=12):
    """Build sliding-window samples for sequence-to-one forecasting.

    Sample ``i`` pairs the feature rows ``i .. i + seq_len - 1`` with the
    target value at row ``i + seq_len + horizon - 1``, i.e. ``horizon`` rows
    after the last row of the input window.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame; rows are used in their existing order.
    feature_cols : list
        Columns that make up each input window.
    target_col : str
        Column to predict.
    seq_len : int, default 24
        Number of consecutive rows per input window.
    horizon : int, default 12
        Number of rows after the window end at which the target is read.

    Returns
    -------
    X : numpy.ndarray
        Shape ``(n_windows, seq_len, len(feature_cols))``.
    y : numpy.ndarray
        Shape ``(n_windows,)``.

    When ``df`` is too short to produce a single window, empty arrays with
    those same trailing dimensions are returned, so downstream code that
    reads ``X.shape[2]`` still works.
    """
    features = df[feature_cols].values
    targets = df[target_col].values
    n_windows = len(df) - seq_len - horizon + 1

    if n_windows <= 0:
        # np.array([]) would collapse to shape (0,); keep the 3-D layout.
        empty_X = np.empty((0, max(seq_len, 0)) + features.shape[1:], dtype=features.dtype)
        empty_y = np.empty((0,), dtype=targets.dtype)
        return empty_X, empty_y

    target_offset = seq_len + horizon - 1
    X = np.array([features[start:start + seq_len] for start in range(n_windows)])
    y = np.array([targets[start + target_offset] for start in range(n_windows)])
    return X, y

# Choose features and target (customize as needed)
SEQ_LEN = 24   # rows per input window
HORIZON = 12   # rows after the window end at which the target is read

# Falls back to the first column when the expected target is absent.
TARGET = 'Road_Occupancy_%' if 'Road_Occupancy_%' in df_p.columns else df_p.columns[0]

# Use only numeric/boolean columns as model inputs: any text column that was
# not one-hot encoded during preprocessing cannot be standardised or fed to
# the networks, so it is skipped here (and reported below).
_numeric_cols = set(df_p.select_dtypes(include=['number', 'bool']).columns)
_candidates = [c for c in df_p.columns if c not in ['Timestamp', TARGET]]
FEATURES = [c for c in _candidates if c in _numeric_cols]
_skipped = [c for c in _candidates if c not in _numeric_cols]
if _skipped:
    print('Skipping non-numeric columns:', _skipped)
print('Target:', TARGET)
print('Number of features:', len(FEATURES))

X_all, y_all = make_windows(df_p, FEATURES, TARGET, seq_len=SEQ_LEN, horizon=HORIZON)
print('X shape, y shape:', X_all.shape, y_all.shape)


In [None]:
# Chronological 70% / 15% / 15% split: the windows are already in time order,
# so plain slicing keeps validation and test strictly after training.
n = len(X_all)
train_end, val_end = int(n * 0.7), int(n * 0.85)

X_train = X_all[:train_end]
y_train = y_all[:train_end]
X_val = X_all[train_end:val_end]
y_val = y_all[train_end:val_end]
X_test = X_all[val_end:]
y_test = y_all[val_end:]
print('Train/Val/Test shapes:', X_train.shape, X_val.shape, X_test.shape)

# Fit the feature scaler on training data only. The 3-D windows are flattened
# to (rows, n_features) so each feature gets one mean and one variance.
n_features = X_train.shape[2]
X_train_flat = X_train.reshape(-1, n_features)
feature_scaler = StandardScaler().fit(X_train_flat)

def scale_X(X, scaler):
    """Apply a fitted scaler to windowed data and return it in its original shape.

    ``X`` is flattened to two dimensions, with its third axis as the column
    axis, passed through ``scaler.transform``, and reshaped back.
    """
    original_shape = X.shape
    flat = X.reshape(-1, original_shape[2])
    scaled_flat = scaler.transform(flat)
    return scaled_flat.reshape(original_shape)

# Standardise every split with the scaler fitted on the training windows.
X_train_s, X_val_s, X_test_s = (
    scale_X(split, feature_scaler) for split in (X_train, X_val, X_test)
)

# The target gets its own scaler, fitted on the training targets only, so that
# predictions can be inverse-transformed and RMSE reported in original units.
target_scaler = StandardScaler().fit(y_train.reshape(-1, 1))
y_train_s, y_val_s, y_test_s = (
    target_scaler.transform(part.reshape(-1, 1)).ravel()
    for part in (y_train, y_val, y_test)
)

print('Scaling complete')


In [None]:
def build_lstm(seq_len, n_features):
    """Build and compile a stacked two-layer LSTM regressor.

    Architecture: LSTM(128, returning the full sequence) -> Dropout(0.2) ->
    LSTM(64) -> Dropout(0.2) -> Dense(32, ReLU) -> Dense(1).

    Parameters
    ----------
    seq_len : int
        Number of time steps in each input window.
    n_features : int
        Number of features per time step.

    Returns
    -------
    A Keras ``Sequential`` model compiled with the Adam optimizer, MSE loss
    and MAE as an extra metric.
    """
    model = Sequential([
        # Declare the input with an explicit Input layer (as build_cnn_gru
        # does) rather than `input_shape=` on the first layer, which Keras 3
        # discourages for Sequential models.
        Input(shape=(seq_len, n_features)),
        # return_sequences=True so the second LSTM receives one vector per step.
        LSTM(128, return_sequences=True),
        Dropout(0.2),
        LSTM(64),
        Dropout(0.2),
        Dense(32, activation='relu'),
        Dense(1),
    ])
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model

# Read the window length and feature count off the scaled training tensor.
seq_len, n_features = X_train_s.shape[1], X_train_s.shape[2]
lstm = build_lstm(seq_len, n_features)
lstm.summary()

# Callbacks reused when training either model: stop after 8 epochs without
# improvement (restoring the best weights) and halve the learning rate after
# 4 stalled epochs, never going below 1e-6. Both watch Keras' default
# monitored quantity (validation loss).
callbacks = [
    EarlyStopping(restore_best_weights=True, patience=8),
    ReduceLROnPlateau(factor=0.5, patience=4, min_lr=1e-6),
]

# To train, uncomment below (might take time):
# history = lstm.fit(X_train_s, y_train_s, validation_data=(X_val_s, y_val_s),
#                   epochs=100, batch_size=64, callbacks=callbacks)

# After training, predict in scaled space, then invert the target scaling so
# the error is expressed in the target's original units:
# y_pred_s = lstm.predict(X_test_s).reshape(-1)
# y_pred = target_scaler.inverse_transform(y_pred_s.reshape(-1,1)).reshape(-1)
# from sklearn.metrics import mean_squared_error
# rmse = np.sqrt(mean_squared_error(y_test, y_pred))  # `squared=False` is gone in recent scikit-learn
# print('LSTM Test RMSE:', rmse)


In [None]:
def build_cnn_gru(seq_len, n_features):
    """Build and compile the CNN-GRU hybrid regressor.

    Two 1-D convolutions (64 then 32 filters, kernel size 3, 'same' padding,
    ReLU) with a max-pool of size 2 between them feed a GRU(64), followed by
    Dense(32, ReLU) and a single-unit output. The model takes inputs of shape
    ``(seq_len, n_features)`` and is compiled with Adam, MSE loss and MAE as
    an extra metric.
    """
    inputs = Input(shape=(seq_len, n_features))

    # Convolutional front end.
    conv_a = Conv1D(filters=64, kernel_size=3, padding='same', activation='relu')(inputs)
    pooled = MaxPooling1D(pool_size=2)(conv_a)
    conv_b = Conv1D(filters=32, kernel_size=3, padding='same', activation='relu')(pooled)

    # Recurrent summary and regression head.
    summary_vec = GRU(units=64)(conv_b)
    hidden = Dense(units=32, activation='relu')(summary_vec)
    prediction = Dense(units=1)(hidden)

    model = Model(inputs=inputs, outputs=prediction)
    model.compile(loss='mse', optimizer='adam', metrics=['mae'])
    return model

# Build the hybrid model with the same input dimensions and show its layers.
cnn_gru = build_cnn_gru(seq_len=seq_len, n_features=n_features)
cnn_gru.summary()

# To train, uncomment and run:
# history2 = cnn_gru.fit(X_train_s, y_train_s, validation_data=(X_val_s, y_val_s),
#                      epochs=100, batch_size=64, callbacks=callbacks)

# Evaluate the same way as for the LSTM: predict on X_test_s, inverse-transform
# with target_scaler, and compute RMSE in original units.


In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error
def compute_metrics(y_true, y_pred):
    """Compute RMSE, MAE and range-normalised RMSE for a single target.

    Parameters
    ----------
    y_true, y_pred : array-like
        Observed and predicted values in the same units. Intended for 1-D
        (single-output) targets.

    Returns
    -------
    dict
        ``{'rmse': ..., 'mae': ..., 'nrmse': ...}`` where ``nrmse`` is the
        RMSE divided by ``max(y_true) - min(y_true)``. ``nrmse`` is NaN when
        ``y_true`` is constant, because the normaliser is then zero.
    """
    # Accept lists as well as arrays; .max()/.min() below need an ndarray.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # Take the root explicitly: the `squared=False` keyword was deprecated in
    # scikit-learn 1.4 and removed in later releases.
    rmse = float(np.sqrt(mean_squared_error(y_true, y_pred)))
    mae = mean_absolute_error(y_true, y_pred)

    value_range = y_true.max() - y_true.min()
    nrmse = rmse / value_range if value_range > 0 else float('nan')
    return {'rmse': rmse, 'mae': mae, 'nrmse': nrmse}

print('Metrics helper ready')


## Notes / Tips
- Each sample uses a window of `seq_len` consecutive rows (default 24) to predict `TARGET` at the row `horizon` steps after the window ends (default 12). Adjust `seq_len` and `horizon` to suit your use case.
- The notebook scales features and target separately. RMSE reported after inverse-transform is in original units.
- Possible extensions: time-series cross-validation, hyperparameter search (e.g., Optuna), or an automated training script.

----
Now save the notebook and open it in Jupyter/Lab to run cells interactively.
