# Capstone: LSTM + CNN-GRU Time Series Notebook

This notebook contains a complete pipeline to load `smart_mobility_dataset.csv` (read from the notebook's working directory), preprocess it, create sliding windows, train an LSTM and a CNN-GRU hybrid to predict `Road_Occupancy_%` (configurable), and evaluate RMSE/NRMSE.

It also includes guidance on target scaling and reporting RMSE in original units.

----
⚠️ **Note:** The setup cell contains commented-out `pip install` commands. Uncomment and run them if any package is missing from your environment (e.g., `pip install tensorflow pandas scikit-learn matplotlib`).


In [2]:
# Setup: uncomment and run these if packages missing
# !pip install --upgrade pip
# !pip install tensorflow pandas scikit-learn matplotlib seaborn

import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import json

import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import LSTM, Dense, Dropout, Conv1D, MaxPooling1D, GRU, Input
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

print('TensorFlow version:', tf.__version__)


TensorFlow version: 2.18.0


In [3]:
# Load the dataset; the CSV is expected in the notebook's working directory.
csv_path = 'smart_mobility_dataset.csv'
if not os.path.exists(csv_path):
    # Raise explicitly rather than `assert`: assertions are stripped when Python
    # runs with -O, which would defer the failure to a less clear read_csv error.
    raise FileNotFoundError(f'Dataset not found at {csv_path}. Please upload there.')
df = pd.read_csv(csv_path)

# Quick sanity check: dimensions, first rows, and numeric summary statistics.
print('Shape:', df.shape)
display(df.head())
display(df.describe())


Shape: (5000, 15)


Unnamed: 0,Timestamp,Latitude,Longitude,Vehicle_Count,Traffic_Speed_kmh,Road_Occupancy_%,Traffic_Light_State,Weather_Condition,Accident_Report,Sentiment_Score,Ride_Sharing_Demand,Parking_Availability,Emission_Levels_g_km,Energy_Consumption_L_h,Traffic_Condition
0,2024-03-01 00:00:00,40.842275,-73.703149,205,49.893435,82.65278,Yellow,Clear,0,-0.609199,2,45,450.760055,19.574337,High
1,2024-03-01 00:05:00,40.831119,-73.987354,202,22.383965,45.829298,Green,Clear,0,0.965442,16,1,321.800341,5.385554,High
2,2024-03-01 00:10:00,40.819549,-73.732462,252,46.889699,82.772465,Green,Rain,0,0.28966,16,49,231.152655,10.277477,High
3,2024-03-01 00:15:00,40.725849,-73.980134,37,5.730536,37.695567,Red,Fog,0,-0.271965,66,10,410.384292,29.243279,High
4,2024-03-01 00:20:00,40.813265,-73.961631,64,61.348034,22.313358,Red,Snow,0,-0.797606,3,5,364.466342,16.801459,Low


Unnamed: 0,Latitude,Longitude,Vehicle_Count,Traffic_Speed_kmh,Road_Occupancy_%,Accident_Report,Sentiment_Score,Ride_Sharing_Demand,Parking_Availability,Emission_Levels_g_km,Energy_Consumption_L_h
count,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0
mean,40.749645,-73.847433,153.9212,42.111096,54.748397,0.096,-0.005652,50.4878,24.613,272.174927,17.343243
std,0.086123,0.086204,83.523342,21.70772,26.145238,0.294621,0.58379,28.484426,14.532511,130.086372,7.208277
min,40.600016,-73.999987,10.0,5.002789,10.005031,0.0,-0.999819,1.0,0.0,50.136855,5.003787
25%,40.675403,-73.920281,80.0,23.195752,32.089653,0.0,-0.500154,26.0,12.0,160.564433,11.098761
50%,40.748875,-73.846058,156.0,42.191599,54.657297,0.0,-0.010564,50.0,24.0,272.045513,17.153791
75%,40.824735,-73.771685,226.0,60.75176,77.58172,0.0,0.500518,75.0,37.0,382.242055,23.516595
max,40.899972,-73.700159,299.0,79.997556,99.999729,1.0,0.999354,99.0,49.0,499.922663,29.995416


In [4]:
# Preprocessing: parse timestamp, add cyclical time features, encode categories, fill NA if any
def preprocess_df(df):
    """Return a model-ready copy of the raw mobility frame.

    Steps, in order:
      1. If a ``Timestamp`` column exists, parse it, sort the rows
         chronologically and derive ``hour``, ``dow``, ``is_weekend``,
         ``hour_sin`` and ``hour_cos``.
      2. One-hot encode whichever of ``Traffic_Light_State``,
         ``Weather_Condition`` and ``Traffic_Condition`` are present, dropping
         the first level of each.
      3. Fill gaps in numeric columns by forward fill, then by the column
         median (the median covers leading gaps forward fill cannot reach).

    Parameters
    ----------
    df : pandas.DataFrame
        Raw input frame. It is copied; the caller's frame is not modified.

    Returns
    -------
    pandas.DataFrame
        The transformed copy.
    """
    df = df.copy()

    if 'Timestamp' in df.columns:
        df['Timestamp'] = pd.to_datetime(df['Timestamp'])
        # Chronological order matters: windows and the forward fill below both
        # rely on row order.
        df = df.sort_values('Timestamp').reset_index(drop=True)
        df['hour'] = df['Timestamp'].dt.hour
        df['dow'] = df['Timestamp'].dt.dayofweek
        df['is_weekend'] = df['dow'].isin([5, 6]).astype(int)
        # Cyclical encoding so that hour 23 and hour 0 end up close together.
        df['hour_sin'] = np.sin(2 * np.pi * df['hour'] / 24)
        df['hour_cos'] = np.cos(2 * np.pi * df['hour'] / 24)

    # One-hot encode the small-cardinality categorical columns that exist.
    cat_cols = [c for c in ['Traffic_Light_State', 'Weather_Condition', 'Traffic_Condition'] if c in df.columns]
    df = pd.get_dummies(df, columns=cat_cols, drop_first=True)

    # Forward fill, then median. `.ffill()` replaces the deprecated
    # `fillna(method='ffill')` spelling, which emits a FutureWarning.
    num_cols = df.select_dtypes(include=[np.number]).columns.tolist()
    df[num_cols] = df[num_cols].ffill().fillna(df[num_cols].median())
    return df

# Build the model-ready frame. `df` itself stays untouched because
# preprocess_df works on a copy of its argument.
df_p = preprocess_df(df)
print('After preprocess shape:', df_p.shape)
display(df_p.head())


After preprocess shape: (5000, 24)


  df[num_cols] = df[num_cols].fillna(method='ffill').fillna(df[num_cols].median())


Unnamed: 0,Timestamp,Latitude,Longitude,Vehicle_Count,Traffic_Speed_kmh,Road_Occupancy_%,Accident_Report,Sentiment_Score,Ride_Sharing_Demand,Parking_Availability,...,is_weekend,hour_sin,hour_cos,Traffic_Light_State_Red,Traffic_Light_State_Yellow,Weather_Condition_Fog,Weather_Condition_Rain,Weather_Condition_Snow,Traffic_Condition_Low,Traffic_Condition_Medium
0,2024-03-01 00:00:00,40.842275,-73.703149,205,49.893435,82.65278,0,-0.609199,2,45,...,0,0.0,1.0,False,True,False,False,False,False,False
1,2024-03-01 00:05:00,40.831119,-73.987354,202,22.383965,45.829298,0,0.965442,16,1,...,0,0.0,1.0,False,False,False,False,False,False,False
2,2024-03-01 00:10:00,40.819549,-73.732462,252,46.889699,82.772465,0,0.28966,16,49,...,0,0.0,1.0,False,False,False,True,False,False,False
3,2024-03-01 00:15:00,40.725849,-73.980134,37,5.730536,37.695567,0,-0.271965,66,10,...,0,0.0,1.0,True,False,True,False,False,False,False
4,2024-03-01 00:20:00,40.813265,-73.961631,64,61.348034,22.313358,0,-0.797606,3,5,...,0,0.0,1.0,True,False,False,False,True,True,False


In [5]:
# Sliding window maker: creates (X, y) for seq-to-one forecasting
def make_windows(df, feature_cols, target_col, seq_len=24, horizon=12):
    """Build sliding windows for sequence-to-one forecasting.

    Window ``i`` holds the feature rows ``i .. i + seq_len - 1`` and is paired
    with the target value at row ``i + seq_len + horizon - 1``, i.e. ``horizon``
    rows after the last row of the window.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose row order defines time.
    feature_cols : list of str
        Columns used as model inputs.
    target_col : str
        Column to predict.
    seq_len : int, default 24
        Number of consecutive rows per window (must be >= 1).
    horizon : int, default 12
        Offset, in rows, from the end of a window to its target (must be >= 0).

    Returns
    -------
    X : numpy.ndarray
        Shape ``(n_windows, seq_len, n_features)``.
    y : numpy.ndarray
        Shape ``(n_windows,)``.

    When ``df`` has fewer than ``seq_len + horizon`` rows, ``n_windows`` is 0
    and both arrays are empty but keep the shapes above, so downstream code
    that reads ``X.shape[2]`` still works.

    Raises
    ------
    ValueError
        If ``seq_len < 1`` or ``horizon < 0``.
    """
    if seq_len < 1:
        raise ValueError('seq_len must be >= 1')
    if horizon < 0:
        raise ValueError('horizon must be >= 0')

    data = df[feature_cols].values
    targ = df[target_col].values

    n_windows = max(len(df) - seq_len - horizon + 1, 0)

    # Row indices of every window at once: entry (i, j) is row i + j.
    # Fancy indexing copies, so X does not alias the frame's data.
    window_rows = np.arange(n_windows)[:, None] + np.arange(seq_len)[None, :]
    X = data[window_rows]

    # Targets are a contiguous run starting `horizon` rows after the first window ends.
    first_target = seq_len + horizon - 1
    y = targ[first_target:first_target + n_windows].copy()
    return X, y

# Target column and model inputs (edit these definitions to customize).
# Falls back to the first column when the preferred target is absent.
if 'Road_Occupancy_%' in df_p.columns:
    TARGET = 'Road_Occupancy_%'
else:
    TARGET = df_p.columns[0]

# Every column except the timestamp and the target itself is a model input.
FEATURES = [col for col in df_p.columns if col != 'Timestamp' and col != TARGET]
print('Target:', TARGET)
print('Number of features:', len(FEATURES))

# 24-row input windows, each paired with the target 12 rows after the window ends.
X_all, y_all = make_windows(df_p, FEATURES, TARGET, seq_len=24, horizon=12)
print('X shape, y shape:', X_all.shape, y_all.shape)


Target: Road_Occupancy_%
Number of features: 22
X shape, y shape: (4965, 24, 22) (4965,)


In [6]:
# Chronological 70% / 15% / 15% split: earlier windows train, later ones validate and test.
n = len(X_all)
train_end, val_end = int(n * 0.7), int(n * 0.85)

X_train, X_val, X_test = X_all[:train_end], X_all[train_end:val_end], X_all[val_end:]
y_train, y_val, y_test = y_all[:train_end], y_all[train_end:val_end], y_all[val_end:]
print('Train/Val/Test shapes:', X_train.shape, X_val.shape, X_test.shape)

# Fit the feature scaler on training windows only, flattened to (rows, features),
# so validation and test statistics never influence the scaling.
n_features = X_train.shape[2]
X_train_flat = X_train.reshape(-1, n_features)
feature_scaler = StandardScaler().fit(X_train_flat)

def scale_X(X, scaler):
    """Apply a fitted scaler to a 3-D window array and keep its shape.

    ``X`` is flattened to two dimensions (last axis preserved) for
    ``scaler.transform`` and the result is reshaped back to ``X.shape``.
    """
    original_shape = X.shape
    flat_rows = X.reshape(-1, original_shape[2])
    scaled_rows = scaler.transform(flat_rows)
    return scaled_rows.reshape(original_shape)

# Scale every split with the scaler fitted on the training windows.
X_train_s, X_val_s, X_test_s = (
    scale_X(split, feature_scaler) for split in (X_train, X_val, X_test)
)

# The target gets its own scaler, fitted on training targets only, so that
# predictions can be mapped back to original units for RMSE reporting.
target_scaler = StandardScaler()
target_scaler.fit(y_train.reshape(-1, 1))
y_train_s = target_scaler.transform(y_train.reshape(-1, 1)).ravel()
y_val_s = target_scaler.transform(y_val.reshape(-1, 1)).ravel()
y_test_s = target_scaler.transform(y_test.reshape(-1, 1)).ravel()

print('Scaling complete')


Train/Val/Test shapes: (3475, 24, 22) (745, 24, 22) (745, 24, 22)
Scaling complete


In [7]:
def build_lstm(seq_len, n_features):
    """Build and compile the stacked-LSTM regressor.

    Architecture: LSTM(128, returning the full sequence) -> Dropout(0.2) ->
    LSTM(64) -> Dropout(0.2) -> Dense(32, relu) -> Dense(1).

    Parameters
    ----------
    seq_len : int
        Number of time steps per input window.
    n_features : int
        Number of features per time step.

    Returns
    -------
    A compiled ``Sequential`` model (Adam optimizer, MSE loss, MAE metric)
    that outputs one value per window.
    """
    model = Sequential([
        # Declare the input with an explicit Input layer: passing `input_shape=`
        # to the first layer of a Sequential model triggers a Keras 3 warning.
        Input(shape=(seq_len, n_features)),
        LSTM(128, return_sequences=True),
        Dropout(0.2),
        LSTM(64),
        Dropout(0.2),
        Dense(32, activation='relu'),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model

# Read the window length and feature count back from the scaled training array
# so the model input always matches the data actually fed to it.
seq_len = X_train_s.shape[1]
n_features = X_train_s.shape[2]
lstm = build_lstm(seq_len, n_features)
lstm.summary()

# Shared training callbacks: stop after 8 epochs without improvement (restoring
# the best weights) and halve the learning rate after 4 stagnant epochs, down
# to a floor of 1e-6.
callbacks = [
    EarlyStopping(patience=8, restore_best_weights=True),
    ReduceLROnPlateau(patience=4, factor=0.5, min_lr=1e-6)
]

# Stand-alone training recipe (training itself is run in a later cell):
# history = lstm.fit(X_train_s, y_train_s, validation_data=(X_val_s, y_val_s),
#                   epochs=100, batch_size=64, callbacks=callbacks)

# After training, predict and invert the target scaling to get original units:
# y_pred_s = lstm.predict(X_test_s).reshape(-1)
# y_pred = target_scaler.inverse_transform(y_pred_s.reshape(-1,1)).reshape(-1)
# from sklearn.metrics import mean_squared_error
# rmse = np.sqrt(mean_squared_error(y_test, y_pred))  # avoid squared=False: not accepted by newer scikit-learn
# print('LSTM Test RMSE:', rmse)


  super().__init__(**kwargs)


In [8]:
def build_cnn_gru(seq_len, n_features):
    """Build and compile the CNN-GRU hybrid regressor.

    Two same-padded 1-D convolutions (64 then 32 filters, kernel size 3) with a
    max-pool of size 2 between them feed a 64-unit GRU, followed by a
    Dense(32, relu) layer and a single-unit output. Compiled with the Adam
    optimizer, MSE loss and an MAE metric.
    """
    inputs = Input((seq_len, n_features))

    # Convolutional front end
    conv_first = Conv1D(64, kernel_size=3, padding='same', activation='relu')(inputs)
    pooled = MaxPooling1D(pool_size=2)(conv_first)
    conv_second = Conv1D(32, kernel_size=3, padding='same', activation='relu')(pooled)

    # Recurrent summary and regression head
    summary_vec = GRU(64)(conv_second)
    hidden = Dense(32, activation='relu')(summary_vec)
    prediction = Dense(1)(hidden)

    network = Model(inputs, prediction)
    network.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return network

# Instantiate the hybrid with the same input dimensions as the LSTM.
cnn_gru = build_cnn_gru(seq_len, n_features)
cnn_gru.summary()

# Stand-alone training recipe (training itself is run in a later cell):
# history2 = cnn_gru.fit(X_train_s, y_train_s, validation_data=(X_val_s, y_val_s),
#                      epochs=100, batch_size=64, callbacks=callbacks)

# Evaluate similarly by predicting, inverse-scaling, and computing RMSE in original units.


In [9]:
from sklearn.metrics import mean_squared_error, mean_absolute_error
def compute_metrics(y_true, y_pred):
    """Compute RMSE, MAE and range-normalized RMSE.

    Parameters
    ----------
    y_true, y_pred : array-like of float
        Ground truth and predictions, in the same units and of equal length.

    Returns
    -------
    dict
        ``{'rmse': float, 'mae': float, 'nrmse': float}``, where ``nrmse`` is
        ``rmse / (max(y_true) - min(y_true))``. ``nrmse`` is NaN when
        ``y_true`` is constant, since the normalizing range is zero.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    # Take the root explicitly: the `squared=False` keyword of
    # mean_squared_error is deprecated and absent from newer scikit-learn.
    rmse = float(np.sqrt(mean_squared_error(y_true, y_pred)))
    mae = float(mean_absolute_error(y_true, y_pred))

    value_range = float(y_true.max() - y_true.min())
    nrmse = rmse / value_range if value_range > 0 else float('nan')
    return {'rmse': rmse, 'mae': mae, 'nrmse': nrmse}

print('Metrics helper ready')


Metrics helper ready


In [None]:
# Enhanced Training and Next Week Prediction
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import json
from datetime import datetime, timedelta
import warnings
# Silence only deprecation-style noise from libraries. A blanket
# filterwarnings('ignore') would also hide RuntimeWarnings (e.g. divide by
# zero in a metric), which signal real numerical problems.
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=DeprecationWarning)

# Set style for better plots
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

print("Enhanced prediction system ready!")


In [None]:
# Comprehensive Metrics Function
def calculate_comprehensive_metrics(y_true, y_pred, model_name):
    """Compute, print and return evaluation metrics for one model.

    Parameters
    ----------
    y_true, y_pred : array-like of float
        Ground truth and predictions, in the same units and of equal length.
    model_name : str
        Label used in the printed header and stored under the ``'Model'`` key.

    Returns
    -------
    dict
        Keys ``'Model'``, ``'RMSE'``, ``'MAE'``, ``'R²'``, ``'MAPE'`` (percent)
        and ``'NRMSE'`` (RMSE divided by the range of ``y_true``). Numeric
        values are plain Python floats.

    Notes
    -----
    MAPE is averaged over samples whose true value is non-zero, because the
    percentage error is undefined at zero; it is NaN when every true value is
    zero. NRMSE is NaN when ``y_true`` is constant.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    rmse = float(np.sqrt(mean_squared_error(y_true, y_pred)))
    mae = float(mean_absolute_error(y_true, y_pred))
    r2 = float(r2_score(y_true, y_pred))

    # Exclude zero targets so a single zero cannot turn MAPE into inf/NaN.
    nonzero = y_true != 0
    if nonzero.any():
        mape = float(np.mean(np.abs((y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero])) * 100)
    else:
        mape = float('nan')

    value_range = float(y_true.max() - y_true.min())
    nrmse = rmse / value_range if value_range > 0 else float('nan')

    metrics = {
        'Model': model_name,
        'RMSE': rmse,
        'MAE': mae,
        'R²': r2,
        'MAPE': mape,
        'NRMSE': nrmse
    }

    print(f"\n=== {model_name} Performance Metrics ===")
    print(f"RMSE: {rmse:.4f}")
    print(f"MAE: {mae:.4f}")
    print(f"R² Score: {r2:.4f}")
    print(f"MAPE: {mape:.2f}%")
    print(f"NRMSE: {nrmse:.4f}")

    return metrics

print("Metrics calculation function ready!")


In [None]:
# Train LSTM Model with Enhanced Monitoring
# Both fits share the same validation data, schedule and callbacks; only the
# model differs.
_shared_fit_args = {
    'validation_data': (X_val_s, y_val_s),
    'epochs': 50,  # Reduced for demo - increase for better results
    'batch_size': 64,
    'callbacks': callbacks,
    'verbose': 1,
}

print("Training LSTM Model...")
lstm_history = lstm.fit(X_train_s, y_train_s, **_shared_fit_args)

# Train CNN-GRU Model
print("\nTraining CNN-GRU Model...")
cnn_gru_history = cnn_gru.fit(X_train_s, y_train_s, **_shared_fit_args)

print("Both models trained successfully!")


In [None]:
# Model Evaluation and Visualization
# Predict on the held-out test windows (still in scaled target units).
lstm_pred_s = lstm.predict(X_test_s, verbose=0).ravel()
cnn_gru_pred_s = cnn_gru.predict(X_test_s, verbose=0).ravel()

# Map predictions back to original units; the scaler expects a column vector.
lstm_pred = target_scaler.inverse_transform(lstm_pred_s[:, None]).ravel()
cnn_gru_pred = target_scaler.inverse_transform(cnn_gru_pred_s[:, None]).ravel()

# Score each model against the unscaled test targets.
lstm_metrics = calculate_comprehensive_metrics(y_test, lstm_pred, "LSTM")
cnn_gru_metrics = calculate_comprehensive_metrics(y_test, cnn_gru_pred, "CNN-GRU")

# Side-by-side comparison table, one row per model.
comparison_rows = [lstm_metrics, cnn_gru_metrics]
metrics_df = pd.DataFrame(comparison_rows)
print("\n=== Model Comparison ===")
print(metrics_df.round(4))


In [None]:
# Comprehensive Visualizations
fig, axes = plt.subplots(2, 3, figsize=(20, 12))
fig.suptitle('Traffic Prediction Model Analysis', fontsize=16, fontweight='bold')

# Draw at most the first 100 test samples. Capping at len(y_test) keeps the x
# and y lengths equal when the test split holds fewer than 100 windows.
n_show = min(100, len(y_test))

# 1-2. Training history, one panel per model
history_panels = [
    (axes[0,0], 'LSTM', lstm_history, 'blue', 'red'),
    (axes[0,1], 'CNN-GRU', cnn_gru_history, 'green', 'orange'),
]
for panel, model_label, fit_history, train_color, val_color in history_panels:
    panel.plot(fit_history.history['loss'], label='Training Loss', color=train_color)
    panel.plot(fit_history.history['val_loss'], label='Validation Loss', color=val_color)
    panel.set_title(f'{model_label} Training History')
    panel.set_xlabel('Epoch')
    panel.set_ylabel('Loss')
    panel.legend()
    panel.grid(True)

# 3-4. Predictions vs actual, one panel per model; the dashed red diagonal
# marks perfect predictions.
actual_lo, actual_hi = y_test.min(), y_test.max()
scatter_panels = [
    (axes[0,2], 'LSTM', lstm_pred, 'blue'),
    (axes[1,0], 'CNN-GRU', cnn_gru_pred, 'green'),
]
for panel, model_label, model_pred, point_color in scatter_panels:
    panel.scatter(y_test[:n_show], model_pred[:n_show], alpha=0.6, color=point_color)
    panel.plot([actual_lo, actual_hi], [actual_lo, actual_hi], 'r--', lw=2)
    panel.set_title(f'{model_label}: Predictions vs Actual')
    panel.set_xlabel('Actual')
    panel.set_ylabel('Predicted')
    panel.grid(True)

# 5. Time Series Comparison
time_indices = range(n_show)
axes[1,1].plot(time_indices, y_test[:n_show], label='Actual', color='black', linewidth=2)
axes[1,1].plot(time_indices, lstm_pred[:n_show], label='LSTM', color='blue', alpha=0.7)
axes[1,1].plot(time_indices, cnn_gru_pred[:n_show], label='CNN-GRU', color='green', alpha=0.7)
axes[1,1].set_title(f'Time Series Comparison (First {n_show} samples)')
axes[1,1].set_xlabel('Time Steps')
axes[1,1].set_ylabel('Road Occupancy %')
axes[1,1].legend()
axes[1,1].grid(True)

# 6. Metrics Comparison (grouped bars; note the metrics are on different scales)
metrics_for_plot = ['RMSE', 'MAE', 'R²', 'MAPE']
lstm_values = [lstm_metrics[m] for m in metrics_for_plot]
cnn_gru_values = [cnn_gru_metrics[m] for m in metrics_for_plot]

x = np.arange(len(metrics_for_plot))
width = 0.35

axes[1,2].bar(x - width/2, lstm_values, width, label='LSTM', color='blue', alpha=0.7)
axes[1,2].bar(x + width/2, cnn_gru_values, width, label='CNN-GRU', color='green', alpha=0.7)
axes[1,2].set_title('Metrics Comparison')
axes[1,2].set_xlabel('Metrics')
axes[1,2].set_ylabel('Values')
axes[1,2].set_xticks(x)
axes[1,2].set_xticklabels(metrics_for_plot)
axes[1,2].legend()
axes[1,2].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print("Visualizations completed!")


In [None]:
# Next Week Prediction System
def predict_next_week(model, scaler, feature_scaler, last_sequence, days=7, target_index=None):
    """Roll a trained model forward ``days`` times from ``last_sequence``.

    Each iteration scales the current window, asks the model for one
    prediction, converts it back to original units, then slides the window
    forward by one row. The appended row is a copy of the window's last row.

    Parameters
    ----------
    model : object with ``predict(x, verbose=0)``
        Called with an array of shape ``(1, seq_len, n_features)``.
    scaler : object with ``inverse_transform``
        Target scaler used to map the model output back to original units.
    feature_scaler : object with ``transform``
        Feature scaler applied to the (unscaled) window before prediction.
    last_sequence : numpy.ndarray
        Unscaled starting window of shape ``(seq_len, n_features)``. It is
        copied, never modified.
    days : int, default 7
        Number of successive predictions to produce.
    target_index : int or None, default None
        Column of the window that holds the target itself. When given, the
        prediction is written into that column of the appended row. Leave it
        as ``None`` when the target is not among the features, so that no
        unrelated feature is overwritten with a prediction.

    Returns
    -------
    list of float
        ``days`` predictions in original target units.

    Notes
    -----
    One iteration is one model forecast followed by a one-row window shift.
    The name and the ``days`` parameter are kept for backward compatibility;
    mapping predictions to calendar days is left to the caller.
    """
    current_sequence = np.array(last_sequence, copy=True)
    n_feat = current_sequence.shape[1]
    predictions = []

    for _ in range(days):
        # The window is already 2-D (seq_len, n_features), the layout the
        # feature scaler is applied to.
        scaled_window = np.asarray(feature_scaler.transform(current_sequence))

        # Add the batch axis expected by the model.
        pred_scaled = model.predict(scaled_window.reshape(1, -1, n_feat), verbose=0)
        pred_original = scaler.inverse_transform(np.asarray(pred_scaled).reshape(-1, 1)).reshape(-1)[0]
        predictions.append(float(pred_original))

        # Slide the window: drop the oldest row and repeat the newest one.
        # Only a genuine target column is updated with the prediction. Blindly
        # writing into column 0 would corrupt whatever feature sits there.
        new_row = current_sequence[-1].copy()
        if target_index is not None:
            new_row[target_index] = pred_original
        current_sequence = np.vstack([current_sequence[1:], new_row])

    return predictions

# Starting window for the forecast: the last test window, taken unscaled
# because predict_next_week applies the feature scaler itself.
# NOTE(review): make_windows reserves `horizon` rows after each window for its
# target, so this window ends `horizon` rows before the end of the data rather
# than on the very last rows - confirm that this is the intended start.
last_sequence = X_test[-1]  # Last window (seq_len rows)
print(f"Last sequence shape: {last_sequence.shape}")

# Roll both models forward with the default number of steps (7).
print("Predicting next week traffic...")
lstm_weekly_pred = predict_next_week(lstm, target_scaler, feature_scaler, last_sequence)
cnn_gru_weekly_pred = predict_next_week(cnn_gru, target_scaler, feature_scaler, last_sequence)

# Label the 7 predictions with the 7 calendar days after the last timestamp.
# NOTE(review): each prediction comes from one model step with the window
# shifted by a single row, not from a full day of data, so the daily labels
# are nominal - verify before presenting these as daily forecasts.
last_date = pd.to_datetime(df_p['Timestamp'].iloc[-1])
next_week_dates = [last_date + timedelta(days=i+1) for i in range(7)]

print("Next week predictions completed!")


In [None]:
# Next Week Prediction Visualization
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
fig.suptitle('Next Week Traffic Predictions', fontsize=16, fontweight='bold')

# 1. Weekly Predictions Comparison
axes[0,0].plot(next_week_dates, lstm_weekly_pred, 'o-', label='LSTM Predictions', color='blue', linewidth=2, markersize=8)
axes[0,0].plot(next_week_dates, cnn_gru_weekly_pred, 's-', label='CNN-GRU Predictions', color='green', linewidth=2, markersize=8)
axes[0,0].set_title('Next Week Traffic Predictions')
axes[0,0].set_xlabel('Date')
axes[0,0].set_ylabel('Road Occupancy %')
axes[0,0].legend()
axes[0,0].grid(True, alpha=0.3)
axes[0,0].tick_params(axis='x', rotation=45)

# 2. Historical vs Future
# Recent context: the last 30 rows of the frame (30 observations, not 30 days).
historical_dates = df_p['Timestamp'].iloc[-30:]
historical_values = df_p[TARGET].iloc[-30:]

axes[0,1].plot(historical_dates, historical_values, 'o-', label='Historical Data', color='gray', alpha=0.7)
axes[0,1].plot(next_week_dates, lstm_weekly_pred, 'o-', label='LSTM Future', color='blue', linewidth=2)
axes[0,1].plot(next_week_dates, cnn_gru_weekly_pred, 's-', label='CNN-GRU Future', color='green', linewidth=2)
axes[0,1].set_title('Historical vs Future Predictions')
axes[0,1].set_xlabel('Date')
axes[0,1].set_ylabel('Road Occupancy %')
axes[0,1].legend()
axes[0,1].grid(True, alpha=0.3)
axes[0,1].tick_params(axis='x', rotation=45)

# 3. Prediction Confidence (using model ensemble)
# The band is +/- one standard deviation across the two models' forecasts: a
# measure of model disagreement, not a statistical confidence interval.
ensemble_pred = np.mean([lstm_weekly_pred, cnn_gru_weekly_pred], axis=0)
pred_std = np.std([lstm_weekly_pred, cnn_gru_weekly_pred], axis=0)

axes[1,0].plot(next_week_dates, ensemble_pred, 'o-', label='Ensemble Prediction', color='purple', linewidth=2)
axes[1,0].fill_between(next_week_dates,
                       np.array(ensemble_pred) - pred_std,
                       np.array(ensemble_pred) + pred_std,
                       alpha=0.3, color='purple', label='Confidence Interval')
axes[1,0].set_title('Ensemble Prediction with Confidence')
axes[1,0].set_xlabel('Date')
axes[1,0].set_ylabel('Road Occupancy %')
axes[1,0].legend()
axes[1,0].grid(True, alpha=0.3)
axes[1,0].tick_params(axis='x', rotation=45)

# 4. Daily Traffic Patterns
# Derive the weekday labels from the forecast dates. A fixed Mon..Sun list is
# only correct when the first forecast day happens to be a Monday.
days_of_week = [forecast_day.strftime('%a') for forecast_day in next_week_dates]
axes[1,1].bar(days_of_week, ensemble_pred, color='skyblue', alpha=0.7, edgecolor='navy')
axes[1,1].set_title('Predicted Daily Traffic Levels')
axes[1,1].set_xlabel('Day of Week')
axes[1,1].set_ylabel('Road Occupancy %')
axes[1,1].grid(True, alpha=0.3)

# Add value labels on bars
for i, v in enumerate(ensemble_pred):
    axes[1,1].text(i, v + 0.5, f'{v:.1f}%', ha='center', va='bottom', fontweight='bold')

plt.tight_layout()
plt.show()

# Print prediction summary
print("\n=== Next Week Traffic Predictions ===")
for i, date in enumerate(next_week_dates):
    print(f"{date.strftime('%Y-%m-%d (%A)')}: LSTM={lstm_weekly_pred[i]:.2f}%, CNN-GRU={cnn_gru_weekly_pred[i]:.2f}%, Ensemble={ensemble_pred[i]:.2f}%")

print(f"\nAverage predicted traffic: {np.mean(ensemble_pred):.2f}%")
print(f"Peak predicted traffic: {np.max(ensemble_pred):.2f}%")
print(f"Lowest predicted traffic: {np.min(ensemble_pred):.2f}%")


In [None]:
# Save Models and Scalers for Web Deployment
import joblib
import os

# Create models directory
os.makedirs('models', exist_ok=True)

# Export the model with the lowest measured RMSE instead of assuming the LSTM
# wins, so the saved weights and the metadata below always describe the same
# model. Ties keep the LSTM (first candidate).
# NOTE(review): the metrics were computed on the test split; select on the
# validation split instead if the test score must stay an unbiased estimate.
candidates = {
    'LSTM': (lstm, lstm_metrics),
    'CNN-GRU': (cnn_gru, cnn_gru_metrics),
}
best_name = min(candidates, key=lambda name: candidates[name][1]['RMSE'])
best_model, best_metrics = candidates[best_name]
best_model.save('models/traffic_prediction_model.h5')

# Save scalers
joblib.dump(feature_scaler, 'models/feature_scaler.pkl')
joblib.dump(target_scaler, 'models/target_scaler.pkl')

# Save model metadata. Values are cast to built-in types because json.dump
# rejects some NumPy scalar types (e.g. float32).
model_metadata = {
    'model_type': best_name,
    'sequence_length': int(seq_len),
    'n_features': int(n_features),
    'target_column': TARGET,
    'feature_columns': list(FEATURES),
    'training_date': datetime.now().isoformat(),
    'model_performance': {
        'rmse': float(best_metrics['RMSE']),
        'mae': float(best_metrics['MAE']),
        'r2': float(best_metrics['R²']),
        'mape': float(best_metrics['MAPE'])
    }
}

with open('models/model_metadata.json', 'w') as f:
    json.dump(model_metadata, f, indent=2)

print("✅ Models and scalers saved successfully!")
print(f"Selected model: {best_name}")
print(f"Model saved to: models/traffic_prediction_model.h5")
print(f"Scalers saved to: models/feature_scaler.pkl, models/target_scaler.pkl")
print(f"Metadata saved to: models/model_metadata.json")


## Notes / Tips
- The notebook predicts `TARGET` at a horizon (by default horizon=12). Adjust `seq_len` and `horizon` to your use-case.
- The notebook scales features and target separately. RMSE reported after inverse-transform is in original units.
- Possible extensions: cross-validation, hyperparameter search (e.g. Optuna), or an automated training script.

----
To reproduce the results, open the notebook in Jupyter/JupyterLab and run the cells in order from top to bottom.
