In [2]:
# =====================================================
# TransitGuard — Hybrid AIS + QPSO Delay Predictor
# =====================================================

import os
import json
import yaml
import joblib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from xgboost import XGBRegressor
from tensorflow.keras import models, layers
from tensorflow.keras.models import load_model
import tensorflow as tf

# Silence TensorFlow deprecated warnings
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

# ---------------------------------------------------
# CONFIGURATION
# ---------------------------------------------------
BASE_DIR = r"C:\Users\NXTWAVE\Downloads\Public Transport Reliability Predictor"
GTFS_DIR = os.path.join(BASE_DIR, "archive", "GTFS")
OUTPUT_DIR = os.path.join(BASE_DIR, "hybrid_results")
os.makedirs(OUTPUT_DIR, exist_ok=True)

# ---------------------------------------------------
# LOAD GTFS DATA
# ---------------------------------------------------
print("[INFO] Loading GTFS data...")

stop_times = pd.read_csv(os.path.join(GTFS_DIR, "stop_times.csv"))
trips = pd.read_csv(os.path.join(GTFS_DIR, "trips.csv"))
routes = pd.read_csv(os.path.join(GTFS_DIR, "routes.csv"))
stops = pd.read_csv(os.path.join(GTFS_DIR, "stops.csv"))

df = (
    stop_times
    .merge(trips, on="trip_id", how="left")
    .merge(routes, on="route_id", how="left")
    .merge(stops, on="stop_id", how="left")
)

def time_to_minutes(t):
    try:
        h, m, s = map(int, str(t).split(":"))
        return h * 60 + m + s / 60
    except:
        return np.nan

df["arrival_mins"] = df["arrival_time"].apply(time_to_minutes)
df["departure_mins"] = df["departure_time"].apply(time_to_minutes)
df["delay_proxy"] = df["departure_mins"] - df["arrival_mins"]
df["delay_proxy"] = df["delay_proxy"].fillna(df["delay_proxy"].mean())

le_route = LabelEncoder()
le_stop = LabelEncoder()
df["route_enc"] = le_route.fit_transform(df["route_id"].astype(str))
df["stop_enc"] = le_stop.fit_transform(df["stop_id"].astype(str))

feature_cols = ["route_enc", "stop_enc", "stop_sequence", "arrival_mins"]
target_col = "delay_proxy"

df_model = df[feature_cols + [target_col]].dropna()
X = df_model[feature_cols].values
y = df_model[target_col].values

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# ---------------------------------------------------
# HYBRID AIS + QPSO OPTIMIZER
# ---------------------------------------------------
print("[INFO] Running Hybrid AIS + QPSO optimization...")

def evaluate_model(lr, depth, neurons):
    lr = max(min(lr, 0.3), 0.001)
    depth = int(max(min(depth, 10), 3))
    model = XGBRegressor(
        n_estimators=150,
        learning_rate=lr,
        max_depth=depth,
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=42,
        n_jobs=-1
    )
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    return np.sqrt(mean_squared_error(y_test, preds))

def hybrid_ais_qpso_optimize(objective_func, n_particles=6, n_gen=5):
    np.random.seed(42)
    positions = np.random.uniform(low=0, high=1, size=(n_particles, 3))
    p_best = positions.copy()
    g_best = positions[0]
    p_best_val = np.full(n_particles, np.inf)
    g_best_val = np.inf

    for g in range(n_gen):
        for i in range(n_particles):
            lr = 0.001 + np.clip(positions[i, 0], 0, 1) * 0.299  # [0.001,0.3]
            depth = int(3 + np.clip(positions[i, 1], 0, 1) * 7)  # [3,10]
            neurons = int(16 + np.clip(positions[i, 2], 0, 1) * 112)  # [16,128]
            val = objective_func(lr, depth, neurons)
            if val < p_best_val[i]:
                p_best_val[i] = val
                p_best[i] = positions[i]
            if val < g_best_val:
                g_best_val = val
                g_best = positions[i]

        beta = 0.5 + 0.5 * np.random.rand()
        mean_best = np.mean(p_best, axis=0)

        for i in range(n_particles):
            u = np.random.rand(3)
            p = beta * p_best[i] + (1 - beta) * g_best
            L = np.abs(mean_best - positions[i]) * np.log(1 / u)
            sign = np.random.choice([-1, 1], 3)
            positions[i] = np.clip(p + sign * L, 0, 1)

        clone_rate = 0.2
        clones = int(n_particles * clone_rate)
        for _ in range(clones):
            mutant = np.clip(g_best + np.random.normal(0, 0.1, 3), 0, 1)
            lr = 0.001 + mutant[0] * 0.299
            depth = int(3 + mutant[1] * 7)
            neurons = int(16 + mutant[2] * 112)
            val = objective_func(lr, depth, neurons)
            if val < g_best_val:
                g_best_val = val
                g_best = mutant

        print(f"[GEN {g+1}] Best RMSE = {g_best_val:.4f}")
    return g_best, g_best_val

best_params, best_score = hybrid_ais_qpso_optimize(evaluate_model)
print(f"[INFO] Optimal Params: {best_params}, RMSE={best_score:.4f}")

# ---------------------------------------------------
# TRAIN FINAL HYBRID MODEL
# ---------------------------------------------------
print("[INFO] Training final Hybrid XGBoost + LSTM model...")

xgb_model = XGBRegressor(
    n_estimators=200,
    learning_rate=0.001 + best_params[0] * 0.299,
    max_depth=int(3 + best_params[1] * 7),
    random_state=42
)
xgb_model.fit(X_train, y_train)
xgb_pred = xgb_model.predict(X_test)

# LSTM section
X_train_seq = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test_seq = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))

lstm_model = models.Sequential([
    layers.LSTM(int(16 + best_params[2] * 112), input_shape=(1, X_train.shape[1])),
    layers.Dense(16, activation='relu'),
    layers.Dense(1)
])
lstm_model.compile(optimizer='adam', loss='mse')
lstm_model.fit(X_train_seq, y_train, epochs=5, batch_size=32, verbose=0)
lstm_pred = lstm_model.predict(X_test_seq).flatten()

# Ensemble (Hybrid)
hybrid_pred = (xgb_pred + lstm_pred) / 2
rmse = np.sqrt(mean_squared_error(y_test, hybrid_pred))
r2 = r2_score(y_test, hybrid_pred)
print(f"[RESULT] ✅ Hybrid AIS+QPSO RMSE={rmse:.4f}, R²={r2:.4f}")

# ---------------------------------------------------
# SAVE ARTIFACTS
# ---------------------------------------------------
print("[INFO] Saving hybrid artifacts...")

lstm_path = os.path.join(OUTPUT_DIR, "TransitGuard_Hybrid_LSTM.h5")
xgb_path = os.path.join(OUTPUT_DIR, "TransitGuard_Hybrid_XGB.pkl")
yaml_path = os.path.join(OUTPUT_DIR, "TransitGuard_Hybrid_Config.yaml")
json_path = os.path.join(OUTPUT_DIR, "TransitGuard_Hybrid_Results.json")

lstm_model.save(lstm_path)
joblib.dump(xgb_model, xgb_path)

config = {
    "optimizer": "Hybrid AIS + QPSO",
    "params": best_params.tolist(),
    "features": feature_cols,
}
with open(yaml_path, "w") as f:
    yaml.dump(config, f)

results = {
    "RMSE_HYBRID": float(rmse),
    "R2_HYBRID": float(r2),
    "params": best_params.tolist(),
}
with open(json_path, "w") as f:
    json.dump(results, f, indent=4)

# ---------------------------------------------------
# VISUALIZATIONS
# ---------------------------------------------------
print("[INFO] Generating hybrid result graphs...")

# 1️⃣ Accuracy Graph
plt.figure(figsize=(8,6))
plt.scatter(y_test[:300], hybrid_pred[:300], alpha=0.6, color='purple')
plt.xlabel("Actual Delay (mins)")
plt.ylabel("Predicted Delay (mins)")
plt.title("Hybrid AIS + QPSO Accuracy Graph — Predicted vs Actual")
plt.grid(True)
plt.tight_layout()
plt.savefig(os.path.join(OUTPUT_DIR, "accuracy_graph.png"))
plt.close()

# 2️⃣ Heatmap
corr = df_model.corr(numeric_only=True)
plt.figure(figsize=(7,5))
sns.heatmap(corr, cmap="coolwarm", annot=True, fmt=".2f")
plt.title("Feature Correlation Heatmap — TransitGuard Hybrid")
plt.tight_layout()
plt.savefig(os.path.join(OUTPUT_DIR, "heatmap.png"))
plt.close()

# 3️⃣ Comparison Graph
plt.figure(figsize=(6,5))
sns.barplot(x=["Hybrid AIS+QPSO"], y=[r2], palette="cool")
plt.title("Model Performance (R² Score)")
plt.tight_layout()
plt.savefig(os.path.join(OUTPUT_DIR, "comparison_graph.png"))
plt.close()

# 4️⃣ Prediction Graph
route_delay = pd.DataFrame({"route": df["route_id"].values[:len(hybrid_pred)], "pred": hybrid_pred})
avg_route = route_delay.groupby("route")["pred"].mean().sort_values(ascending=False).head(20)
plt.figure(figsize=(10,6))
avg_route.plot(kind="barh", color="slateblue")
plt.gca().invert_yaxis()
plt.title("Top 20 Routes — Hybrid Predicted Delay")
plt.xlabel("Predicted Avg Delay (mins)")
plt.tight_layout()
plt.savefig(os.path.join(OUTPUT_DIR, "prediction_graph.png"))
plt.close()

# 5️⃣ Final Hybrid Result Graph
plt.figure(figsize=(6,5))
sns.barplot(x=["Hybrid Model"], y=[rmse], palette="magma")
plt.title("Hybrid Model RMSE Result")
plt.ylabel("RMSE (Lower = Better)")
plt.tight_layout()
plt.savefig(os.path.join(OUTPUT_DIR, "hybrid_prediction_graph.png"))
plt.close()

# ---------------------------------------------------
# DISPLAY SUMMARY
# ---------------------------------------------------
print("\n✅ All hybrid results saved to:", OUTPUT_DIR)
for file in os.listdir(OUTPUT_DIR):
    print("   ", file)



[INFO] Loading GTFS data...
[INFO] Running Hybrid AIS + QPSO optimization...
[GEN 1] Best RMSE = 0.0000
[GEN 2] Best RMSE = 0.0000
[GEN 3] Best RMSE = 0.0000
[GEN 4] Best RMSE = 0.0000
[GEN 5] Best RMSE = 0.0000
[INFO] Optimal Params: [0.61631269 0.87632653 1.        ], RMSE=0.0000
[INFO] Training final Hybrid XGBoost + LSTM model...
[RESULT] ✅ Hybrid AIS+QPSO RMSE=0.0000, R²=0.0000
[INFO] Saving hybrid artifacts...
[INFO] Generating hybrid result graphs...


  saving_api.save_model(

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=["Hybrid AIS+QPSO"], y=[r2], palette="cool")



✅ All hybrid results saved to: C:\Users\NXTWAVE\Downloads\Public Transport Reliability Predictor\hybrid_results
    accuracy_graph.png
    comparison_graph.png
    heatmap.png
    hybrid_prediction_graph.png
    prediction_graph.png
    TransitGuard_Hybrid_Config.yaml
    TransitGuard_Hybrid_LSTM.h5
    TransitGuard_Hybrid_Results.json
    TransitGuard_Hybrid_XGB.pkl



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=["Hybrid Model"], y=[rmse], palette="magma")
