In [1]:
# ===========================================================
# TransitGuard — Hybrid AIS + QPSO Model Prediction & Results
# ===========================================================

import os
import json
import yaml
import joblib
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.models import load_model
from sklearn.metrics import mean_squared_error, r2_score

# -----------------------------------------------------------
# PATH CONFIGURATION
# -----------------------------------------------------------
BASE_DIR = r"C:\Users\NXTWAVE\Downloads\Public Transport Reliability Predictor"
GTFS_DIR = os.path.join(BASE_DIR, "archive", "GTFS")
MODEL_DIR = os.path.join(BASE_DIR, "hybrid_results")
os.makedirs(MODEL_DIR, exist_ok=True)

# Model files
H5_PATH   = os.path.join(MODEL_DIR, "TransitGuard_Hybrid_LSTM.h5")
PKL_PATH  = os.path.join(MODEL_DIR, "TransitGuard_Hybrid_XGB.pkl")
YAML_PATH = os.path.join(MODEL_DIR, "TransitGuard_Hybrid_Config.yaml")

# -----------------------------------------------------------
# LOAD ARTIFACTS
# -----------------------------------------------------------
print("[INFO] Loading hybrid models and config...")

lstm_model = load_model(H5_PATH)
xgb_model  = joblib.load(PKL_PATH)

with open(YAML_PATH, "r") as f:
    config = yaml.safe_load(f)

print(f"[OK] Loaded models — optimizer: {config['optimizer']}")

# -----------------------------------------------------------
# LOAD GTFS DATA FOR PREDICTION
# -----------------------------------------------------------
print("[INFO] Loading GTFS data...")

stop_times = pd.read_csv(os.path.join(GTFS_DIR, "stop_times.csv"))
trips = pd.read_csv(os.path.join(GTFS_DIR, "trips.csv"))
routes = pd.read_csv(os.path.join(GTFS_DIR, "routes.csv"))
stops = pd.read_csv(os.path.join(GTFS_DIR, "stops.csv"))

df = (
    stop_times
    .merge(trips, on="trip_id", how="left")
    .merge(routes, on="route_id", how="left")
    .merge(stops, on="stop_id", how="left")
)

def time_to_minutes(t):
    try:
        h, m, s = map(int, str(t).split(":"))
        return h * 60 + m + s / 60
    except:
        return np.nan

df["arrival_mins"]   = df["arrival_time"].apply(time_to_minutes)
df["departure_mins"] = df["departure_time"].apply(time_to_minutes)
df["delay_proxy"]    = df["departure_mins"] - df["arrival_mins"]
df["delay_proxy"].fillna(df["delay_proxy"].mean(), inplace=True)

# Encode categorical columns
le_route = LabelEncoder()
le_stop  = LabelEncoder()
df["route_enc"] = le_route.fit_transform(df["route_id"].astype(str))
df["stop_enc"]  = le_stop.fit_transform(df["stop_id"].astype(str))

feature_cols = ["route_enc", "stop_enc", "stop_sequence", "arrival_mins"]
target_col   = "delay_proxy"

df_model = df[feature_cols + [target_col]].dropna()
X = df_model[feature_cols].values
y_true = df_model[target_col].values

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_seq = X_scaled.reshape((X_scaled.shape[0], 1, X_scaled.shape[1]))

# -----------------------------------------------------------
# HYBRID PREDICTION
# -----------------------------------------------------------
print("[INFO] Generating predictions...")

xgb_pred = xgb_model.predict(X_scaled)
lstm_pred = lstm_model.predict(X_seq, verbose=0).flatten()
hybrid_pred = (xgb_pred + lstm_pred) / 2

rmse = np.sqrt(mean_squared_error(y_true, hybrid_pred))
r2   = r2_score(y_true, hybrid_pred)

print(f"[RESULT] ✅ Hybrid Prediction Complete — RMSE={rmse:.4f}, R²={r2:.4f}")

# -----------------------------------------------------------
# BUILD ROUTE-WISE RESULT SUMMARY
# -----------------------------------------------------------
df_result = df.copy()
df_result["Hybrid_Predicted_Delay(mins)"] = hybrid_pred

route_summary = (
    df_result.groupby("route_id")["Hybrid_Predicted_Delay(mins)"]
    .mean()
    .reset_index()
    .sort_values(by="Hybrid_Predicted_Delay(mins)", ascending=False)
)

# Reliability Classification
def reliability_label(delay):
    if delay > 8:
        return "🔴 Low"
    elif delay > 4:
        return "🟡 Moderate"
    else:
        return "🟢 High"

route_summary["Reliability"] = route_summary["Hybrid_Predicted_Delay(mins)"].apply(reliability_label)

# Add Rank & Save
route_summary["Rank"] = route_summary["Hybrid_Predicted_Delay(mins)"].rank(ascending=True)
route_summary.to_csv(os.path.join(MODEL_DIR, "Hybrid_Prediction_Output.csv"), index=False)

# -----------------------------------------------------------
# SAVE SUMMARY REPORT
# -----------------------------------------------------------
summary_report = {
    "Model": "Hybrid AIS + QPSO (XGBoost + LSTM)",
    "RMSE": float(rmse),
    "R2_Score": float(r2),
    "Top_5_Most_Delayed_Routes": route_summary.head(5).to_dict(orient="records"),
    "Top_5_Most_Reliable_Routes": route_summary.tail(5).to_dict(orient="records"),
    "Total_Routes_Analyzed": int(route_summary.shape[0])
}

report_path = os.path.join(MODEL_DIR, "Hybrid_Result_Report.json")
with open(report_path, "w") as f:
    json.dump(summary_report, f, indent=4)

# -----------------------------------------------------------
# DISPLAY SAMPLE OUTPUT
# -----------------------------------------------------------
print("\n📊 --- Hybrid Transit Reliability Summary ---")
print(route_summary.head(10).to_string(index=False))
print("\n[INFO] Results saved:")
print("   • Hybrid_Prediction_Output.csv")
print("   • Hybrid_Result_Report.json")
print("   → Directory:", MODEL_DIR)



[INFO] Loading hybrid models and config...

[OK] Loaded models — optimizer: Hybrid AIS + QPSO
[INFO] Loading GTFS data...


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["delay_proxy"].fillna(df["delay_proxy"].mean(), inplace=True)


[INFO] Generating predictions...
[RESULT] ✅ Hybrid Prediction Complete — RMSE=0.0000, R²=0.0000

📊 --- Hybrid Transit Reliability Summary ---
 route_id  Hybrid_Predicted_Delay(mins) Reliability   Rank
     1008                 -8.202506e-07      🟢 High 1187.0
     1027                 -8.303564e-07      🟢 High 1186.0
     1022                 -8.339521e-07      🟢 High 1185.0
     1019                 -8.373399e-07      🟢 High 1184.0
     1087                 -8.377177e-07      🟢 High 1183.0
     1065                 -8.382010e-07      🟢 High 1182.0
     1066                 -8.386625e-07      🟢 High 1181.0
      933                 -8.398609e-07      🟢 High 1135.5
      118                 -8.398609e-07      🟢 High 1135.5
     1081                 -8.398609e-07      🟢 High 1135.5

[INFO] Results saved:
   • Hybrid_Prediction_Output.csv
   • Hybrid_Result_Report.json
   → Directory: C:\Users\NXTWAVE\Downloads\Public Transport Reliability Predictor\hybrid_results
