In [1]:
import os
import json
import joblib
import yaml
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import r2_score, mean_squared_error
from tensorflow.keras.models import load_model

# ---------------------------------------------------
# CONFIGURATION
# ---------------------------------------------------
BASE_DIR = r"C:\Users\NXTWAVE\Downloads\Public Transport Reliability Predictor"
GTFS_DIR = os.path.join(BASE_DIR, "archive", "GTFS")
OUTPUT_DIR = os.path.join(BASE_DIR, "visuals")
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Paths to artifacts
H5_PATH = os.path.join(BASE_DIR, "TransitGuard_model.h5")
PKL_PATH = os.path.join(BASE_DIR, "TransitGuard_model.pkl")
YAML_PATH = os.path.join(BASE_DIR, "TransitGuard_config.yaml")
JSON_PATH = os.path.join(BASE_DIR, "TransitGuard_results.json")

# ---------------------------------------------------
# RELOAD MODELS & CONFIG
# ---------------------------------------------------
print("[INFO] Loading models and configs...")

lstm_model = load_model(H5_PATH)
xgb_model = joblib.load(PKL_PATH)

with open(YAML_PATH) as f:
    config = yaml.safe_load(f)

with open(JSON_PATH) as f:
    results_prev = json.load(f)

print("[OK] Models loaded successfully.")

# ---------------------------------------------------
# RELOAD GTFS DATA FOR PREDICTION
# ---------------------------------------------------
print("[INFO] Loading GTFS base data for evaluation...")

stop_times = pd.read_csv(os.path.join(GTFS_DIR, "stop_times.csv"))
trips = pd.read_csv(os.path.join(GTFS_DIR, "trips.csv"))
routes = pd.read_csv(os.path.join(GTFS_DIR, "routes.csv"))
stops = pd.read_csv(os.path.join(GTFS_DIR, "stops.csv"))

df = (
    stop_times
    .merge(trips, on="trip_id", how="left")
    .merge(routes, on="route_id", how="left")
    .merge(stops, on="stop_id", how="left")
)

def time_to_minutes(t):
    try:
        h, m, s = map(int, str(t).split(":"))
        return h * 60 + m + s / 60
    except Exception:
        return np.nan

df["arrival_mins"] = df["arrival_time"].apply(time_to_minutes)
df["departure_mins"] = df["departure_time"].apply(time_to_minutes)
df["delay_proxy"] = df["departure_mins"] - df["arrival_mins"]
df["delay_proxy"].fillna(df["delay_proxy"].mean(), inplace=True)
df = df.dropna(subset=["route_id", "stop_id"])

from sklearn.preprocessing import LabelEncoder, StandardScaler

le_route = LabelEncoder()
le_stop = LabelEncoder()
df["route_enc"] = le_route.fit_transform(df["route_id"].astype(str))
df["stop_enc"] = le_stop.fit_transform(df["stop_id"].astype(str))

feature_cols = ["route_enc", "stop_enc", "stop_sequence", "arrival_mins"]
target_col = "delay_proxy"

df_model = df[feature_cols + [target_col]].dropna()
X = df_model[feature_cols].values
y = df_model[target_col].values

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_seq = X_scaled.reshape((X_scaled.shape[0], 1, X_scaled.shape[1]))

# ---------------------------------------------------
# PREDICTIONS
# ---------------------------------------------------
print("[INFO] Making predictions...")

xgb_pred = xgb_model.predict(X_scaled)
lstm_pred = lstm_model.predict(X_seq, verbose=0).flatten()

rmse_xgb = np.sqrt(mean_squared_error(y, xgb_pred))
r2_xgb = r2_score(y, xgb_pred)
rmse_lstm = np.sqrt(mean_squared_error(y, lstm_pred))
r2_lstm = r2_score(y, lstm_pred)

print(f"[RESULT] XGBoost  RMSE={rmse_xgb:.3f}  R²={r2_xgb:.3f}")
print(f"[RESULT] LSTM     RMSE={rmse_lstm:.3f}  R²={r2_lstm:.3f}")

# ---------------------------------------------------
# 1️⃣ ACCURACY GRAPH (Predicted vs Actual)
# ---------------------------------------------------
plt.figure(figsize=(8,6))
plt.scatter(y[:500], xgb_pred[:500], alpha=0.5, label="XGBoost", s=20)
plt.scatter(y[:500], lstm_pred[:500], alpha=0.5, label="LSTM", s=20)
plt.xlabel("Actual Delay (mins)")
plt.ylabel("Predicted Delay (mins)")
plt.title("TransitGuard Accuracy Graph — Predicted vs Actual")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig(os.path.join(OUTPUT_DIR, "accuracy_graph.png"))
plt.close()

# ---------------------------------------------------
# 2️⃣ HEATMAP (Correlation / Route Delay Matrix)
# ---------------------------------------------------
corr = df_model.corr(numeric_only=True)
plt.figure(figsize=(7,5))
sns.heatmap(corr, cmap="coolwarm", annot=True, fmt=".2f")
plt.title("TransitGuard Feature Correlation Heatmap")
plt.tight_layout()
plt.savefig(os.path.join(OUTPUT_DIR, "heatmap.png"))
plt.close()

# ---------------------------------------------------
# 3️⃣ COMPARISON GRAPH (Model Performance)
# ---------------------------------------------------
metrics_df = pd.DataFrame({
    "Model": ["XGBoost", "LSTM"],
    "RMSE": [rmse_xgb, rmse_lstm],
    "R2": [r2_xgb, r2_lstm]
})

plt.figure(figsize=(7,5))
sns.barplot(data=metrics_df, x="Model", y="R2", palette="viridis")
plt.title("Model Comparison — R² Scores")
plt.ylabel("R² Score")
plt.tight_layout()
plt.savefig(os.path.join(OUTPUT_DIR, "comparison_graph.png"))
plt.close()

# ---------------------------------------------------
# 4️⃣ PREDICTION GRAPH (Route-wise Average Delay)
# ---------------------------------------------------
df_pred = df.copy()
df_pred["xgb_pred"] = xgb_pred
route_summary = df_pred.groupby("route_id")["xgb_pred"].mean().sort_values(ascending=False).head(20)

plt.figure(figsize=(10,6))
route_summary.plot(kind="barh", color="teal")
plt.gca().invert_yaxis()
plt.xlabel("Predicted Avg Delay (mins)")
plt.title("Top 20 Routes — Predicted Delay (XGBoost)")
plt.tight_layout()
plt.savefig(os.path.join(OUTPUT_DIR, "prediction_graph.png"))
plt.close()

# ---------------------------------------------------
# 5️⃣ RESULT GRAPH (Overall RMSE Comparison)
# ---------------------------------------------------
plt.figure(figsize=(7,5))
sns.barplot(data=metrics_df, x="Model", y="RMSE", palette="magma")
plt.title("Model RMSE Comparison")
plt.ylabel("RMSE (Lower = Better)")
plt.tight_layout()
plt.savefig(os.path.join(OUTPUT_DIR, "result_graph.png"))
plt.close()

# ---------------------------------------------------
# SAVE UPDATED RESULTS JSON
# ---------------------------------------------------
final_results = {
    "RMSE_XGBoost": float(rmse_xgb),
    "R2_XGBoost": float(r2_xgb),
    "RMSE_LSTM": float(rmse_lstm),
    "R2_LSTM": float(r2_lstm),
    "files_saved": [
        "accuracy_graph.png",
        "heatmap.png",
        "comparison_graph.png",
        "prediction_graph.png",
        "result_graph.png"
    ]
}

json_path = os.path.join(OUTPUT_DIR, "TransitGuard_visual_results.json")
with open(json_path, "w") as f:
    json.dump(final_results, f, indent=4)

print("\n✅ Visualization complete!")
print("[INFO] All graphs saved in:", OUTPUT_DIR)
for f in final_results["files_saved"]:
    print("   ", f)



[INFO] Loading models and configs...

[OK] Models loaded successfully.
[INFO] Loading GTFS base data for evaluation...


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["delay_proxy"].fillna(df["delay_proxy"].mean(), inplace=True)


[INFO] Making predictions...
[RESULT] XGBoost  RMSE=0.000  R²=1.000
[RESULT] LSTM     RMSE=0.000  R²=0.000



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(data=metrics_df, x="Model", y="R2", palette="viridis")



✅ Visualization complete!
[INFO] All graphs saved in: C:\Users\NXTWAVE\Downloads\Public Transport Reliability Predictor\visuals
    accuracy_graph.png
    heatmap.png
    comparison_graph.png
    prediction_graph.png
    result_graph.png



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(data=metrics_df, x="Model", y="RMSE", palette="magma")
