In [2]:
# ===============================================================
# 🌬️ AirSage (Hybrid AIS + HSA) — Final Prediction & Result Code
# ===============================================================

import os
import json
import joblib
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, r2_score
from tensorflow.keras.models import load_model
from sklearn.preprocessing import MinMaxScaler

# ---------------------------------------------------------------
# 📁 File Paths
# ---------------------------------------------------------------
# Project root on the original author's machine. It remains the default so the
# script behaves exactly as before; set the AIRSAGE_BASE_DIR environment
# variable to run from another location without editing this file.
DEFAULT_BASE_DIR = r"C:\Users\NXTWAVE\Downloads\Air Quality Prediction and Pollution Source Mapping System"
BASE_DIR = os.environ.get("AIRSAGE_BASE_DIR") or DEFAULT_BASE_DIR

# Inputs read by this script: the CSV dataset, the saved Keras model and the
# joblib file that stores the fitted scalers.
DATA_PATH = os.path.join(BASE_DIR, "archive", "delhi_aqi.csv")
MODEL_PATH = os.path.join(BASE_DIR, "hybrid_AirSage_model.h5")
SCALER_PATH = os.path.join(BASE_DIR, "hybrid_AirSage_scalers.pkl")

# Outputs written by this script: per-row predictions and a JSON summary.
RESULT_PATH = os.path.join(BASE_DIR, "hybrid_AirSage_final_predictions.csv")
REPORT_PATH = os.path.join(BASE_DIR, "hybrid_AirSage_final_report.json")

# ---------------------------------------------------------------
# 1️⃣ Load Model and Scalers
# ---------------------------------------------------------------
print("[INFO] Loading model and scalers...")

# Saved Keras model used for inference further down.
model = load_model(MODEL_PATH)

# The joblib file holds a mapping; the two entries read here are the
# feature scaler ("scaler_x") and the target scaler ("scaler_y").
scalers = joblib.load(SCALER_PATH)
scaler_x = scalers["scaler_x"]
scaler_y = scalers["scaler_y"]

# ---------------------------------------------------------------
# 2️⃣ Load Dataset
# ---------------------------------------------------------------
print("[INFO] Loading dataset...")
df = pd.read_csv(DATA_PATH)
# Normalise headers so the name matching below ignores case and padding.
df.columns = [c.strip().lower() for c in df.columns]
print("[INFO] Columns:", list(df.columns))

# Candidate feature columns; only the ones actually present in the file are kept.
# NOTE(review): matching is by exact name, so a PM2.5 column spelled differently
# (e.g. "pm2_5") is not picked up. Do not extend this list without confirming
# that scaler_x and the model were fitted on the same set of columns.
num_cols = ['pm2.5', 'pm10', 'no2', 'so2', 'co', 'o3', 'temp', 'wind', 'humidity']
num_cols = [c for c in num_cols if c in df.columns]
if not num_cols:
    # Fail here with a clear message instead of deep inside the scaler.
    raise ValueError(
        f"None of the expected feature columns were found in {DATA_PATH}; "
        f"available columns: {list(df.columns)}"
    )

# Pick the target: the first column whose name contains one of the keywords.
# Column names are already lower-cased above, so no further case folding is needed.
target_keywords = ("aqi", "index", "value")
possible_targets = [c for c in df.columns if any(key in c for key in target_keywords)]
if possible_targets:
    target_col = possible_targets[0]
else:
    # No AQI-like column: keep the historical fallback (last column) but say so,
    # because every metric below is then computed against that column.
    target_col = df.columns[-1]
    print(f"[WARN] No AQI-like column found; falling back to last column '{target_col}' as target")

print(f"[INFO] Using features: {num_cols}")
print(f"[INFO] Target column: {target_col}")

# Drop rows with a missing feature or target value, then build the
# 2-D feature matrix and the target as a single-column 2-D array.
df = df.dropna(subset=num_cols + [target_col])
X = df[num_cols].values
y = df[target_col].values.reshape(-1, 1)

# ---------------------------------------------------------------
# 3️⃣ Preprocess Data
# ---------------------------------------------------------------
# Apply the previously fitted scalers (transform only, no re-fitting).
X_scaled = scaler_x.transform(X)
y_scaled = scaler_y.transform(y)

# Insert a length-1 middle axis: (samples, features) -> (samples, 1, features).
# NOTE(review): presumably the model expects one time step per sample — confirm.
n_samples, n_features = X_scaled.shape
X_scaled = X_scaled.reshape((n_samples, 1, n_features))

# ---------------------------------------------------------------
# 4️⃣ Generate Predictions
# ---------------------------------------------------------------
print("[INFO] Generating predictions...")

# The model works in scaled space; map its output back with the target scaler.
y_pred_scaled = model.predict(X_scaled)
y_pred = scaler_y.inverse_transform(y_pred_scaled)

# Store the prediction next to the source rows, plus the signed error
# (prediction minus the value in the target column).
df["Predicted_AQI"] = y_pred
predicted = df["Predicted_AQI"]
actual = df[target_col]
df["Error"] = predicted - actual

# ---------------------------------------------------------------
# 5️⃣ Compute Evaluation Metrics
# ---------------------------------------------------------------
# Both metrics compare the target column with the stored predictions.
y_true = df[target_col]
y_hat = df["Predicted_AQI"]

mse = mean_squared_error(y_true, y_hat)
rmse = np.sqrt(mse)  # root of the mean squared error
r2 = r2_score(y_true, y_hat)

print(f"[RESULT] RMSE = {rmse:.2f}, R² = {r2:.3f}")

# ---------------------------------------------------------------
# 6️⃣ Identify Top Pollutant Contributor (by correlation)
# ---------------------------------------------------------------
# Correlate every feature with the target, drop the target's own entry and
# rank the features by absolute correlation, strongest first.
corr_matrix = df[num_cols + [target_col]].corr()
target_corr = corr_matrix[target_col].drop(target_col)
corrs = target_corr.abs().sort_values(ascending=False)

# The reported "contribution" is the top absolute correlation times 100.
top_pollutant = corrs.index[0]
top_contrib = corrs.iloc[0] * 100
print(f"[INFO] Top pollutant contributor: {top_pollutant.upper()} ({top_contrib:.2f}%)")

# ---------------------------------------------------------------
# 7️⃣ Save Predictions CSV
# ---------------------------------------------------------------
# Write the whole frame (source columns plus "Predicted_AQI" and "Error")
# to disk; the row index is left out of the file.
df.to_csv(path_or_buf=RESULT_PATH, index=False)
print(f"[INFO] ✅ Saved detailed predictions → {RESULT_PATH}")

# ---------------------------------------------------------------
# 8️⃣ Save JSON Report
# ---------------------------------------------------------------
# The target column has already been used to index df above, so it is
# guaranteed to exist here; no positional fallback is needed. The name
# true_col is kept as an alias of target_col.
true_col = target_col

# First ten rows of actual vs. predicted values, as a list of
# {"Actual_AQI": ..., "Predicted_AQI": ...} records.
sample_data = (
    df[[true_col, "Predicted_AQI"]]
    .head(10)
    .rename(columns={true_col: "Actual_AQI"})
    .to_dict(orient="records")
)

# Metrics are cast to plain floats so the json module can serialise them.
report = {
    "Model": "Hybrid AIS + HSA CNN-LSTM",
    "Dataset": os.path.basename(DATA_PATH),
    "Features_Used": num_cols,
    "Target": true_col,
    "Results": {
        "RMSE": float(rmse),
        "R2_Score": float(r2),
        "Top_Pollutant": top_pollutant,
        "Contribution(%)": round(float(top_contrib), 2),
    },
    "Sample_Predictions": sample_data,
}

# Explicit encoding so the file does not depend on the platform default.
with open(REPORT_PATH, "w", encoding="utf-8") as f:
    json.dump(report, f, indent=4)

print(f"[INFO] ✅ Final report saved → {REPORT_PATH}")

# ---------------------------------------------------------------
# 9️⃣ Print Summary
# ---------------------------------------------------------------
# Collect the summary lines first, then emit them one by one in order.
summary_lines = [
    "\n🧩 AIRSAGE HYBRID PREDICTION SUMMARY",
    "---------------------------------------------------",
    f"Dataset         : {os.path.basename(DATA_PATH)}",
    f"Total Samples   : {len(df)}",
    f"RMSE            : {rmse:.2f}",
    f"R² Score        : {r2:.3f}",
    f"Top Pollutant   : {top_pollutant.upper()} ({top_contrib:.2f}%)",
    f"Predictions CSV : {RESULT_PATH}",
    f"Report JSON     : {REPORT_PATH}",
]
for summary_line in summary_lines:
    print(summary_line)


[INFO] Loading model and scalers...
[INFO] Loading dataset...
[INFO] Columns: ['date', 'co', 'no', 'no2', 'o3', 'so2', 'pm2_5', 'pm10', 'nh3']
[INFO] Using features: ['pm10', 'no2', 'so2', 'co', 'o3']
[INFO] Target column: nh3
[INFO] Generating predictions...
[RESULT] RMSE = 8.84, R² = 0.888
[INFO] Top pollutant contributor: CO (78.50%)
[INFO] ✅ Saved detailed predictions → C:\Users\NXTWAVE\Downloads\Air Quality Prediction and Pollution Source Mapping System\hybrid_AirSage_final_predictions.csv
[INFO] ✅ Final report saved → C:\Users\NXTWAVE\Downloads\Air Quality Prediction and Pollution Source Mapping System\hybrid_AirSage_final_report.json

🧩 AIRSAGE HYBRID PREDICTION SUMMARY
---------------------------------------------------
Dataset         : delhi_aqi.csv
Total Samples   : 18776
RMSE            : 8.84
R² Score        : 0.888
Top Pollutant   : CO (78.50%)
Predictions CSV : C:\Users\NXTWAVE\Downloads\Air Quality Prediction and Pollution Source Mapping System\hybrid_AirSage_final_pred