In [1]:
import os
import json
import yaml
import joblib
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# ---------------------------------------------------------------
# 1️⃣ Paths
# ---------------------------------------------------------------
DATA_PATH = r"C:\Users\NXTWAVE\Downloads\Air Quality Prediction and Pollution Source Mapping System\archive\delhi_aqi.csv"
OUTPUT_DIR = r"C:\Users\NXTWAVE\Downloads\Air Quality Prediction and Pollution Source Mapping System"

os.makedirs(OUTPUT_DIR, exist_ok=True)

# ---------------------------------------------------------------
# 2️⃣ Load Data
# ---------------------------------------------------------------
print("[INFO] Loading dataset...")
df = pd.read_csv(DATA_PATH)
df.columns = [c.strip().lower() for c in df.columns]
print("[INFO] Shape:", df.shape)

num_cols = ['pm2.5', 'pm10', 'no2', 'so2', 'co', 'o3', 'temp', 'wind', 'humidity']
num_cols = [c for c in num_cols if c in df.columns]
target_col = 'aqi' if 'aqi' in df.columns else df.columns[-1]
df = df.dropna(subset=num_cols + [target_col])

X = df[num_cols].values
y = df[target_col].values.reshape(-1, 1)

# ---------------------------------------------------------------
# 3️⃣ Correlation Heatmap
# ---------------------------------------------------------------
plt.figure(figsize=(10, 7))
sns.heatmap(df[num_cols + [target_col]].corr(), annot=True, cmap='coolwarm', fmt='.2f')
plt.title("🌡️ Air Quality Feature Correlation Heatmap")
plt.tight_layout()
plt.savefig(os.path.join(OUTPUT_DIR, "AirSage_heatmap.png"))
plt.close()
print("[PLOT] ✅ Saved heatmap.")

# ---------------------------------------------------------------
# 4️⃣ Data Scaling
# ---------------------------------------------------------------
scaler_x = MinMaxScaler()
scaler_y = MinMaxScaler()

X_scaled = scaler_x.fit_transform(X)
y_scaled = scaler_y.fit_transform(y)
X_scaled = X_scaled.reshape((X_scaled.shape[0], 1, X_scaled.shape[1]))

# ---------------------------------------------------------------
# 5️⃣ Hybrid CNN-LSTM + PSO
# ---------------------------------------------------------------
def build_model(filters, lstm_units, dropout_rate, input_shape):
    model = Sequential([
        Conv1D(int(filters), kernel_size=1, activation='relu', input_shape=input_shape),
        LSTM(int(lstm_units), return_sequences=False),
        Dropout(dropout_rate),
        Dense(1, activation='linear')
    ])
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model

def evaluate_model(filters, lstm_units, dropout_rate):
    model = build_model(filters, lstm_units, dropout_rate, (X_scaled.shape[1], X_scaled.shape[2]))
    es = EarlyStopping(patience=2, restore_best_weights=True)
    model.fit(X_scaled, y_scaled, epochs=5, batch_size=32, verbose=0, callbacks=[es])
    preds = model.predict(X_scaled)
    rmse = np.sqrt(mean_squared_error(y_scaled, preds))
    return rmse

n_particles, n_iter = 5, 5
particles = np.array([[random.randint(16, 64), random.randint(16, 64), random.uniform(0.1, 0.4)] for _ in range(n_particles)])
velocities = np.zeros_like(particles)
pbest, pbest_scores = particles.copy(), np.array([1e9]*n_particles)
gbest, gbest_score = None, 1e9

print("[INFO] Starting PSO optimization...")
for it in range(n_iter):
    print(f"[PSO] Iter {it+1}/{n_iter}")
    for i, p in enumerate(particles):
        rmse = evaluate_model(*p)
        if rmse < pbest_scores[i]:
            pbest_scores[i], pbest[i] = rmse, p
        if rmse < gbest_score:
            gbest_score, gbest = rmse, p
    velocities = 0.5*velocities + 1.0*np.random.rand()*(pbest - particles) + 1.5*np.random.rand()*(gbest - particles)
    particles += velocities
print(f"[PSO] ✅ Best: filters={gbest[0]:.0f}, lstm={gbest[1]:.0f}, dropout={gbest[2]:.2f}, RMSE={gbest_score:.4f}")

# ---------------------------------------------------------------
# 6️⃣ Final Model Training
# ---------------------------------------------------------------
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42)
model = build_model(gbest[0], gbest[1], gbest[2], (X_train.shape[1], X_train.shape[2]))
es = EarlyStopping(patience=10, restore_best_weights=True)

history = model.fit(
    X_train, y_train,
    epochs=50, batch_size=32,
    validation_data=(X_test, y_test),
    callbacks=[es], verbose=1
)

# ---------------------------------------------------------------
# 7️⃣ Accuracy (Loss/MAE) Graph
# ---------------------------------------------------------------
plt.figure(figsize=(8,5))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('📈 AirSage Training & Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('MSE Loss')
plt.legend()
plt.tight_layout()
plt.savefig(os.path.join(OUTPUT_DIR, "AirSage_accuracy_graph.png"))
plt.close()
print("[PLOT] ✅ Saved accuracy/loss graph.")

# ---------------------------------------------------------------
# 8️⃣ Predictions
# ---------------------------------------------------------------
y_pred_scaled = model.predict(X_test)
y_pred = scaler_y.inverse_transform(y_pred_scaled)
y_test_inv = scaler_y.inverse_transform(y_test)

rmse = np.sqrt(mean_squared_error(y_test_inv, y_pred))
r2 = r2_score(y_test_inv, y_pred)
print(f"[RESULT] RMSE={rmse:.2f}, R²={r2:.3f}")

# ---------------------------------------------------------------
# 9️⃣ Actual vs Predicted Comparison Graph
# ---------------------------------------------------------------
plt.figure(figsize=(8,5))
plt.scatter(range(len(y_test_inv)), y_test_inv, label='Actual AQI', s=20)
plt.scatter(range(len(y_pred)), y_pred, label='Predicted AQI', s=20)
plt.title("📊 Actual vs Predicted AQI Comparison")
plt.xlabel("Samples")
plt.ylabel("AQI")
plt.legend()
plt.tight_layout()
plt.savefig(os.path.join(OUTPUT_DIR, "AirSage_comparison_graph.png"))
plt.close()
print("[PLOT] ✅ Saved comparison graph.")

# ---------------------------------------------------------------
# 🔟 Result Summary Graph (Bar)
# ---------------------------------------------------------------
metrics = {'RMSE': rmse, 'R² Score': r2}
plt.figure(figsize=(5,4))
sns.barplot(x=list(metrics.keys()), y=list(metrics.values()), palette='viridis')
plt.title("📊 AirSage Model Performance Summary")
plt.tight_layout()
plt.savefig(os.path.join(OUTPUT_DIR, "AirSage_result_graph.png"))
plt.close()
print("[PLOT] ✅ Saved result summary graph.")

# ---------------------------------------------------------------
# 1️⃣1️⃣ Prediction Trend Graph
# ---------------------------------------------------------------
plt.figure(figsize=(10,5))
plt.plot(y_test_inv[:100], label='Actual AQI')
plt.plot(y_pred[:100], label='Predicted AQI', linestyle='--')
plt.title("🌬️ AQI Prediction Trend (first 100 samples)")
plt.xlabel("Time Step")
plt.ylabel("AQI Value")
plt.legend()
plt.tight_layout()
plt.savefig(os.path.join(OUTPUT_DIR, "AirSage_prediction_graph.png"))
plt.close()
print("[PLOT] ✅ Saved prediction trend graph.")

# ---------------------------------------------------------------
# 1️⃣2️⃣ Save Artifacts
# ---------------------------------------------------------------
model.save(os.path.join(OUTPUT_DIR, "AirSage_model.h5"))
joblib.dump({'scaler_x': scaler_x, 'scaler_y': scaler_y}, os.path.join(OUTPUT_DIR, "AirSage_scalers.pkl"))

config = {
    "model_type": "Hybrid CNN-LSTM + PSO",
    "features": num_cols,
    "target": target_col,
    "best_params": {
        "filters": float(gbest[0]),
        "lstm_units": float(gbest[1]),
        "dropout": float(gbest[2])
    }
}
with open(os.path.join(OUTPUT_DIR, "AirSage_config.yaml"), "w") as f:
    yaml.dump(config, f)

results = {
    "RMSE": float(rmse),
    "R2_Score": float(r2),
    "Sample_Predictions": y_pred[:10].flatten().tolist()
}
with open(os.path.join(OUTPUT_DIR, "AirSage_results.json"), "w") as f:
    json.dump(results, f, indent=4)

print("\n✅ All outputs generated successfully in:")
print(OUTPUT_DIR)
print("🧾 Generated files:")
print(" - AirSage_model.h5")
print(" - AirSage_scalers.pkl")
print(" - AirSage_config.yaml")
print(" - AirSage_results.json")
print(" - AirSage_heatmap.png")
print(" - AirSage_accuracy_graph.png")
print(" - AirSage_comparison_graph.png")
print(" - AirSage_result_graph.png")
print(" - AirSage_prediction_graph.png")



[INFO] Loading dataset...
[INFO] Shape: (18776, 9)


  plt.tight_layout()
  plt.savefig(os.path.join(OUTPUT_DIR, "AirSage_heatmap.png"))


[PLOT] ✅ Saved heatmap.
[INFO] Starting PSO optimization...
[PSO] Iter 1/5




[PSO] Iter 2/5
[PSO] Iter 3/5
[PSO] Iter 4/5
[PSO] Iter 5/5
[PSO] ✅ Best: filters=77, lstm=48, dropout=0.35, RMSE=0.0316
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
[PLOT] ✅ Saved accuracy/loss graph.


  plt.tight_layout()
  plt.savefig(os.path.join(OUTPUT_DIR, "AirSage_accuracy_graph.png"))


[RESULT] RMSE=8.72, R²=0.893


  plt.tight_layout()
  plt.savefig(os.path.join(OUTPUT_DIR, "AirSage_comparison_graph.png"))


[PLOT] ✅ Saved comparison graph.
[PLOT] ✅ Saved result summary graph.



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=list(metrics.keys()), y=list(metrics.values()), palette='viridis')
  plt.tight_layout()
  plt.savefig(os.path.join(OUTPUT_DIR, "AirSage_result_graph.png"))
  plt.tight_layout()


[PLOT] ✅ Saved prediction trend graph.

✅ All outputs generated successfully in:
C:\Users\NXTWAVE\Downloads\Air Quality Prediction and Pollution Source Mapping System
🧾 Generated files:
 - AirSage_model.h5
 - AirSage_scalers.pkl
 - AirSage_config.yaml
 - AirSage_results.json
 - AirSage_heatmap.png
 - AirSage_accuracy_graph.png
 - AirSage_comparison_graph.png
 - AirSage_result_graph.png
 - AirSage_prediction_graph.png


  plt.savefig(os.path.join(OUTPUT_DIR, "AirSage_prediction_graph.png"))
  saving_api.save_model(
