In [1]:
DATA_PATH = r"C:\Users\NXTWAVE\Downloads\Air Quality Prediction and Pollution Source Mapping System\archive\delhi_aqi.csv"
OUTPUT_DIR = r"C:\Users\NXTWAVE\Downloads\Air Quality Prediction and Pollution Source Mapping System"

# ---------------------------------------------------------------
# 1️⃣ Import libraries
# ---------------------------------------------------------------
import os
import json
import yaml
import joblib
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# ---------------------------------------------------------------
# 2️⃣ Load Dataset
# ---------------------------------------------------------------
print("[INFO] Loading dataset...")
df = pd.read_csv(DATA_PATH)
print("[INFO] Shape:", df.shape)

# Rename or infer columns if necessary
df.columns = [col.strip().lower() for col in df.columns]

# Ensure the dataset has numeric pollutant columns
num_cols = ['pm2.5', 'pm10', 'no2', 'so2', 'co', 'o3', 'temp', 'wind', 'humidity']
num_cols = [c for c in num_cols if c in df.columns]
print("[INFO] Using features:", num_cols)

# Target variable (AQI or 'aqi' equivalent)
target_col = 'aqi' if 'aqi' in df.columns else df.columns[-1]

df = df.dropna(subset=num_cols + [target_col])
X = df[num_cols].values
y = df[target_col].values.reshape(-1, 1)

# ---------------------------------------------------------------
# 3️⃣ Data Scaling
# ---------------------------------------------------------------
scaler_x = MinMaxScaler()
scaler_y = MinMaxScaler()

X_scaled = scaler_x.fit_transform(X)
y_scaled = scaler_y.fit_transform(y)

# reshape for LSTM (samples, timesteps, features)
X_scaled = X_scaled.reshape((X_scaled.shape[0], 1, X_scaled.shape[1]))

# ---------------------------------------------------------------
# 4️⃣ PSO Optimizer for CNN-LSTM hyperparameters
# ---------------------------------------------------------------
print("[INFO] Running Particle Swarm Optimization (simplified)...")

import random

def build_model(filters, lstm_units, dropout_rate, input_shape):
    model = Sequential([
        Conv1D(int(filters), kernel_size=1, activation='relu', input_shape=input_shape),
        LSTM(int(lstm_units), return_sequences=False),
        Dropout(dropout_rate),
        Dense(1, activation='linear')
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

def evaluate_model(filters, lstm_units, dropout_rate):
    model = build_model(filters, lstm_units, dropout_rate, (X_scaled.shape[1], X_scaled.shape[2]))
    es = EarlyStopping(patience=2, restore_best_weights=True)
    model.fit(X_scaled, y_scaled, epochs=5, batch_size=32, verbose=0, callbacks=[es])
    preds = model.predict(X_scaled)
    rmse = np.sqrt(mean_squared_error(y_scaled, preds))
    return rmse

# PSO parameters
n_particles = 5
n_iterations = 5
w, c1, c2 = 0.5, 1.0, 1.5

# initialize particles
particles = np.array([[random.randint(16, 64), random.randint(16, 64), random.uniform(0.1, 0.4)] for _ in range(n_particles)])
velocities = np.zeros_like(particles)
pbest = particles.copy()
pbest_scores = np.array([1e9]*n_particles)
gbest = None
gbest_score = 1e9

for it in range(n_iterations):
    print(f"[PSO] Iter {it+1:02d}/{n_iterations}")
    for i, particle in enumerate(particles):
        f, l, d = particle
        rmse = evaluate_model(f, l, d)
        if rmse < pbest_scores[i]:
            pbest_scores[i] = rmse
            pbest[i] = particle
        if rmse < gbest_score:
            gbest_score = rmse
            gbest = particle
    velocities = w*velocities + c1*np.random.rand()*(pbest - particles) + c2*np.random.rand()*(gbest - particles)
    particles += velocities

print(f"[PSO] ✅ Best params: filters={gbest[0]:.0f}, lstm={gbest[1]:.0f}, dropout={gbest[2]:.2f}, RMSE={gbest_score:.4f}")

# ---------------------------------------------------------------
# 5️⃣ Train Final Model
# ---------------------------------------------------------------
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42)

final_model = build_model(gbest[0], gbest[1], gbest[2], (X_train.shape[1], X_train.shape[2]))

es = EarlyStopping(patience=10, restore_best_weights=True)
history = final_model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test),
                          callbacks=[es], verbose=1)

# ---------------------------------------------------------------
# 6️⃣ Evaluate Model
# ---------------------------------------------------------------
y_pred_scaled = final_model.predict(X_test)
y_pred = scaler_y.inverse_transform(y_pred_scaled)
y_test_inv = scaler_y.inverse_transform(y_test)

rmse = np.sqrt(mean_squared_error(y_test_inv, y_pred))
r2 = r2_score(y_test_inv, y_pred)
print(f"[RESULT] RMSE={rmse:.2f}, R²={r2:.3f}")

# ---------------------------------------------------------------
# 7️⃣ Save Artifacts
# ---------------------------------------------------------------
h5_path = os.path.join(OUTPUT_DIR, "AirSage_model.h5")
pkl_path = os.path.join(OUTPUT_DIR, "AirSage_scalers.pkl")
yaml_path = os.path.join(OUTPUT_DIR, "AirSage_config.yaml")
json_path = os.path.join(OUTPUT_DIR, "AirSage_results.json")

final_model.save(h5_path)
joblib.dump({'scaler_x': scaler_x, 'scaler_y': scaler_y}, pkl_path)

config = {
    "model_type": "Hybrid CNN-LSTM + PSO",
    "features": num_cols,
    "target": target_col,
    "best_params": {
        "filters": float(gbest[0]),
        "lstm_units": float(gbest[1]),
        "dropout": float(gbest[2])
    }
}
with open(yaml_path, "w") as f:
    yaml.dump(config, f)

results = {
    "RMSE": rmse,
    "R2_Score": r2,
    "Sample_Predictions": y_pred[:10].flatten().tolist()
}
with open(json_path, "w") as f:
    json.dump(results, f, indent=4)

print("\n[INFO] ✅ All files saved successfully!")
print(f"Model: {h5_path}")
print(f"Scalers: {pkl_path}")
print(f"Config: {yaml_path}")
print(f"Results: {json_path}")



[INFO] Loading dataset...
[INFO] Shape: (18776, 9)
[INFO] Using features: ['pm10', 'no2', 'so2', 'co', 'o3']
[INFO] Running Particle Swarm Optimization (simplified)...
[PSO] Iter 01/5



[PSO] Iter 02/5
[PSO] Iter 03/5
[PSO] Iter 04/5
[PSO] Iter 05/5
[PSO] ✅ Best params: filters=54, lstm=40, dropout=0.28, RMSE=0.0317
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
[RESULT] RMSE=8.78, R²=0.892

[INFO] ✅ All files saved successfully!
Model: C:\Users\NXTWAVE\Downloads\Air Quality Prediction and Pollution Source Mapping System\AirSage_model.h5
Scalers: C:\Users\NXTWAVE\Downloads\Air Quality Predic

  saving_api.save_model(
