In [1]:
import os
import json
import yaml
import joblib
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input, Dense, Dropout, LSTM, Conv1D, MaxPooling1D, Flatten, concatenate
)
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns

# ============================================================
# Paths
# ============================================================
# Root folder that holds the input dataset and receives every artifact.
base_path = r"C:\Users\NXTWAVE\Downloads\Disease Outbreak Prediction & Health Risk Monitoring System"


def _under_base(*parts):
    """Join ``parts`` onto ``base_path`` and return the resulting path."""
    return os.path.join(base_path, *parts)


# Input CSV.
data_path = _under_base("archive", "Weather-related disease prediction.csv")

# Output artifacts, all written directly under ``base_path``.
model_path = _under_base("gwo_hho_medican_model.keras")
scaler_path = _under_base("gwo_hho_medican_scaler.pkl")
yaml_path = _under_base("gwo_hho_medican_config.yaml")
json_path = _under_base("gwo_hho_medican_prediction.json")
csv_path = _under_base("gwo_hho_medican_result.csv")

# Folder for the saved figures; created up front if it does not exist.
visual_dir = _under_base("visuals")
os.makedirs(visual_dir, exist_ok=True)

# ============================================================
# Load & Preprocess Data
# ============================================================
print("[INFO] Loading dataset...")
df = pd.read_csv(data_path)
print("[INFO] Shape:", df.shape)
print("[INFO] Columns:", df.columns.tolist())

# Remember the raw last column so we can tell whether the numeric filter
# below silently removes it.
raw_last_column = df.columns[-1] if len(df.columns) else None

# Keep numeric columns only, then drop rows with missing values.
df = df.select_dtypes(include=[np.number]).dropna()

# The last remaining column is used as the regression target.
# NOTE(review): if the raw file's last column is non-numeric it is dropped
# here and the target becomes the last *numeric* column instead - confirm
# that this is the intended target.
if df.shape[1] < 2:
    raise ValueError(
        "Need at least two numeric columns (features + target) after filtering."
    )
if df.columns[-1] != raw_last_column:
    print(
        f"[WARN] Column '{raw_last_column}' is not numeric and was dropped; "
        f"using '{df.columns[-1]}' as the target."
    )

# Ordered 80/20 split (no shuffling): first 80% of rows train, the rest test.
split = int(0.8 * len(df))
if split < 1:
    raise ValueError("Not enough rows left after cleaning to build a training set.")

# Fit the scaler on the training rows only, so that test-set minima/maxima
# do not leak into training; then apply it to every row. Test values may
# therefore fall slightly outside [0, 1].
scaler = MinMaxScaler()
scaler.fit(df.iloc[:split])
scaled = scaler.transform(df)

# Every column except the last is a feature; the last column is the target.
X = scaled[:, :-1]
y = scaled[:, -1]

# Split data
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

# Reshape for CNN-LSTM: (samples, features) -> (samples, features, 1)
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# Persist the fitted scaler next to the other artifacts.
joblib.dump(scaler, scaler_path)
print(f"[INFO] Scaler saved at: {scaler_path}")

# ============================================================
# CNN-LSTM Model Builder
# ============================================================
def create_model(params):
    """Build and compile the two-branch Conv1D + LSTM regression network.

    Args:
        params: Three-item sequence ``(lr, dropout, lstm_units)``. ``lr`` is
            the Adam learning rate, ``dropout`` the rate of the Dropout
            layer, and ``lstm_units`` the LSTM width (truncated with
            ``int``).

    Returns:
        A Keras ``Model`` compiled with the Adam optimizer and MSE loss.

    The input shape is ``(X_train.shape[1], 1)``, read from the module-level
    ``X_train`` array.
    """
    learning_rate, dropout_rate, n_lstm_units = params
    sequence_in = Input(shape=(X_train.shape[1], 1))

    # Convolutional branch: conv -> max-pool -> flatten.
    conv_features = Conv1D(filters=32, kernel_size=3, activation="relu")(sequence_in)
    conv_features = MaxPooling1D(pool_size=2)(conv_features)
    conv_features = Flatten()(conv_features)

    # Recurrent branch reads the same input and returns only its last output.
    recurrent_features = LSTM(int(n_lstm_units), return_sequences=False)(sequence_in)

    # Merge both branches and regress a single value.
    head = concatenate([conv_features, recurrent_features])
    head = Dense(64, activation="relu")(head)
    head = Dropout(dropout_rate)(head)
    prediction = Dense(1)(head)

    network = Model(sequence_in, prediction)
    network.compile(optimizer=Adam(learning_rate=learning_rate), loss="mse")
    return network

# ============================================================
# Hybrid GWO + HHO Optimizer
# ============================================================
def objective_function(params):
    """Return the validation MSE of a briefly trained model for ``params``.

    The candidate is trained for 8 epochs on the first 80% of the
    module-level ``X_train`` / ``y_train`` and scored on the remaining 20%.
    The test set is deliberately not touched here: using it as the search
    fitness would tune the hyperparameters on the same data that the final
    metrics are reported on.

    Args:
        params: Three-item sequence ``(lr, dropout, lstm_units)`` forwarded
            to ``create_model``.

    Returns:
        Mean squared error on the held-out slice of the training data.

    Raises:
        ValueError: If the training set is too small to hold out a
            validation slice.
    """
    lr, dropout, lstm_units = params

    n_samples = len(X_train)
    n_fit = int(0.8 * n_samples)
    if n_fit < 1 or n_fit >= n_samples:
        raise ValueError(
            "Training set is too small to hold out a validation slice."
        )

    # The optimizer builds many throw-away models; reset Keras' global state
    # so that they do not accumulate in memory across evaluations.
    tf.keras.backend.clear_session()

    model = create_model([lr, dropout, lstm_units])
    model.fit(X_train[:n_fit], y_train[:n_fit], epochs=8, batch_size=32, verbose=0)
    preds = model.predict(X_train[n_fit:], verbose=0)
    return mean_squared_error(y_train[n_fit:], preds)

def hybrid_gwo_hho(opt_iters=6, pop_size=4, objective=None, lb=None, ub=None, seed=None):
    """Minimise ``objective`` with a hybrid Grey Wolf / Harris Hawks search.

    Each candidate ("wolf") is moved once per iteration:

    * The escaping energy ``E = 2 * E0 * (1 - t / opt_iters)`` is drawn with
      a random ``E0`` in [-1, 1).
    * If ``|E| >= 1`` (exploration) the candidate is re-sampled uniformly
      inside the bounds.
    * Otherwise (exploitation) a GWO encircling move towards the best
      candidate is computed first, and the HHO besiege step is then applied
      to that GWO position.

    A move is kept only if it improves that candidate's fitness; the global
    best is updated whenever a better position is found.

    Previously the GWO position was computed and then always overwritten,
    and ``E0`` was drawn but never used, so the first half of the run was a
    pure random search.

    Args:
        opt_iters: Number of optimisation iterations.
        pop_size: Number of candidates; must be at least 1.
        objective: Callable taking a 1-D position array and returning a
            scalar to minimise. Defaults to the module-level
            ``objective_function``.
        lb: Lower bounds per dimension. Defaults to ``[1e-4, 0.1, 16]``
            (lr, dropout, lstm_units).
        ub: Upper bounds per dimension. Defaults to ``[1e-2, 0.5, 128]``.
        seed: Optional seed for a private random generator. When ``None``
            the global ``np.random`` state is used, as before.

    Returns:
        Tuple ``(best_position, best_score)``.

    Raises:
        ValueError: If ``pop_size < 1`` or the bounds are not matching 1-D
            sequences.
    """
    if pop_size < 1:
        raise ValueError("pop_size must be at least 1")
    if objective is None:
        # Resolved at call time so the default stays the file's own objective.
        objective = objective_function

    lb = np.asarray([1e-4, 0.1, 16] if lb is None else lb, dtype=float)
    ub = np.asarray([1e-2, 0.5, 128] if ub is None else ub, dtype=float)
    if lb.ndim != 1 or lb.shape != ub.shape:
        raise ValueError("lb and ub must be 1-D sequences of equal length")

    # A private generator keeps seeded runs reproducible without touching
    # the global NumPy random state.
    rng = np.random if seed is None else np.random.RandomState(seed)

    print("[INFO] Running Hybrid GWO + HHO optimization...")

    wolves = rng.uniform(lb, ub, (pop_size, lb.size))
    fitness = np.array([objective(w) for w in wolves], dtype=float)

    best_idx = int(np.argmin(fitness))
    best_wolf = wolves[best_idx].copy()
    best_score = fitness[best_idx]

    for t in range(opt_iters):
        remaining = 1 - t / opt_iters  # goes from 1 towards 0
        a = 2 * remaining              # GWO control parameter, 2 -> 0
        for i in range(pop_size):
            # HHO escaping energy: random sign/magnitude, shrinking over time.
            E0 = 2 * rng.rand() - 1
            E = 2 * E0 * remaining

            if abs(E) >= 1:
                # Exploration: jump to a fresh random position.
                new_pos = rng.uniform(lb, ub)
            else:
                # Exploitation, step 1: GWO encircling move towards the best.
                r1, r2 = rng.rand(), rng.rand()
                A = 2 * a * r1 - a
                C = 2 * r2
                D = np.abs(C * best_wolf - wolves[i])
                gwo_pos = best_wolf - A * D

                # Exploitation, step 2: HHO besiege applied to the GWO move.
                jump_strength = 2 * (1 - rng.rand())
                new_pos = best_wolf - E * np.abs(jump_strength * best_wolf - gwo_pos)

            new_pos = np.clip(new_pos, lb, ub)
            new_fit = objective(new_pos)

            # Greedy selection: keep the move only if it helps this candidate.
            if new_fit < fitness[i]:
                fitness[i] = new_fit
                wolves[i] = new_pos
            if new_fit < best_score:
                best_wolf = new_pos.copy()
                best_score = new_fit

        print(f"[INFO] Iter {t+1}/{opt_iters} | Best MSE: {best_score:.6f}")

    return best_wolf, best_score

# ============================================================
# Run Hybrid Optimization
# ============================================================
# Search the (lr, dropout, lstm_units) space; the call returns the best
# position found together with its score.
optimization_result = hybrid_gwo_hho(opt_iters=4, pop_size=4)
best_params = optimization_result[0]
best_score = optimization_result[1]

print(f"[INFO] Best Parameters (lr, dropout, lstm_units): {best_params}")
print(f"[INFO] Best MSE: {best_score:.6f}")

# ============================================================
# Final Model Training
# ============================================================
# Rebuild the network with the best hyperparameters found by the search.
model = create_model(best_params)

# Train for 40 epochs with Keras' built-in 20% validation split, then save.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    batch_size=32,
    epochs=40,
    verbose=1,
)

model.save(model_path)
print(f"[INFO] Model saved at: {model_path}")

# ============================================================
# Evaluation
# ============================================================
# Predict on the held-out test rows. Predictions and targets are both in
# the scaler's output space, so the metrics below are in scaled units.
y_pred = model.predict(X_test)

# Flatten arrays to avoid shape mismatch ((n, 1) predictions vs (n,) targets)
y_pred = y_pred.flatten()
y_test = y_test.flatten()

mae = mean_absolute_error(y_test, y_pred)
# Take the square root of the MSE directly instead of passing ``squared=False``:
# that keyword is deprecated in recent scikit-learn releases, and this form
# works on every version without probing for a TypeError.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

metrics = {"MAE": float(mae), "RMSE": float(rmse), "R2": float(r2)}
print("[INFO] Metrics:", metrics)

# Save results
results_df = pd.DataFrame({"Actual": y_test, "Predicted": y_pred})
results_df.to_csv(csv_path, index=False)
print(f"[INFO] Results CSV saved at: {csv_path}")

# Convert the NumPy values to plain floats once so that both the JSON and
# the YAML writer can serialise them.
best_params_list = [float(x) for x in best_params]

with open(json_path, "w", encoding="utf-8") as f:
    json.dump({"metrics": metrics, "best_params": best_params_list}, f, indent=4)

with open(yaml_path, "w", encoding="utf-8") as f:
    yaml.dump({"optimizer": "Hybrid GWO+HHO", "best_params": best_params_list}, f)

# ============================================================
# Visualizations
# ============================================================
def _save_and_close(filename):
    """Write the current figure into ``visual_dir`` and close it."""
    plt.savefig(os.path.join(visual_dir, filename))
    plt.close()


# 1) Training vs. validation loss per epoch.
plt.figure()
for history_key, curve_label in (
    ("loss", "Train Loss"),
    ("val_loss", "Validation Loss"),
):
    plt.plot(history.history[history_key], label=curve_label)
plt.legend()
plt.title("Training and Validation Loss")
_save_and_close("gwo_hho_medican_loss_graph.png")

# 2) Actual vs. predicted scatter plot.
plt.figure()
plt.scatter(y_test, y_pred, alpha=0.7)
plt.xlabel("Actual")
plt.ylabel("Predicted")
plt.title("Prediction Comparison")
_save_and_close("gwo_hho_medican_comparison_graph.png")

# 3) Correlation heatmap between actual and predicted values; both arrays
#    are flattened to 1-D before the DataFrame is built.
corr_columns = {
    "Actual": y_test.flatten(),
    "Predicted": y_pred.flatten(),
}
corr_df = pd.DataFrame(corr_columns)
plt.figure()
sns.heatmap(corr_df.corr(), annot=True, cmap="coolwarm")
plt.title("Correlation Heatmap")
_save_and_close("gwo_hho_medican_heatmap.png")

print("[âœ…] All outputs saved successfully in:")
print(base_path)



[INFO] Loading dataset...
[INFO] Shape: (5200, 51)
[INFO] Columns: ['Age', 'Gender', 'Temperature (C)', 'Humidity', 'Wind Speed (km/h)', 'nausea', 'joint_pain', 'abdominal_pain', 'high_fever', 'chills', 'fatigue', 'runny_nose', 'pain_behind_the_eyes', 'dizziness', 'headache', 'chest_pain', 'vomiting', 'cough', 'shivering', 'asthma_history', 'high_cholesterol', 'diabetes', 'obesity', 'hiv_aids', 'nasal_polyps', 'asthma', 'high_blood_pressure', 'severe_headache', 'weakness', 'trouble_seeing', 'fever', 'body_aches', 'sore_throat', 'sneezing', 'diarrhea', 'rapid_breathing', 'rapid_heart_rate', 'pain_behind_eyes', 'swollen_glands', 'rashes', 'sinus_headache', 'facial_pain', 'shortness_of_breath', 'reduced_smell_and_taste', 'skin_irritation', 'itchiness', 'throbbing_headache', 'confusion', 'back_pain', 'knee_ache', 'prognosis']
[INFO] Scaler saved at: C:\Users\NXTWAVE\Downloads\Disease Outbreak Prediction & Health Risk Monitoring System\gwo_hho_medican_scaler.pkl
[INFO] Running Hybrid GWO +