In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier
from mealpy.swarm_based.WOA import OriginalWOA
from mealpy.utils.problem import Problem
import time

# =======================
# Load dataset
# =======================
# Read the whole table; the path is relative to the current working directory.
data = pd.read_csv("../Datasets/ovariantotal.csv")

# Positional split: every column except the last is treated as a feature,
# and the last column is taken to be the class label.
X = data.iloc[:, :-1].to_numpy()
y = data.iloc[:, -1].to_numpy()

# Quick sanity output: matrix dimensions and per-class sample counts.
# np.bincount needs non-negative integer labels.
print("Dataset shape:", X.shape)
print("Target distribution:", np.bincount(y))

# =======================
# Fitness Function
# =======================
def fitness_func(solution):
    """
    Fitness function for WOA.

    Decodes a candidate position into XGBoost hyperparameters, scores them with
    stratified 5-fold cross-validation on the module-level ``X`` / ``y`` and
    returns the negated mean accuracy, so that a minimizing optimizer
    maximizes accuracy.

    Parameters
    ----------
    solution : sequence of 7 numbers
        Candidate position, in the order
        [n_estimators, max_depth, learning_rate, subsample,
         colsample_bytree, min_child_weight, gamma].

    Returns
    -------
    float
        ``-mean(fold accuracies)``; lower is better.
    """
    # Count-like parameters are truncated toward zero by int(), so e.g. a
    # position of 4.7 for max_depth is evaluated as depth 4.
    n_estimators = int(solution[0])
    max_depth = int(solution[1])
    min_child_weight = int(solution[5])

    # Continuous parameters are passed through unchanged. gamma belongs here:
    # truncating it to an int collapses nearly every value of a [0, 1] search
    # range to 0, which makes that search dimension meaningless.
    learning_rate = float(solution[2])
    subsample = float(solution[3])
    colsample_bytree = float(solution[4])
    gamma = float(solution[6])

    # Fixed seed: every candidate is scored on the same folds, so fitness
    # differences come from the hyperparameters and not from the split.
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    scores = []

    for train_idx, val_idx in skf.split(X, y):
        X_train, X_val = X[train_idx], X[val_idx]
        y_train, y_val = y[train_idx], y[val_idx]

        model = XGBClassifier(
            n_estimators=n_estimators,
            max_depth=max_depth,
            learning_rate=learning_rate,
            subsample=subsample,
            colsample_bytree=colsample_bytree,
            min_child_weight=min_child_weight,
            gamma=gamma,
            use_label_encoder=False,
            eval_metric='logloss',
            verbosity=0,
            random_state=42
        )

        model.fit(X_train, y_train)
        preds = model.predict(X_val)
        scores.append(accuracy_score(y_val, preds))

    # Negate so that a minimizer drives accuracy up.
    return -np.mean(scores)

# =======================
# Parameter Bounds
# =======================
# These correspond to:
# [n_estimators, max_depth, learning_rate, subsample, colsample_bytree, min_child_weight, gamma]
# One (low, high) pair per search dimension, in the order:
# n_estimators, max_depth, learning_rate, subsample,
# colsample_bytree, min_child_weight, gamma
_param_ranges = [
    (200, 600),
    (4, 8),
    (0.01, 0.1),
    (0.8, 1.0),
    (0.8, 1.0),
    (1, 3),
    (0, 1),
]
lb = [low for low, _high in _param_ranges]
ub = [high for _low, high in _param_ranges]

# =======================
# Define the Problem
# =======================
# Bundle the objective with its box constraints for the optimizer.
# minmax="min" matches fitness_func, which returns negated accuracy.
#
# NOTE(review): `verbose=False` alone does not quiet the optimizer on mealpy
# releases that log through `log_to` (the per-epoch INFO lines captured in
# this cell's output came from such a run). `log_to=None` is the switch those
# releases read; `verbose` is kept for older releases that still use it.
# Confirm against the installed mealpy version.
problem = Problem(
    fit_func=fitness_func,
    lb=lb,
    ub=ub,
    minmax="min",
    verbose=False,
    log_to=None,
)
# =======================
# Run WOA Multiple Times (for stability)
# =======================

# Per-run results: the best position found and the accuracy it achieved.
best_solutions, best_scores = [], []
n_runs = 5  # you can reduce to 3 if you want even faster run

for run in range(n_runs):
    print(f"\n=== Running WOA {run + 1}/{n_runs} ===")

    # A fresh optimizer for each repetition, so no state is carried over
    # from the previous run (30 epochs x 10 agents per run).
    woa = OriginalWOA(epoch=30, pop_size=10)
    best_solution, best_fitness = woa.solve(problem)

    # The fitness is negated accuracy; flip the sign back before storing.
    best_scores.append(-best_fitness)
    best_solutions.append(best_solution)

    print(f"Best solution run {run + 1}: {best_solution}, Acc={-best_fitness:.4f}")

# =======================
# Stability Report
# =======================
# Column labels for the solution vector, in the same order as the bounds.
param_names = [
    "n_estimators", "max_depth", "learning_rate",
    "subsample", "colsample_bytree", "min_child_weight", "gamma"
]

# Stack the per-run best positions into a (runs, parameters) matrix so that
# each parameter can be summarized across runs.
best_solutions = np.array(best_solutions)

print("\n=== Stability Report ===")
# Transposing yields one row per parameter, i.e. that parameter's value in
# every run; pair each row with its label.
for name, vals in zip(param_names, best_solutions.T):
    print(f"{name}: mean={np.mean(vals):.4f}, std={np.std(vals):.4f}, chosen={np.round(np.mean(vals),2)}")

print(f"\nAvg Accuracy across runs: {np.mean(best_scores):.4f} ± {np.std(best_scores):.4f}")


Dataset shape: (349, 49)
Target distribution: [171 178]


2025/10/31 12:34:11 PM, INFO, mealpy.swarm_based.WOA.OriginalWOA: Solving single objective optimization problem.



=== Running WOA 1/5 ===


2025/10/31 12:34:29 PM, INFO, mealpy.swarm_based.WOA.OriginalWOA: >Problem: P, Epoch: 1, Current best: -0.9112215320910974, Global best: -0.9112215320910974, Runtime: 9.76617 seconds
2025/10/31 12:34:39 PM, INFO, mealpy.swarm_based.WOA.OriginalWOA: >Problem: P, Epoch: 2, Current best: -0.9112215320910974, Global best: -0.9112215320910974, Runtime: 10.61337 seconds
2025/10/31 12:34:54 PM, INFO, mealpy.swarm_based.WOA.OriginalWOA: >Problem: P, Epoch: 3, Current best: -0.9140786749482401, Global best: -0.9140786749482401, Runtime: 14.51740 seconds
2025/10/31 12:35:13 PM, INFO, mealpy.swarm_based.WOA.OriginalWOA: >Problem: P, Epoch: 4, Current best: -0.9140786749482401, Global best: -0.9140786749482401, Runtime: 18.73200 seconds
2025/10/31 12:35:24 PM, INFO, mealpy.swarm_based.WOA.OriginalWOA: >Problem: P, Epoch: 5, Current best: -0.9140786749482401, Global best: -0.9140786749482401, Runtime: 10.95255 seconds
2025/10/31 12:35:36 PM, INFO, mealpy.swarm_based.WOA.OriginalWOA: >Problem: P, Ep

Best solution run 1: [3.50262971e+02 4.72863113e+00 5.12070615e-02 8.10781926e-01
 8.00000000e-01 1.48945727e+00 5.54434496e-01], Acc=0.9169

=== Running WOA 2/5 ===


2025/10/31 12:41:07 PM, INFO, mealpy.swarm_based.WOA.OriginalWOA: >Problem: P, Epoch: 1, Current best: -0.9083643892339545, Global best: -0.9083643892339545, Runtime: 20.72178 seconds
2025/10/31 12:41:25 PM, INFO, mealpy.swarm_based.WOA.OriginalWOA: >Problem: P, Epoch: 2, Current best: -0.9083643892339545, Global best: -0.9083643892339545, Runtime: 17.84390 seconds
2025/10/31 12:41:42 PM, INFO, mealpy.swarm_based.WOA.OriginalWOA: >Problem: P, Epoch: 3, Current best: -0.9083643892339545, Global best: -0.9083643892339545, Runtime: 17.02137 seconds
2025/10/31 12:42:03 PM, INFO, mealpy.swarm_based.WOA.OriginalWOA: >Problem: P, Epoch: 4, Current best: -0.9083643892339545, Global best: -0.9083643892339545, Runtime: 20.63358 seconds
2025/10/31 12:42:23 PM, INFO, mealpy.swarm_based.WOA.OriginalWOA: >Problem: P, Epoch: 5, Current best: -0.9112215320910974, Global best: -0.9112215320910974, Runtime: 19.60020 seconds
2025/10/31 12:42:36 PM, INFO, mealpy.swarm_based.WOA.OriginalWOA: >Problem: P, E

Best solution run 2: [6.00000000e+02 8.00000000e+00 8.34046084e-02 1.00000000e+00
 1.00000000e+00 2.99131164e+00 1.00000000e+00], Acc=0.9198

=== Running WOA 3/5 ===


2025/10/31 12:47:46 PM, INFO, mealpy.swarm_based.WOA.OriginalWOA: >Problem: P, Epoch: 1, Current best: -0.9112215320910974, Global best: -0.9112215320910974, Runtime: 12.92366 seconds
2025/10/31 12:47:59 PM, INFO, mealpy.swarm_based.WOA.OriginalWOA: >Problem: P, Epoch: 2, Current best: -0.914120082815735, Global best: -0.914120082815735, Runtime: 12.61276 seconds
2025/10/31 12:48:13 PM, INFO, mealpy.swarm_based.WOA.OriginalWOA: >Problem: P, Epoch: 3, Current best: -0.914120082815735, Global best: -0.914120082815735, Runtime: 13.97078 seconds
2025/10/31 12:48:25 PM, INFO, mealpy.swarm_based.WOA.OriginalWOA: >Problem: P, Epoch: 4, Current best: -0.914120082815735, Global best: -0.914120082815735, Runtime: 12.08642 seconds
2025/10/31 12:48:38 PM, INFO, mealpy.swarm_based.WOA.OriginalWOA: >Problem: P, Epoch: 5, Current best: -0.914120082815735, Global best: -0.914120082815735, Runtime: 12.48062 seconds
2025/10/31 12:48:49 PM, INFO, mealpy.swarm_based.WOA.OriginalWOA: >Problem: P, Epoch: 6,

Best solution run 3: [5.21212040e+02 8.00000000e+00 9.90520691e-02 1.00000000e+00
 1.00000000e+00 2.82120694e+00 1.00000000e+00], Acc=0.9255

=== Running WOA 4/5 ===


2025/10/31 12:53:23 PM, INFO, mealpy.swarm_based.WOA.OriginalWOA: >Problem: P, Epoch: 1, Current best: -0.9169772256728779, Global best: -0.9169772256728779, Runtime: 15.18805 seconds
2025/10/31 12:53:37 PM, INFO, mealpy.swarm_based.WOA.OriginalWOA: >Problem: P, Epoch: 2, Current best: -0.9169772256728779, Global best: -0.9169772256728779, Runtime: 14.16704 seconds
2025/10/31 12:53:50 PM, INFO, mealpy.swarm_based.WOA.OriginalWOA: >Problem: P, Epoch: 3, Current best: -0.9169772256728779, Global best: -0.9169772256728779, Runtime: 12.77544 seconds
2025/10/31 12:54:02 PM, INFO, mealpy.swarm_based.WOA.OriginalWOA: >Problem: P, Epoch: 4, Current best: -0.9169772256728779, Global best: -0.9169772256728779, Runtime: 12.05098 seconds
2025/10/31 12:54:17 PM, INFO, mealpy.swarm_based.WOA.OriginalWOA: >Problem: P, Epoch: 5, Current best: -0.9169772256728779, Global best: -0.9169772256728779, Runtime: 14.75006 seconds
2025/10/31 12:54:29 PM, INFO, mealpy.swarm_based.WOA.OriginalWOA: >Problem: P, E

Best solution run 4: [6.00000000e+02 8.00000000e+00 8.29067119e-02 1.00000000e+00
 1.00000000e+00 2.89354126e+00 1.00000000e+00], Acc=0.9198

=== Running WOA 5/5 ===


2025/10/31 12:59:22 PM, INFO, mealpy.swarm_based.WOA.OriginalWOA: >Problem: P, Epoch: 1, Current best: -0.9082815734989648, Global best: -0.9082815734989648, Runtime: 13.28287 seconds
2025/10/31 12:59:33 PM, INFO, mealpy.swarm_based.WOA.OriginalWOA: >Problem: P, Epoch: 2, Current best: -0.9082815734989648, Global best: -0.9082815734989648, Runtime: 10.58288 seconds
2025/10/31 12:59:44 PM, INFO, mealpy.swarm_based.WOA.OriginalWOA: >Problem: P, Epoch: 3, Current best: -0.9083643892339545, Global best: -0.9083643892339545, Runtime: 11.77407 seconds
2025/10/31 12:59:58 PM, INFO, mealpy.swarm_based.WOA.OriginalWOA: >Problem: P, Epoch: 4, Current best: -0.9083643892339545, Global best: -0.9083643892339545, Runtime: 13.65972 seconds
2025/10/31 01:00:12 PM, INFO, mealpy.swarm_based.WOA.OriginalWOA: >Problem: P, Epoch: 5, Current best: -0.9112215320910974, Global best: -0.9112215320910974, Runtime: 14.14094 seconds
2025/10/31 01:00:23 PM, INFO, mealpy.swarm_based.WOA.OriginalWOA: >Problem: P, E

Best solution run 5: [4.16793486e+02 5.19180711e+00 5.74508521e-02 9.98426507e-01
 9.98434621e-01 2.90654267e+00 2.30860364e-01], Acc=0.9169

=== Stability Report ===
n_estimators: mean=497.6537, std=99.7667, chosen=497.65
max_depth: mean=6.7841, std=1.4964, chosen=6.78
learning_rate: mean=0.0748, std=0.0178, chosen=0.07
subsample: mean=0.9618, std=0.0755, chosen=0.96
colsample_bytree: mean=0.9597, std=0.0798, chosen=0.96
min_child_weight: mean=2.6204, std=0.5681, chosen=2.62
gamma: mean=0.7571, std=0.3146, chosen=0.76

Avg Accuracy across runs: 0.9198 ± 0.0032
