In [2]:
# 1. Import Libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from catboost import CatBoostClassifier
from mealpy.swarm_based.WOA import OriginalWOA  # Correct import with capital O in OriginalWOA
from mealpy.utils.problem import Problem

# 2. Load Dataset
df = pd.read_csv("../Datasets/ovariantotal.csv")  # Adjust path as needed
X = df.iloc[:, :-1].values  # Features as numpy array
y = df.iloc[:, -1].values   # Labels as numpy array

# 3. Scale Features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 4. Define Fitness Function (with max 20 features)
def fitness(solution):
    binary_solution = np.where(solution > 0.5, 1, 0)
    n_feats = np.sum(binary_solution)
    if n_feats == 0 or n_feats > 20:
        return 1.0  # Penalize empty or too large subsets

    X_sel = X_scaled[:, binary_solution == 1]

    cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
    param_grid = {
        'depth': [4, 6],
        'learning_rate': [0.05],
        'iterations': [100]
    }

    model = CatBoostClassifier(verbose=0, random_state=42)
    grid = GridSearchCV(model, param_grid, cv=cv, scoring='accuracy', n_jobs=-1)
    grid.fit(X_sel, y, early_stopping_rounds=20, eval_set=[(X_sel, y)])

    best_score = grid.best_score_
    return 1 - best_score  # Minimize error

# 5. Setup Problem for WOA
problem = Problem(
    fit_func=fitness,
    lb=[0] * X.shape[1],
    ub=[1] * X.shape[1],
    minmax="min",
    verbose=True
)

# 6. Run WOA
woa = OriginalWOA(
    epoch=15,         # Number of iterations, increase for better results but longer time
    pop_size=10       # Population size, increase for better exploration
)
best_position, best_fitness = woa.solve(problem)

print("\n✅ Best fitness (1 - accuracy):", best_fitness)
print("✅ Estimated accuracy:", 1 - best_fitness)

selected_mask = np.where(best_position > 0.5, 1, 0)
print("✅ Number of selected features:", np.sum(selected_mask))
print("✅ Selected feature indices:", np.where(selected_mask == 1)[0])

# 7. Train/Test Split using selected features
X_selected = X_scaled[:, selected_mask == 1]
X_train, X_test, y_train, y_test = train_test_split(
    X_selected, y, test_size=0.2, stratify=y, random_state=42
)

# 8. Train Final CatBoost Model
final_model = CatBoostClassifier(
    depth=6,
    learning_rate=0.05,
    iterations=100,
    verbose=0,
    random_state=42
)
final_model.fit(X_train, y_train, early_stopping_rounds=20, eval_set=(X_test, y_test))

# 9. Evaluate
y_pred = final_model.predict(X_test)
print("\n📊 Test Accuracy:", accuracy_score(y_test, y_pred))
print("\n📋 Classification Report:\n", classification_report(y_test, y_pred))
print("\n🔍 Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


TypeError: Problem.__init__() missing 1 required positional argument: 'bounds'