In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/aid-vgg16/aid_vgg16_features.npy
/kaggle/input/aid-vgg16/aid_vgg16_labels.npy
/kaggle/input/aid-scene-classification-datasets/AID/Mountain/mountain_59.jpg
/kaggle/input/aid-scene-classification-datasets/AID/Mountain/mountain_154.jpg
/kaggle/input/aid-scene-classification-datasets/AID/Mountain/mountain_257.jpg
/kaggle/input/aid-scene-classification-datasets/AID/Mountain/mountain_317.jpg
/kaggle/input/aid-scene-classification-datasets/AID/Mountain/mountain_223.jpg
/kaggle/input/aid-scene-classification-datasets/AID/Mountain/mountain_133.jpg
/kaggle/input/aid-scene-classification-datasets/AID/Mountain/mountain_108.jpg
/kaggle/input/aid-scene-classification-datasets/AID/Mountain/mountain_24.jpg
/kaggle/input/aid-scene-classification-datasets/AID/Mountain/mountain_170.jpg
/kaggle/input/aid-scene-classification-datasets/AID/Mountain/mountain_53.jpg
/kaggle/input/aid-scene-classification-datasets/AID/Mountain/mountain_71.jpg
/kaggle/input/aid-scene-classification-datasets/AID/Mo

In [2]:
import os
import time
from pathlib import Path
from collections import Counter

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models

In [3]:
DATA_DIR = "../input/aid-scene-classification-datasets/AID"   # root folder with 30 class subfolders
BATCH_SIZE = 16
NUM_WORKERS = 4
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
OUTPUT_FEATURES_PATH = "aid_vgg16_features.npy"
OUTPUT_LABELS_PATH = "aid_vgg16_labels.npy"

In [4]:
#image preprocessing
transform = transforms.Compose([
    transforms.Resize((224, 224)),                  #vgg16 only accepts this input shape of image 
    transforms.ToTensor(),                          #converts the image into a pytorch tensor, changes datatype and brings the values between 0 and 1
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

In [5]:
dataset = datasets.ImageFolder(root=DATA_DIR, transform=transform)
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=False,
                        num_workers=NUM_WORKERS, pin_memory=True)
print(f"Dataset : {dataset}")

Dataset : Dataset ImageFolder
    Number of datapoints: 10000
    Root location: ../input/aid-scene-classification-datasets/AID
    StandardTransform
Transform: Compose(
               Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=True)
               ToTensor()
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
           )


In [6]:
print(f"Loaded dataset with {len(dataset)} images across {len(dataset.classes)} classes.")
print("Classes:", dataset.classes)

Loaded dataset with 10000 images across 30 classes.
Classes: ['Airport', 'BareLand', 'BaseballField', 'Beach', 'Bridge', 'Center', 'Church', 'Commercial', 'DenseResidential', 'Desert', 'Farmland', 'Forest', 'Industrial', 'Meadow', 'MediumResidential', 'Mountain', 'Park', 'Parking', 'Playground', 'Pond', 'Port', 'RailwayStation', 'Resort', 'River', 'School', 'SparseResidential', 'Square', 'Stadium', 'StorageTanks', 'Viaduct']


In [7]:
#load vgg16 dataset
vgg16 = models.vgg16(pretrained=True)

# Remove final classification layer → keep up to 4096-dim feature, the final layer has 1000 classes as per training on ImageNet,
#so we are removing that last layer
vgg16.classifier = nn.Sequential(*list(vgg16.classifier.children())[:-1])
vgg16 = vgg16.to(DEVICE)
vgg16.eval()

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:02<00:00, 188MB/s]


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [8]:
# Extract features
all_features = []
all_labels = []

from tqdm import tqdm

with torch.no_grad():
    for images, labels in tqdm(dataloader, desc="Extracting features"):
        images = images.to(DEVICE)
        outputs = vgg16(images)             # [batch_size, 4096]
        all_features.append(outputs.cpu().numpy())
        all_labels.append(labels.numpy())

Extracting features: 100%|██████████| 625/625 [00:53<00:00, 11.61it/s]


In [9]:
all_features = np.concatenate(all_features, axis=0)
all_labels = np.concatenate(all_labels, axis=0)

print("Feature array shape:", all_features.shape)
print("Labels array shape:", all_labels.shape)

# 6. Save to disk
np.save(OUTPUT_FEATURES_PATH, all_features)
np.save(OUTPUT_LABELS_PATH, all_labels)

print(f"\nSaved features -> {OUTPUT_FEATURES_PATH}")
print(f"Saved labels   -> {OUTPUT_LABELS_PATH}")
print("\nFeature extraction complete ✅")

Feature array shape: (10000, 4096)
Labels array shape: (10000,)

Saved features -> aid_vgg16_features.npy
Saved labels   -> aid_vgg16_labels.npy

Feature extraction complete ✅


In [10]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np

In [11]:
X = np.load("../input/aid-vgg16/aid_vgg16_features.npy")
y = np.load("../input/aid-vgg16/aid_vgg16_labels.npy")

#I have saved the datasets after extracting using gpu

In [12]:
# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=67)

In [13]:
# Train Logistic Regression
clf = LogisticRegression(max_iter=200)
clf.fit(X_train, y_train)

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [14]:
# Evaluate
pred = clf.predict(X_test)
acc = accuracy_score(y_test, pred)
print("Baseline Accuracy:", acc)

Baseline Accuracy: 0.879


In [15]:
print("Using monarch butterfly optimization techinique for feature selection")

Using monarch butterfly optimization techinique for feature selection


In [16]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.preprocessing import StandardScaler

In [17]:
# Load your dataset
X = np.load("../input/aid-vgg16/aid_vgg16_features.npy")
y = np.load("../input/aid-vgg16/aid_vgg16_labels.npy")
n_features = X.shape[1]
print(f"Done loading the data, your data has {n_features} features")

Done loading the data, your data has 4096 features


In [18]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
print("Scaled the data")

Scaled the data


In [19]:
# Define the fitness function for MBO
X_train, X_test, y_train, y_test = train_test_split(X_scaled,
                                                    y,
                                                    test_size=0.2,
                                                    stratify=y,
                                                   random_state = 67
                                                   )


def fitness_function(selected_features, X_train_data, X_test_data, y_train_data, y_test_data):
    print("Started evaluating the fitness function...")
    X_selected = selected_features.astype(bool)
    X_train_selected = X_train_data[:, X_selected]
    X_test_selected = X_test_data[:, X_selected]

    if X_train_selected.shape[1] == 0:
        return 0.0
    
    clf = LogisticRegression(max_iter=200)
    clf.fit(X_train_selected, y_train_data)
    y_pred = clf.predict(X_test_selected)
    return accuracy_score(y_test_data, y_pred)

In [20]:
n_butterflies = 5
max_iter = 1
pm = 0.1
migration_prob = 0.7
print("For the initial data:")

For the initial data:


In [21]:
# Initialize population randomly (binary vectors for feature selection)
population = np.random.randint(0, 2, size=(n_butterflies, n_features))
print("Calculating initial fitness of the population...")

fitness_scores = np.array([
    fitness_function(ind, X_train, X_test, y_train, y_test) 
    for ind in population
])

#convergence warning is not a problem

Calculating initial fitness of the population...
Started evaluating the fitness function...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Started evaluating the fitness function...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Started evaluating the fitness function...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Started evaluating the fitness function...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Started evaluating the fitness function...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [22]:
print(f"The fitness values of the initial population are : {fitness_scores}")

The fitness values of the initial population are : [0.8495 0.8555 0.8585 0.8505 0.8515]


In [23]:
#MBO main loop
print("-------------Starting MBO main loop-------------")
for it in range(max_iter):
    new_population = population.copy()
    for i in range(n_butterflies):
        print(f"🦋Butterfly : {i}")
        print()
        #migration
        if np.random.rand() < migration_prob:
            partner_idx = np.random.randint(0, n_butterflies)
            new_population[i] = (population[i]+population[partner_idx]>0) > 0
        # Butterfly adjusting operator (mutation)
        for j in range(n_features):
            if np.random.rand() < pm:
                new_population[i, j] = 1 - new_population[i, j]
        # Evaluate new population
        new_fitness = np.array([
            fitness_function(ind, X_train, X_test, y_train, y_test)
            for ind in new_population
        ])

        if new_fitness[i] > fitness_scores[i]:
                population[i] = new_population[i]
                fitness_scores[i] = new_fitness[i]
                print("Found a better version, so replacing the butterfly")

    for i in range(n_butterflies):
        if new_fitness[i] > fitness_scores[i]:
            population[i] = new_population[i]
            fitness_scores[i] = new_fitness[i]
    print(f"Iteration {it+1}, Best Accuracy: {fitness_scores.max():.4f}")

# Get the best feature subset
best_idx = np.argmax(fitness_scores)
best_features = population[best_idx].astype(bool)
print("Number of selected features:", best_features.sum())

-------------Starting MBO main loop-------------
🦋Butterfly : 0

Started evaluating the fitness function...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Started evaluating the fitness function...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Started evaluating the fitness function...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Started evaluating the fitness function...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Started evaluating the fitness function...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Found a better version, so replacing the butterfly
🦋Butterfly : 1

Started evaluating the fitness function...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Started evaluating the fitness function...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Started evaluating the fitness function...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Started evaluating the fitness function...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Started evaluating the fitness function...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Found a better version, so replacing the butterfly
🦋Butterfly : 2

Started evaluating the fitness function...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Started evaluating the fitness function...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Started evaluating the fitness function...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Started evaluating the fitness function...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Started evaluating the fitness function...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Found a better version, so replacing the butterfly
🦋Butterfly : 3

Started evaluating the fitness function...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Started evaluating the fitness function...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Started evaluating the fitness function...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Started evaluating the fitness function...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Started evaluating the fitness function...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Found a better version, so replacing the butterfly
🦋Butterfly : 4

Started evaluating the fitness function...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Started evaluating the fitness function...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Started evaluating the fitness function...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Started evaluating the fitness function...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Started evaluating the fitness function...
Iteration 1, Best Accuracy: 0.8600
Number of selected features: 2070


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [24]:
X_sel = X[:, best_features]
X_train, X_test, y_train, y_test = train_test_split(X_sel, y, test_size=0.2, stratify=y, random_state=42)
clf = LogisticRegression(max_iter=200)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print("Final Accuracy with MBO-selected features:", accuracy_score(y_test, y_pred))

Final Accuracy with MBO-selected features: 0.853


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
