In [3]:
!pip install numpy pandas scikit-learn matplotlib


Defaulting to user installation because normal site-packages is not writeable



[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [5]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [6]:
data = pd.read_csv('data.csv')
data = data.drop(['id','Unnamed: 32'], axis=1)

# Map labels M=1, B=0
data['diagnosis'] = data['diagnosis'].map({'M':1, 'B':0})

X = data.drop('diagnosis', axis=1).values
y = data['diagnosis'].values


In [7]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.3, random_state=42
)


In [8]:
def fitness_function(features):
    selected = [i for i in range(len(features)) if features[i] == 1]
    if len(selected) == 0:
        return 0
    
    clf = SVC()
    clf.fit(X_train[:, selected], y_train)
    y_pred = clf.predict(X_test[:, selected])
    
    return accuracy_score(y_test, y_pred)


In [9]:
import random

def WOA(population_size=10, iterations=20):
    dim = X_train.shape[1]
    whales = np.random.randint(0, 2, (population_size, dim))
    
    best_whale = whales[0].copy()
    best_score = fitness_function(best_whale)

    for t in range(iterations):
        a = 2 - t * (2 / iterations)

        for i in range(population_size):
            r = np.random.random(dim)
            A = 2 * a * r - a
            C = 2 * r

            if np.random.random() < 0.5:
                D = np.abs(C * best_whale - whales[i])
                new_pos = best_whale - A * D
            else:
                rand_whale = whales[np.random.randint(0, population_size)]
                D = np.abs(C * rand_whale - whales[i])
                new_pos = rand_whale - A * D

            new_pos = np.where(new_pos > 0.5, 1, 0)

            score = fitness_function(new_pos)

            if score > best_score:
                best_score = score
                best_whale = new_pos.copy()

        print(f"Iteration {t+1}/{iterations}: Best Accuracy = {best_score:.4f}")

    return best_whale, best_score


In [10]:
best_features, best_accuracy = WOA()

print("Best Accuracy:", best_accuracy)
print("Selected feature indices:", np.where(best_features == 1)[0])


Iteration 1/20: Best Accuracy = 0.9649
Iteration 2/20: Best Accuracy = 0.9708
Iteration 3/20: Best Accuracy = 0.9766
Iteration 4/20: Best Accuracy = 0.9825
Iteration 5/20: Best Accuracy = 0.9883
Iteration 6/20: Best Accuracy = 0.9883
Iteration 7/20: Best Accuracy = 0.9883
Iteration 8/20: Best Accuracy = 0.9883
Iteration 9/20: Best Accuracy = 0.9883
Iteration 10/20: Best Accuracy = 0.9883
Iteration 11/20: Best Accuracy = 0.9883
Iteration 12/20: Best Accuracy = 0.9883
Iteration 13/20: Best Accuracy = 0.9883
Iteration 14/20: Best Accuracy = 0.9883
Iteration 15/20: Best Accuracy = 0.9883
Iteration 16/20: Best Accuracy = 0.9883
Iteration 17/20: Best Accuracy = 0.9883
Iteration 18/20: Best Accuracy = 0.9883
Iteration 19/20: Best Accuracy = 0.9883
Iteration 20/20: Best Accuracy = 0.9883
Best Accuracy: 0.9883040935672515
Selected feature indices: [ 1  2  3  4  5  8  9 10 11 12 14 15 16 17 22 26]


In [11]:
sel = np.where(best_features == 1)[0]

clf = SVC(probability=True)
clf.fit(X_train[:, sel], y_train)

pred = clf.predict(X_test[:, sel])

print(classification_report(y_test, pred))
print(confusion_matrix(y_test, pred))


              precision    recall  f1-score   support

           0       0.98      1.00      0.99       108
           1       1.00      0.97      0.98        63

    accuracy                           0.99       171
   macro avg       0.99      0.98      0.99       171
weighted avg       0.99      0.99      0.99       171

[[108   0]
 [  2  61]]


In [12]:
import json

with open("selected_features.json", "w") as f:
    json.dump({
        "selected_indices": sel.tolist(),
        "accuracy": float(best_accuracy)
    }, f)

print("Saved selected_features.json")


Saved selected_features.json
