In [3]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression


# Generate synthetic dataset (moderate difficulty)
X, y = make_classification(
    n_samples=500,
    n_features=5,
    n_informative=2,
    n_redundant=1,
    class_sep=1.0,
    random_state=42
)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)


# PSO PARAMETERS
num_particles = 3
num_features = X.shape[1]
max_iter = 10
w = 0.7      
c1 = 2
c2 = 2

particles = np.random.randint(0, 2, (num_particles, num_features))
velocities = np.random.uniform(-2, 2, (num_particles, num_features))

pbest = particles.copy()
pbest_fitness = np.zeros(num_particles)


# FITNESS FUNCTION
def fitness(mask):
    if mask.sum() == 0:
        return 0
    clf = LogisticRegression(max_iter=200)
    clf.fit(X_train[:, mask == 1], y_train)
    pred = clf.predict(X_test[:, mask == 1])
    return accuracy_score(y_test, pred)


for i in range(num_particles):
    pbest_fitness[i] = fitness(particles[i])

gbest_idx = np.argmax(pbest_fitness)
gbest = pbest[gbest_idx].copy()
gbest_fitness = pbest_fitness[gbest_idx]

print(f"Iteration 1:")
print(f"Initial global best: {gbest} ({gbest_fitness:.2f})\n")


for iteration in range(2, max_iter + 1):

    old_gbest_fitness = gbest_fitness

    for i in range(num_particles):

        # Velocity update with strong randomness
        velocities[i] = (
            w * velocities[i]
            + c1 * np.random.uniform(-1, 1, num_features) * (pbest[i] - particles[i])
            + c2 * np.random.uniform(-1, 1, num_features) * (gbest - particles[i])
        )

        # Binary update 
        sig = 1 / (1 + np.exp(-2 * velocities[i]))
        particles[i] = (sig > np.random.rand(num_features)).astype(int)

        fitness_value = fitness(particles[i])

        if fitness_value > pbest_fitness[i]:
            pbest[i] = particles[i].copy()
            pbest_fitness[i] = fitness_value

    # Update global best
    gbest_idx = np.argmax(pbest_fitness)
    gbest = pbest[gbest_idx].copy()
    gbest_fitness = pbest_fitness[gbest_idx]

    if gbest_fitness > old_gbest_fitness:
        print(f"Iteration {iteration}:")
        print(f"New global best: {gbest} ({gbest_fitness:.2f})\n")
    else:
        print(f"Iteration {iteration}:")
        print(f"No improvement, global best stays: {gbest} ({gbest_fitness:.2f})\n")

print("Final global best features:", gbest)
print("Final best fitness:", gbest_fitness)


Iteration 1:
Initial global best: [1 0 1 0 0] (0.87)

Iteration 2:
No improvement, global best stays: [1 0 1 0 0] (0.87)

Iteration 3:
No improvement, global best stays: [1 0 1 0 0] (0.87)

Iteration 4:
No improvement, global best stays: [1 0 1 0 0] (0.87)

Iteration 5:
New global best: [1 1 1 0 0] (0.87)

Iteration 6:
New global best: [0 1 0 1 0] (0.88)

Iteration 7:
No improvement, global best stays: [0 1 0 1 0] (0.88)

Iteration 8:
No improvement, global best stays: [0 1 0 1 0] (0.88)

Iteration 9:
No improvement, global best stays: [0 1 0 1 0] (0.88)

Iteration 10:
No improvement, global best stays: [0 1 0 1 0] (0.88)

Final global best features: [0 1 0 1 0]
Final best fitness: 0.88
