In [1]:
import numpy as np
from tqdm import tqdm
from scipy.special import expit 
from sklearn.datasets import load_breast_cancer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

In [28]:
# Load the breast-cancer dataset and hold out 33% of the samples for testing.
data = load_breast_cancer()
x, y = data.data, data.target
# Fix the seed so the split (and therefore the baseline accuracy reported
# below) is reproducible across kernel restarts.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.33, random_state=42
)

In [29]:
# Baseline classifier: nearest-neighbour with k=1, later fitted on all features.
KNN = KNeighborsClassifier(n_neighbors=1)

In [30]:
# Baseline: fit on every feature, then report the fraction of held-out
# samples that are classified correctly.
KNN.fit(x_train, y_train)
baseline_predictions = KNN.predict(x_test)
baseline_accuracy = np.sum(y_test == baseline_predictions) / len(x_test)
print(f'accuracy: {baseline_accuracy}')

accuracy: 0.9148936170212766


In [16]:
# NOTE: `modelWrapper` and `PSO` are defined in a cell further down this
# notebook; that cell must be executed before this one.
#
# Seed NumPy's global RNG. The wrapper's train/test split (random_state=None),
# the particles' initial positions and every PSO update draw from it, so
# without a seed the selected features change on every run.
np.random.seed(42)
model = modelWrapper(x, y)
optimizer = PSO(num_particles=30, dimension=x.shape[1], model=model)
features = optimizer.select_features(num_iter=10)

100%|██████████| 10/10 [00:14<00:00,  1.45s/it]


In [17]:
# Accuracy of the classifier restricted to the selected features.
# Caveat: evaluate() scores on the wrapper's internal test split, which is the
# same split the PSO search used as its fitness signal, so this figure is an
# optimistic estimate rather than a clean hold-out score.
model.evaluate(features)

0.9680851063829787

In [14]:
class modelWrapper():
    """Fitness function for feature selection.

    Splits the data once into train/test parts and scores a binary feature
    mask by fitting the wrapped classifier on the selected training columns
    and measuring accuracy on the selected test columns.

    Note: the same test split is used for every call to `evaluate`, so a
    search that maximises `evaluate` is tuning the mask on that split; scores
    obtained this way are optimistic.
    """

    def __init__(self, x, y, model=None, random_state=None):
        """
        Parameters
        ----------
        x : array of shape (n_samples, n_features)
        y : array of shape (n_samples,)
        model : estimator with `fit`/`predict`, optional
            Defaults to a 1-nearest-neighbour classifier.
        random_state : int or None, optional
            Forwarded to `train_test_split`; None keeps the previous
            (unseeded) behaviour.
        """
        # Compare against None explicitly: `model or default` truth-tests the
        # estimator, which misbehaves for estimators that define `__len__`
        # (e.g. pipelines/ensembles) and may even raise before fitting.
        self.model = model if model is not None else KNeighborsClassifier(n_neighbors=1)
        self.x_train, self.x_test, self.y_train, self.y_test = train_test_split(
            x, y, test_size=0.33, random_state=random_state
        )

    def evaluate(self, features):
        """Return test accuracy using only the columns flagged in `features`.

        Parameters
        ----------
        features : 1-D array-like of 0/1 (or bool)
            Non-zero entries mark the columns to keep.

        Returns
        -------
        float
            Fraction of test samples predicted correctly; 0.0 when the mask
            selects no feature at all (a classifier cannot be fitted on zero
            columns, so the empty subset gets the worst possible score).
        """
        selected = np.flatnonzero(features)
        if selected.size == 0:
            return 0.0
        self.model.fit(self.x_train[:, selected], self.y_train)
        predictions = self.model.predict(self.x_test[:, selected])
        return np.sum(predictions == self.y_test) / len(self.x_test)
        
class Particle:
    """One swarm member for binary PSO.

    Attributes
    ----------
    position : int array of shape (dimension,), entries in {0, 1}
        Current feature mask, initialised uniformly at random.
    velocity : float array of shape (dimension,)
        Starts at zero.
    memory : int array of shape (dimension,)
        Personal-best mask; starts as a copy of the initial position.
    """

    def __init__(self, dimension):
        self.position = np.random.randint(0, 2, size=(dimension))
        self.velocity = np.zeros(dimension)
        # Store a copy, not an alias: otherwise any in-place change to
        # `position` would silently rewrite the personal best as well.
        self.memory = self.position.copy()
    
class PSO:
    """Binary particle swarm optimiser for feature-subset selection.

    Each particle holds a 0/1 mask (`position`), a real-valued `velocity` and
    its personal-best mask (`memory`). `model.evaluate(mask)` is the fitness
    to maximise.

    Attributes
    ----------
    best : object with `position`, `memory`, `velocity`
        Detached snapshot of the best mask found so far. It is a copy, so it
        does not change when the particle it came from keeps moving.
    best_score : float
        Fitness of `best.position`.
    """

    class _Snapshot:
        """Frozen copy of a particle located at its personal best."""

        def __init__(self, particle):
            self.position = np.array(particle.memory, copy=True)
            self.memory = self.position
            self.velocity = np.array(particle.velocity, copy=True)

    def __init__(self,
                 num_particles=50,
                 dimension=30,
                 model=None):
        """
        Parameters
        ----------
        num_particles : int
            Swarm size; must be at least 1.
        dimension : int
            Length of the feature mask.
        model : object exposing `evaluate(mask) -> float`
            Fitness function (higher is better).
        """
        if num_particles < 1:
            raise ValueError("num_particles must be at least 1")
        self.model = model
        self.dimension = dimension
        self.particles = [Particle(dimension) for _ in range(num_particles)]
        self.best, self.best_score = self.update_global_best()

    def select_features(self, num_iter=100, w=0.8, c1=2, c2=2, v_max=4):
        """Run the swarm and return the best feature mask found.

        Parameters
        ----------
        num_iter : int
            Number of sweeps over the swarm.
        w : float
            Inertia weight applied to the previous velocity.
        c1, c2 : float
            Pull towards the personal best and the global best respectively.
        v_max : float
            Velocities are clipped to [-v_max, v_max] before being squashed
            by the sigmoid, which keeps every bit's flip probability away
            from exactly 0 or 1.

        Returns
        -------
        int array of shape (dimension,)
            Copy of the best 0/1 mask seen during the whole run.
        """
        for _ in tqdm(range(num_iter)):
            for p in self.particles:
                r1, r2 = np.random.rand(2)
                p.velocity = (w * p.velocity
                              + r1 * c1 * (p.memory - p.position)
                              + r2 * c2 * (self.best.position - p.position))
                p.velocity = np.clip(p.velocity, -v_max, v_max)
                # Binary PSO: each bit is set with probability sigmoid(velocity).
                p.position = (np.random.rand(self.dimension) < expit(p.velocity)).astype(int)
                self.update_memory(p)
            # Refresh the global best once per sweep from the personal bests.
            self.best, self.best_score = self.update_global_best()
        return self.best.position.copy()

    def update_global_best(self):
        """Return `(snapshot, score)` for the best personal best in the swarm.

        Personal bests only ever improve, so choosing among them (rather than
        among current positions) guarantees the global best never regresses.
        The returned snapshot is a copy, detached from the live particle.
        Ties keep the earliest particle.
        """
        leader, leader_score = None, -np.inf
        for p in self.particles:
            score = self.model.evaluate(p.memory)
            if leader is None or score > leader_score:
                leader, leader_score = p, score
        return PSO._Snapshot(leader), leader_score

    def update_memory(self, p):
        """Replace `p.memory` with `p.position` if the position scores strictly higher."""
        if self.model.evaluate(p.position) > self.model.evaluate(p.memory):
            # Copy so later in-place edits of `position` cannot alter the memory.
            p.memory = p.position.copy()