In [1]:
import numpy as np
import pandas as pd
import math
import random
from statistics import stdev
from time import process_time
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

In [2]:
# Start the CPU-time clock that the final evaluation cell reports against.
start_time = process_time()

# Load the dataset; every column except the last is treated as a feature and
# the last column is treated as the class label.
df = pd.read_csv('BreastCancer.csv')
tot_features = len(df.columns) - 1
x = df[df.columns[:tot_features]]
y = df[df.columns[-1]]

# Stratified 80/20 split. No random_state is passed, so the split (and every
# number derived from it) differs from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, stratify=y
)

# Baseline: 5-NN trained on all features, scored on the held-out split.
_classifier = KNeighborsClassifier(n_neighbors=5)
_classifier.fit(x_train, y_train)
predictions = _classifier.predict(x_test)
total_acc = accuracy_score(y_true=y_test, y_pred=predictions)
total_error = 1 - total_acc
# Alias read by find_fitness when it normalises the selected-feature count.
total_features = tot_features
total_acc

0.6285714285714286

In [3]:
#controlling parameters
# Meanings below are taken from how the visible cells use each name.
# Number of males / females updated and paired per iteration; each population
# array holds 2*swarm_size rows, the second half receiving the offspring.
swarm_size=20
# Main-loop iteration count; HS also uses it as the first harmony-memory row
# that it refills with random values.
max_iterations = 20
# Weight of the selected-feature ratio in the fitness; (1-alpha) weights the
# classification error.
alpha = 0.1
# Attraction coefficient towards the slot's personal best (male update).
a1=3
# Attraction coefficient towards gbest (male update) and towards the paired
# male (female update).
a2=3.5
# Distance damping: attractions are scaled by exp(-beta * distance**2).
beta=0.1
# Amplitude of the random velocity kick given to the best male; multiplied by
# delta after every iteration.
d=3
# Amplitude of the random velocity kick given to a female that is fitter than
# her paired male; multiplied by delta after every iteration.
fl=3
# Blend weight used in crossover: offspring = l*parent_a + (1-l)*parent_b.
l=0.95
# Multiplier applied to the previous velocity in the velocity updates.
g=1
# Per-iteration decay factor applied to d and fl.
delta=0.9
# Number of rows in the harmony memory.
num_harmonies=2*max_iterations
# Number of improvisation rounds performed by each HS call.
max_HS = 30
# Probability that a new harmony component is copied from the memory rather
# than drawn uniformly from [-1, 1].
r_accept = 0.8
# Probability that a copied component is additionally perturbed.
r_pa = 0.3
# Amplitude of that perturbation (scaled by a uniform draw in [-1, 1]).
b_range = 2
# gmax / gmin only appear in a commented-out gravity schedule in the main loop.
gmax=9.8
gmin=6

In [4]:
#V-shaped transfer function: |0.5 - sigmoid(v)|
def transfer_func(velocity):
    """Map each component of a continuous vector to a selection score.

    The score of a component v is |0.5 - sigmoid(v)|: 0 at v == 0, symmetric
    in the sign of v, and saturating at 0.5 for large |v|.

    It is evaluated through the identity sigmoid(v) - 0.5 == 0.5*tanh(v/2).
    The direct form exp(v)/(1+exp(v)) raises OverflowError once v exceeds
    roughly 709, whereas tanh saturates cleanly for any finite input.

    Parameters
    ----------
    velocity : sequence of float
        Continuous position/velocity values (list or 1-D array).

    Returns
    -------
    list of float
        One score in [0, 0.5] per input component, in input order.
    """
    return [0.5 * abs(math.tanh(v / 2)) for v in velocity]

In [5]:
#fitness function
def find_fitness(particle):
    """Score a particle as a feature subset (lower is better).

    The particle is passed through transfer_func; every component whose score
    is at least 0.25 selects the dataframe column at the same index. A 5-NN
    classifier is fitted on the selected training columns and scored on the
    held-out split.

    Returns 10000 when no feature is selected, otherwise
    alpha * (selected / total_features) + (1 - alpha) * (1 - accuracy).
    """
    scores = transfer_func(particle)
    features = [df.columns[idx] for idx, score in enumerate(scores) if score >= 0.25]
    if not features:
        # Empty subsets get a large penalty so they never win a comparison.
        return 10000

    knn = KNeighborsClassifier(n_neighbors=5)
    knn.fit(x_train[features].copy(), y_train)
    predicted = knn.predict(x_test[features].copy())
    err = 1 - accuracy_score(y_true=y_test, y_pred=predicted)
    return alpha*(len(features)/total_features) + (1-alpha)*err

In [6]:
#initialization of population
# Each population has 2*swarm_size rows and one column per feature. The main
# loop updates rows [0, swarm_size) and writes offspring into the second half.
# Velocities start at zero.
male_swarm_vel = np.zeros((2*swarm_size,tot_features))
female_swarm_vel = np.zeros((2*swarm_size,tot_features))
# Positions are continuous, drawn uniformly from [-1, 1); they are turned into
# a feature mask only inside find_fitness (via transfer_func).
male_swarm_pos = np.random.uniform(low=-1,high=1,size=(2*swarm_size,tot_features))
female_swarm_pos = np.random.uniform(low=-1,high=1,size=(2*swarm_size,tot_features))
# Disabled alternative: binarise the initial positions directly.
#male_swarm_pos=np.where(male_swarm_pos>=0,1,0)
#female_swarm_pos=np.where(female_swarm_pos>=0,1,0)

In [7]:
# Best fitness found so far (lower is better); starts above the 10000 penalty
# that find_fitness returns for an empty feature subset.
gbest_fitness=100000
# Personal-best fitness and position, one entry per slot in [0, swarm_size).
# These arrays are indexed by rank slot; the main loop does not reorder them
# when the populations are re-sorted.
pbest_fitness = np.empty(swarm_size)
pbest_fitness.fill(np.inf)
pbest = np.zeros((swarm_size,tot_features))
# Position of the best solution found so far.
gbest = np.zeros(tot_features)
# Fitness of every row of the male / female populations (filled before use).
male_fitness=np.empty((2*swarm_size))
female_fitness=np.empty((2*swarm_size))
# Per-feature scratch vectors written at the top of each main-loop iteration.
vmax_male=np.empty(tot_features)
vmax_female=np.empty(tot_features)
# Scratch buffers for the two children produced by crossover.
offspring1=np.empty(tot_features)
offspring2=np.empty(tot_features)
# Harmony memory buffer; np.empty leaves its contents uninitialised until
# rows are assigned.
HM=np.empty((num_harmonies,tot_features))

In [8]:
#sorting initially
# Score every male, then order fitness and positions best-first (ascending).
male_fitness=np.array([find_fitness(mayfly) for mayfly in male_swarm_pos],dtype=float)
sort_order=np.argsort(male_fitness)
male_fitness,male_swarm_pos=male_fitness[sort_order],male_swarm_pos[sort_order]

# Same for the females.
female_fitness=np.array([find_fitness(mayfly) for mayfly in female_swarm_pos],dtype=float)
sort_order=np.argsort(female_fitness)
female_fitness,female_swarm_pos=female_fitness[sort_order],female_swarm_pos[sort_order]

In [9]:
#updating gbest
# Row 0 of each sorted population is its best member; adopt whichever beats
# the current global best (males are checked first, then females).
for _leader_fitness,_leader_pos in (
    (male_fitness[0],male_swarm_pos[0]),
    (female_fitness[0],female_swarm_pos[0]),
):
    if _leader_fitness<gbest_fitness:
        gbest_fitness=_leader_fitness
        gbest=_leader_pos.copy()

In [10]:
#HS
def HS(HM,gbest,gbest_fitness):
    """Run a Harmony Search refinement and return the (possibly improved) best.

    The first ``num_harmonies`` rows of ``HM`` seed the harmony memory; rows
    ``[max_iterations, num_harmonies)`` are replaced by fresh uniform samples
    from [-1, 1). ``max_HS`` new harmonies are then improvised: each component
    is copied from a random memory row with probability ``r_accept`` (and then
    perturbed by ``b_range * U(-1, 1)`` with probability ``r_pa``), otherwise
    drawn uniformly from [-1, 1]. A new harmony replaces the worst memory row
    whenever it is strictly better.

    Parameters
    ----------
    HM : array-like, at least num_harmonies rows of tot_features columns
        Seed memory. It is copied, so the caller's array is never modified.
    gbest : 1-D array
        Current best position.
    gbest_fitness : float
        Fitness of ``gbest`` (lower is better).

    Returns
    -------
    (gbest, gbest_fitness)
        The inputs unchanged, or the best harmony and its fitness if that
        harmony is strictly better than ``gbest_fitness``.
    """
    # Private copy: filling the random rows in place would overwrite part of
    # whatever population the caller handed in while its cached fitness
    # values stay stale.
    HM = np.array(HM[:num_harmonies], dtype=float)
    for i in range(max_iterations, num_harmonies):
        HM[i]=np.random.uniform(low=-1,high=1,size=(tot_features))

    # Keep `fitnesses` aligned with the rows of HM at all times, best first.
    fitnesses = np.array([find_fitness(harmony) for harmony in HM], dtype=float)
    order = np.argsort(fitnesses, kind="stable")
    HM = HM[order]
    fitnesses = fitnesses[order]

    for _ in range(max_HS):
        tmp = HM[0].copy()  # every component is overwritten below
        for i in range(tot_features):
            if np.random.random() < r_accept:
                # Memory consideration (random.randint is inclusive at both ends).
                choice = random.randint(0, num_harmonies-1)
                tmp[i] = HM[choice][i]
                if random.random() < r_pa:
                    # Pitch adjustment.
                    tmp[i] += b_range * random.uniform(-1,1)
            else:
                tmp[i] = random.uniform(-1,1)
        cur_fitness = find_fitness(tmp)
        if cur_fitness < fitnesses[-1]:
            # Replace the worst row. Only that row changed, so re-sorting the
            # cached fitnesses is enough; the other rows need no re-evaluation.
            HM[-1] = tmp
            fitnesses[-1] = cur_fitness
            order = np.argsort(fitnesses, kind="stable")
            HM = HM[order]
            fitnesses = fitnesses[order]

    if fitnesses[0] < gbest_fitness:
        gbest = HM[0].copy()
        gbest_fitness = float(fitnesses[0])
    return (gbest,gbest_fitness)
    #print(gbest_fitness)

In [None]:
#main MA-HS
# One pass of this loop = one Mayfly generation followed by a Harmony Search
# refinement of the global best.
# NOTE: this cell decays the globals `d` and `fl`, so re-running it without
# re-running the parameter cell continues from the decayed values.
for itr in range(max_iterations):
    # Archive this iteration's best male.
    # NOTE(review): in the visible cells HM is written here but never read;
    # HS below is seeded from the male population instead. Confirm intent.
    HM[itr]=male_swarm_pos[0].copy()

    #updating vmax
    # NOTE(review): vmax_male / vmax_female are computed but no visible code
    # uses them to clamp velocities.
    vmax_male[:]=(male_swarm_pos[0]-male_swarm_pos[swarm_size-1])*np.random.normal(0,1,tot_features)
    vmax_female[:]=(female_swarm_pos[0]-female_swarm_pos[swarm_size-1])*np.random.normal(0,1,tot_features)

    for i in range(swarm_size):

        if male_fitness[i]<gbest_fitness:
            gbest_fitness=male_fitness[i]
            gbest=male_swarm_pos[i].copy()

        # NOTE(review): pbest is tracked per rank slot i, not per particle;
        # it is not reordered when the populations are sorted below.
        if male_fitness[i]<pbest_fitness[i]:
            pbest_fitness[i]=male_fitness[i]
            pbest[i]=male_swarm_pos[i].copy()

        #velocity update (males)
        if i==0:
            # The best male only receives a random kick of amplitude d.
            male_swarm_vel[0]+=d*np.random.uniform(-1,1,tot_features)
        else:
            to_pbest=pbest[i]-male_swarm_pos[i]
            to_gbest=gbest-male_swarm_pos[i]
            rp_sq=np.dot(to_pbest,to_pbest)   # squared distance to pbest
            rg_sq=np.dot(to_gbest,to_gbest)   # squared distance to gbest
            male_swarm_vel[i]=(g*male_swarm_vel[i]
                               +a1*math.exp(-beta*rp_sq)*to_pbest
                               +a2*math.exp(-beta*rg_sq)*to_gbest)

        #velocity update (females)
        if male_fitness[i]<=female_fitness[i]:
            # Female is attracted to her paired male. The update is applied to
            # every feature; previously the assignment sat outside the
            # per-feature loop and only touched the last feature.
            to_male=male_swarm_pos[i]-female_swarm_pos[i]
            rmf_sq=np.dot(to_male,to_male)
            female_swarm_vel[i]=g*female_swarm_vel[i]+a2*math.exp(-beta*rmf_sq)*to_male
        else:
            # Female is fitter than the male: random flight of amplitude fl.
            female_swarm_vel[i]=g*female_swarm_vel[i]+fl*np.random.uniform(-1,1,tot_features)

    #sorting (positions have not moved yet; velocities travel with their rows)
    for i in range(swarm_size):
        male_fitness[i]=find_fitness(male_swarm_pos[i])
    sort_order=np.argsort(male_fitness)
    male_fitness=male_fitness[sort_order]
    male_swarm_pos=male_swarm_pos[sort_order]
    male_swarm_vel=male_swarm_vel[sort_order]

    for i in range(swarm_size):
        female_fitness[i]=find_fitness(female_swarm_pos[i])
    sort_order=np.argsort(female_fitness)
    female_fitness=female_fitness[sort_order]
    female_swarm_pos=female_swarm_pos[sort_order]
    female_swarm_vel=female_swarm_vel[sort_order]

    if male_fitness[0]<gbest_fitness:
        gbest_fitness=male_fitness[0]
        gbest=male_swarm_pos[0].copy()

    if female_fitness[0]<gbest_fitness:
        gbest_fitness=female_fitness[0]
        gbest=female_swarm_pos[0].copy()

    for i in range(swarm_size):
        #crossover and mutation
        # Rank-matched parents produce two blended children.
        offspring1[:]=l*male_swarm_pos[i]+(1-l)*female_swarm_pos[i]
        offspring2[:]=l*female_swarm_pos[i]+(1-l)*male_swarm_pos[i]
        # A coin flip decides which child joins which population.
        if np.random.random()>=0.5:
            male_swarm_pos[swarm_size+i]=offspring1
            female_swarm_pos[swarm_size+i]=offspring2
        else:
            male_swarm_pos[swarm_size+i]=offspring2
            female_swarm_pos[swarm_size+i]=offspring1
        # Gaussian mutation; the same noise vector is added to both children.
        r=np.random.normal(0,1,tot_features)
        male_swarm_pos[swarm_size+i]+=r
        female_swarm_pos[swarm_size+i]+=r
        # Offspring start at rest.
        male_swarm_vel[swarm_size+i]=0
        female_swarm_vel[swarm_size+i]=0

    #updating swarm position (parents only)
    male_swarm_pos[:swarm_size]+=male_swarm_vel[:swarm_size]
    female_swarm_pos[:swarm_size]+=female_swarm_vel[:swarm_size]

    #sorting (parents and offspring together, best first)
    for i in range(2*swarm_size):
        male_fitness[i]=find_fitness(male_swarm_pos[i])
    sort_order=np.argsort(male_fitness)
    male_fitness=male_fitness[sort_order]
    male_swarm_pos=male_swarm_pos[sort_order]
    male_swarm_vel=male_swarm_vel[sort_order]

    for i in range(2*swarm_size):
        female_fitness[i]=find_fitness(female_swarm_pos[i])
    sort_order=np.argsort(female_fitness)
    female_fitness=female_fitness[sort_order]
    female_swarm_pos=female_swarm_pos[sort_order]
    female_swarm_vel=female_swarm_vel[sort_order]

    #updating gravity and nuptial dance
    #g=gmax-((gmax-gmin)*itr/max_iterations)
    d=d*delta
    fl=fl*delta
    # Row 0 was scored just above, so the cached fitness is reused instead of
    # fitting another classifier.
    if male_fitness[0]<gbest_fitness:
        gbest_fitness=male_fitness[0]
        gbest=male_swarm_pos[0].copy()

    if female_fitness[0]<gbest_fitness:
        gbest_fitness=female_fitness[0]
        gbest=female_swarm_pos[0].copy()

    # HS overwrites rows [max_iterations, num_harmonies) of the array it is
    # given, so hand it a copy: otherwise the lower half of the male
    # population would be replaced by random rows while male_fitness still
    # described the old ones. This relies on the population having
    # num_harmonies rows (2*swarm_size == 2*max_iterations here).
    (gbest,gbest_fitness)=HS(male_swarm_pos.copy(),gbest,gbest_fitness)

    print(gbest_fitness)

0.04285714285714281
0.03928571428571433
0.03928571428571433
0.03928571428571433
0.03928571428571433
0.03928571428571433
0.03642857142857141
0.03642857142857141
0.03642857142857141
0.03642857142857141
0.03642857142857141
0.03642857142857141
0.03642857142857141
0.03642857142857141


In [None]:
#gbest=transfer_func(gbest)
# Binarise the best position: a feature is selected (1) when its transfer
# score reaches the same 0.25 threshold used by find_fitness, else 0.
selected_features=[1 if score>=0.25 else 0 for score in transfer_func(gbest)]
number_of_selected_features = np.sum(selected_features)
print("NUM:",number_of_selected_features)

In [None]:
# Column names of the selected features (mask index j -> dataframe column j).
features=[df.columns[j] for j,flag in enumerate(selected_features) if flag==1]
if features:
    # Refit 5-NN on the selected columns only and score the held-out split.
    new_x_train = x_train[features]
    new_x_test = x_test[features]
    _classifier = KNeighborsClassifier(n_neighbors=5)
    _classifier.fit(new_x_train, y_train)
    predictions = _classifier.predict(new_x_test)
    acc = accuracy_score(y_true = y_test, y_pred = predictions)
    fitness = acc
else:
    # Nothing selected: report zero accuracy.
    acc=0
print("ACC:",acc)
# CPU time elapsed since the data-loading cell started the clock.
end_time=process_time()
print("time=",(end_time-start_time))

In [None]:
#gbest=transfer_func(gbest)
# Repeat of the earlier mask cell: recompute the 0/1 selection from gbest
# using the 0.25 threshold on the transfer score.
selected_features=[int(score>=0.25) for score in transfer_func(gbest)]
number_of_selected_features = np.sum(selected_features)
print("NUM:",number_of_selected_features)

In [None]:
# Repeat of the earlier evaluation cell: score 5-NN on the selected columns.
features=[column for column,flag in zip(df.columns,selected_features) if flag==1]
if len(features)!=0:
    new_x_train = x_train[features]
    new_x_test = x_test[features]
    _classifier = KNeighborsClassifier(n_neighbors=5)
    _classifier.fit(new_x_train, y_train)
    predictions = _classifier.predict(new_x_test)
    acc = accuracy_score(y_true = y_test, y_pred = predictions)
    fitness = acc
else:
    # Nothing selected: report zero accuracy.
    acc=0
print("ACC:",acc)
# CPU time since the data-loading cell; overwrites the earlier end_time.
end_time=process_time()
print("time=",(end_time-start_time))