In [1]:
import random
import time
import math
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
import benchmarks

# Load the heart-disease table from the working directory and preview
# its first rows as the cell's rich output.
data = pd.read_csv(filepath_or_buffer="cleavland.csv")
data.head()

Unnamed: 0,AGE,SEX,CP,TRESTBPS,CHOL,FBD,REST ECG,THALACH,EXANG,OLDPEAK,SLOPE,CA,THAL,RESULT
0,63,1,1,145,233,1,2,150,0,2.3,3,0,6,0
1,67,1,4,160,286,0,2,108,1,1.5,2,3,3,2
2,67,1,4,120,229,0,2,129,1,2.6,2,2,7,1
3,37,1,3,130,250,0,0,187,0,3.5,3,0,3,0
4,41,0,2,130,204,0,2,172,0,1.4,1,0,3,0


In [2]:
## Data preprocessing: feature extraction, train/test split and scaling
# Features are the first 13 columns (by position); the label is RESULT.
X = data.iloc[:, 0:13].values
Y = data["RESULT"].values
print(X.shape)

# 60/40 split with a fixed random_state so the partition is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.40,
    random_state=14,
)

# Learn the scaling statistics from the training rows only, then apply
# the same transform to both partitions.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

(303, 13)


In [3]:
## Baseline: polynomial-kernel SVM on the full scaled feature set
## (i.e. before any GWO-driven column selection)
svcclassifier = SVC(kernel="poly", random_state=50)
y_pred = svcclassifier.fit(X_train, y_train).predict(X_test)

# P is kept as the reference score that a later cell prints again.
P = accuracy_score(y_pred, y_test)
print("Accuracy score for SVM:", P)

Accuracy score for SVM: 0.5983606557377049


In [4]:
##GWO Optimization algo
def GWO(objf,lb,ub,dim,SearchAgents_no,Max_iter,seed=None):
    """Minimise ``objf`` with the Grey Wolf Optimizer.

    Parameters
    ----------
    objf : callable
        Objective to minimise. It is called with one 1-D array of length
        ``dim`` and must return a value comparable with ``<`` / ``>``.
    lb, ub : scalar or sequence
        Lower / upper bound of the search space. A scalar is applied to
        every dimension; a list, tuple or NumPy array supplies one bound
        per dimension (only the first ``dim`` entries are used).
    dim : int
        Length of each wolf's position vector.
    SearchAgents_no : int
        Number of wolves in the population.
    Max_iter : int
        Number of iterations of the main loop.
    seed : int, optional
        When given, all random draws come from private generators seeded
        with this value, so repeated calls return identical results.
        When ``None`` (default) the global ``random`` and ``np.random``
        states are used, exactly as before this parameter existed.

    Returns
    -------
    (Alpha_pos, Beta_pos) : tuple of numpy.ndarray
        Best and second-best positions recorded during the run.

    Raises
    ------
    ValueError
        If a sequence bound has fewer than ``dim`` entries.

    Notes
    -----
    Prints a banner, the final population shape and the three leader
    positions to stdout.
    """
    # Choose the random sources: private seeded generators for
    # reproducible runs, otherwise the module-level ones.
    if seed is None:
        draw = random.random
        uniform = np.random.uniform
    else:
        draw = random.Random(seed).random
        uniform = np.random.RandomState(seed).uniform

    def _as_bound(bound, label):
        # Normalise a scalar / list / tuple / ndarray bound to a float
        # vector of length ``dim``.
        arr = np.asarray(bound, dtype=float)
        if arr.ndim == 0:
            return np.full(dim, float(arr))
        arr = arr.ravel()
        if arr.size < dim:
            raise ValueError(
                "%s has %d entries but dim is %d" % (label, arr.size, dim)
            )
        return arr[:dim]

    lb = _as_bound(lb, "lb")
    ub = _as_bound(ub, "ub")

    # initialize alpha, beta, and delta_pos
    Alpha_pos=np.zeros(dim)
    Alpha_score=float("inf")

    Beta_pos=np.zeros(dim)
    Beta_score=float("inf")

    Delta_pos=np.zeros(dim)
    Delta_score=float("inf")

    # Initialize the positions of search agents, one column (dimension)
    # at a time, uniformly inside [lb, ub].
    Positions = np.zeros((SearchAgents_no, dim))
    for i in range(dim):
        Positions[:, i] = uniform(0, 1, SearchAgents_no) * (ub[i] - lb[i]) + lb[i]

    # Best score after each iteration; recorded but not returned.
    Convergence_curve=np.zeros(Max_iter)

    # Not every callable has __name__ (e.g. functools.partial objects).
    print("GWO is optimizing  \""+getattr(objf, "__name__", repr(objf))+"\"")

    # Main loop
    for l in range(0,Max_iter):
        for i in range(0,SearchAgents_no):

            # Return back the search agents that go beyond the boundaries of the search space
            Positions[i,:]=np.clip(Positions[i,:], lb, ub)

            # Calculate objective function for each search agent
            fitness=objf(Positions[i,:])

            # Update Alpha, Beta, and Delta.
            # NOTE(review): a displaced alpha is not demoted to beta (nor
            # beta to delta); beta/delta stay at their zero initial value
            # until some wolf scores strictly between the leaders. Kept
            # as-is to preserve the existing search behaviour.
            if fitness<Alpha_score:
                Alpha_score=fitness  # Update alpha
                Alpha_pos=Positions[i,:].copy()
            elif (fitness>Alpha_score and fitness<Beta_score):
                Beta_score=fitness  # Update beta
                Beta_pos=Positions[i,:].copy()
            elif (fitness>Alpha_score and fitness>Beta_score and fitness<Delta_score):
                Delta_score=fitness  # Update delta
                Delta_pos=Positions[i,:].copy()

        a=2-l*((2)/Max_iter)  # a decreases linearly from 2 to 0

        # Update the Position of search agents including omegas
        for i in range(0,SearchAgents_no):
            for j in range(0,dim):

                r1=draw()  # r1 is a random number in [0,1]
                r2=draw()  # r2 is a random number in [0,1]

                A1=2*a*r1-a  # Equation (3.3)
                C1=2*r2  # Equation (3.4)

                D_alpha=abs(C1*Alpha_pos[j]-Positions[i,j])  # Equation (3.5)-part 1
                X1=Alpha_pos[j]-A1*D_alpha  # Equation (3.6)-part 1

                r1=draw()
                r2=draw()

                A2=2*a*r1-a  # Equation (3.3)
                C2=2*r2  # Equation (3.4)

                D_beta=abs(C2*Beta_pos[j]-Positions[i,j])  # Equation (3.5)-part 2
                X2=Beta_pos[j]-A2*D_beta  # Equation (3.6)-part 2

                r1=draw()
                r2=draw()

                A3=2*a*r1-a  # Equation (3.3)
                C3=2*r2  # Equation (3.4)

                D_delta=abs(C3*Delta_pos[j]-Positions[i,j])  # Equation (3.5)-part 3
                X3=Delta_pos[j]-A3*D_delta  # Equation (3.6)-part 3

                # New coordinate is the mean of the three leader-guided moves.
                Positions[i,j]=(X1+X2+X3)/3  # Equation (3.7)
        Convergence_curve[l]=Alpha_score

    print(Positions.shape)
    print("Alpha position=",Alpha_pos)
    print("Beta position=",Beta_pos)
    print("Delta position=",Delta_pos)
    return Alpha_pos,Beta_pos


In [19]:
## GWO parameters
iters = 100        # iterations per GWO run
wolves = 5         # number of search agents
dimension = 13     # length of each wolf's position vector
search_domain = [0, 1]
lb, ub = -1.28, 1.28

# Float copy of a hand-picked subset of columns (by position).
colneeded = [0, 1, 2, 4, 5, 7, 8, 10, 11]
modified_data = pd.DataFrame()
for col_idx in colneeded:
    col_name = data.columns[col_idx]
    modified_data[col_name] = data[col_name].astype(float)
func_details = benchmarks.getFunctionDetails(6)

# Ten independent GWO runs on benchmark function F7. Each run rebinds
# alpha/beta, so only the result of the final run is kept.
# NOTE(review): the objective receives only the wolf position, not the
# dataset -- confirm F7 is the intended fitness for feature selection.
for _ in range(10):
    alpha, beta = GWO(benchmarks.F7, lb, ub, dimension, wolves, iters)

GWO is optimizing  "F7"
(5, 13)
Alpha position= [ 0.01252327 -0.0483109  -0.17705457  0.19507274 -0.08585988 -0.01247825
 -0.12179401 -0.08685714 -0.05318876  0.0546588  -0.00942631  0.09732337
 -0.06454506]
Beta position= [ 0.01063009 -0.04962125 -0.18158248  0.2055276  -0.07970993 -0.01200631
 -0.12502062 -0.1024786  -0.05629819  0.04874084 -0.00908107  0.10369965
 -0.06920195]
Delta position= [ 0.01153663 -0.04890768 -0.17607478  0.19480708 -0.08279292 -0.01245187
 -0.12325509 -0.09221714 -0.05414851  0.05171203 -0.00931668  0.09783547
 -0.06673357]
GWO is optimizing  "F7"
(5, 13)
Alpha position= [-0.16121949  0.06262274 -0.25571004  0.22936675  0.18012065  0.09507736
 -0.07363912  0.17322988  0.01574605  0.10496624 -0.00306901 -0.03607181
  0.065726  ]
Beta position= [-0.16683586  0.06157988 -0.3096108   0.20750246  0.15152123  0.08666888
 -0.08937082  0.18417215  0.01674468  0.10692481 -0.00314932 -0.03285284
  0.06389465]
Delta position= [-0.18189504  0.05690501 -0.28537238  0.23

In [23]:
## Feature selection on the dataset
## alpha is taken as the best solution: column i is kept when alpha[i]
## is at or above the threshold
threshold = -0.05
index = []
print("alpha shape=", alpha.shape[0])

modified_daata = pd.DataFrame()
for i, coordinate in enumerate(alpha):
    if coordinate >= threshold:
        kept_column = data.columns[i]
        modified_daata[kept_column] = data[kept_column].astype(float)

print("The modified data is following")
modified_daata.head()

alpha shape= 13
The modified data is following


Unnamed: 0,SEX,CP,CHOL,FBD,REST ECG,THALACH,EXANG,OLDPEAK,SLOPE
0,1.0,1.0,233.0,1.0,2.0,150.0,0.0,2.3,3.0
1,1.0,4.0,286.0,0.0,2.0,108.0,1.0,1.5,2.0
2,1.0,4.0,229.0,0.0,2.0,129.0,1.0,2.6,2.0
3,1.0,3.0,250.0,0.0,0.0,187.0,0.0,3.5,3.0
4,0.0,2.0,204.0,0.0,2.0,172.0,0.0,1.4,1.0


In [24]:
## Re-split and re-scale using only the selected columns
Y = data["RESULT"].values

# Same test fraction and random_state as the earlier split.
X_train, X_test, y_train, y_test = train_test_split(
    modified_daata,
    Y,
    test_size=0.40,
    random_state=14,
)

# Scaling statistics come from the training rows only.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

print(X_train[0])
modified_daata.info()

[ 0.6520712  -0.1543891  -1.45429374 -0.42779263 -0.9863518  -1.07775066
 -0.76662853 -0.37575918 -0.99376866]
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 9 columns):
SEX         303 non-null float64
CP          303 non-null float64
CHOL        303 non-null float64
FBD         303 non-null float64
REST ECG    303 non-null float64
THALACH     303 non-null float64
EXANG       303 non-null float64
OLDPEAK     303 non-null float64
SLOPE       303 non-null float64
dtypes: float64(9)
memory usage: 21.4 KB


In [25]:
# Same polynomial-kernel SVM configuration as the baseline cell, now
# trained and scored on the selected columns.
svcclassifier = SVC(kernel="poly", random_state=50)
y_pred = svcclassifier.fit(X_train, y_train).predict(X_test)

P1 = accuracy_score(y_pred, y_test)
print("Modified Accuracy score for SVM:", P1)
print("Earlier Accuracy score was", P)

Modified Accuracy score for SVM: 0.6229508196721312
Earlier Accuracy score was 0.5983606557377049
