In [1]:
from matplotlib import pyplot as plt
import numpy as np
import math
import os
import cv2
import pandas as pd
from skimage.color import rgb2gray
from skimage import feature
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix,classification_report,accuracy_score

In [2]:
list1=os.listdir('C:/chest_x-ray/train_normal')
list2=os.listdir('C:/chest_x-ray/train_pneumonia') 
list3=os.listdir('C:/chest_x-ray/test_normal')
list4=os.listdir('C:/chest_x-ray/test_pneumonia') 
print('list1 :',len(list1))
print('list2 :',len(list2))
print('list3 :',len(list3))
print('list4 :',len(list4))

list1 : 1341
list2 : 3875
list3 : 234
list4 : 390


In [3]:
feat1=np.zeros((1,52)) 
for i in range(len(list1)):
    img_old=plt.imread('C:/chest_x-ray/train_normal/'+list1[i])
    img_gray=rgb2gray(img_old)
    img_gray_resized=cv2.resize(img_gray,(300,180),interpolation=cv2.INTER_AREA)
    lbp=feature.local_binary_pattern(img_gray_resized,method="uniform",P=8,R=1)
    hist,_=np.histogram(lbp.ravel(),bins=np.arange(0,53),range=(0,52))
    feat1=np.vstack((feat1,hist))
feat1=feat1[1:,:]
print('first class train shape :',feat1.shape)

feat2=np.zeros((1,52)) 
for i in range(len(list2)):
    img_old=plt.imread('C:/chest_x-ray/train_pneumonia/'+list2[i])
    img_gray=rgb2gray(img_old)
    img_gray_resized=cv2.resize(img_gray,(300,180),interpolation=cv2.INTER_AREA)
    lbp=feature.local_binary_pattern(img_gray_resized,method="uniform",P=8,R=1)
    numPoints=50
    hist,_=np.histogram(lbp.ravel(),bins=np.arange(0,numPoints+3),range=(0,numPoints+2))
    feat2=np.vstack((feat2,hist))
feat2=feat2[1:,:]
print('second class train shape :',feat2.shape)

feat3=np.zeros((1,52)) 
for i in range(len(list3)):
    img_old=plt.imread('C:/chest_x-ray/test_normal/'+list3[i])
    img_gray=rgb2gray(img_old)
    img_gray_resized=cv2.resize(img_gray,(300,180),interpolation=cv2.INTER_AREA)
    lbp=feature.local_binary_pattern(img_gray_resized,method="uniform",P=8,R=1)
    numPoints=50
    hist,_=np.histogram(lbp.ravel(),bins=np.arange(0,numPoints+3),range=(0,numPoints+2))
    feat3=np.vstack((feat3,hist))
feat3=feat3[1:,:]
print('first class test shape :',feat3.shape)

feat4=np.zeros((1,52)) 
for i in range(len(list4)):
    img_old=plt.imread('C:/chest_x-ray/test_pneumonia/'+list4[i])
    img_gray=rgb2gray(img_old)
    img_gray_resized=cv2.resize(img_gray,(300,180),interpolation=cv2.INTER_AREA)
    lbp=feature.local_binary_pattern(img_gray_resized,method="uniform",P=8,R=1)
    numPoints=50
    hist,_=np.histogram(lbp.ravel(),bins=np.arange(0,numPoints+3),range=(0,numPoints+2))
    feat4=np.vstack((feat4,hist))
feat4=feat4[1:,:]
print('second class test shape :',feat4.shape)

first class train shape : (1341, 52)
second class train shape : (3875, 52)
first class test shape : (234, 52)
second class test shape : (390, 52)


In [4]:
indices=np.random.randint(0,feat2.shape[0],1341)
feat2=feat2[indices,:]
X_train=np.vstack((feat1,feat2))
X_test=np.vstack((feat3,feat4))
y_train=np.hstack((np.zeros(1341),np.ones(1341)))
y_test=np.hstack((np.zeros(234),np.ones(390)))
print(X_train.shape,'  ',y_train.shape,'  ',X_test.shape,'  ',y_test.shape)

(2682, 52)    (2682,)    (624, 52)    (624,)


In [5]:
arr1=np.random.permutation(2682)
X_train=X_train[arr1,:]
y_train=y_train[arr1]

arr2=np.random.permutation(624)
X_test=X_test[arr2,:]
y_test=y_test[arr2]

In [6]:
fdr=[]                      
sha=X_train.shape
for i in range(sha[1]):
    x1=[]
    x2=[]
    for j in range(sha[0]):
        if(y_train[j]==1):
            x1.append(X_train[j,i])
        else:
            x2.append(X_train[j,i])
    m1=np.mean(x1)
    m2=np.mean(x2)
    s1=np.std(x1)
    s2=np.std(x2)
    if((s1==0)and(s2==0)):
        fdr1=0
    else:
        fdr1=((m1-m2)**2)/((s1**2)+(s2**2))
    fdr.append(fdr1)
fdr=np.array(fdr)   
fdr=-1*fdr        
q=np.argsort(fdr)  
X_train=X_train[:,q]
X_test=X_test[:,q]

In [7]:
grid={'kernel':['linear','rbf','sigmoid'],'C':[1,2,5,10,15,30,100,200]}
clf=GridSearchCV(SVC(random_state=0),grid,cv=5) 
#clf=SVC(kernel='linear',random_state=0)
clf.fit(X_train,y_train) 
print('best combination of model parameters :',clf.best_params_)
y_pred=clf.predict(X_test)







best combination of model parameters : {'C': 1, 'kernel': 'linear'}


In [8]:
acc=[]
sens=[]
spec=[]
for i in range(y_test.shape[0]):
    TP=0
    TN=0
    FN=0
    FP=0                                   
    if(y_test[i]==y_pred[i]):
        if(y_test[i]==1):
            TP+=1
        else:
            TN+=1
    else:
        if(y_pred[i]==1):
            FP+=1
        else:
            FN+=1
    acc_=(TP+TN)/(TP+TN+FP+FN)     
    acc.append(acc_)
    if((TP+FN)==0):
        sens_=0                    
    else:
        sens_=TP/(TP+FN)
    sens.append(sens_)
    if((TN+FP)==0):
        spec_=0                    
    else:
        spec_=TN/(TN+FP)
    spec.append(spec_)
print('accuracy :',np.max(acc))
print('senstivity :',np.max(sens))
print('specificity :',np.max(spec))

accuracy : 1.0
senstivity : 1.0
specificity : 1.0


In [9]:
print(classification_report(y_test,y_pred))
print(confusion_matrix(y_test,y_pred))
print('accuracy by accuracy_score :',accuracy_score(y_test,y_pred))

              precision    recall  f1-score   support

         0.0       0.80      0.74      0.77       234
         1.0       0.85      0.89      0.87       390

   micro avg       0.83      0.83      0.83       624
   macro avg       0.83      0.82      0.82       624
weighted avg       0.83      0.83      0.83       624

[[174  60]
 [ 43 347]]
accuracy by accuracy_score : 0.8349358974358975
