In [7]:
import pandas as pd 
import cv2                 
import numpy as np         
import os                  
from random import shuffle
from tqdm import tqdm  
import scipy
import skimage
from skimage.transform import resize
print(os.listdir("chest_xray"))

['.DS_Store', 'test', 'train', 'val']


In [8]:
TRAIN_DIR = "chest_xray/train/"
TEST_DIR =  "chest_xray/test/"
VAL_DIR = "chest_xray/val/"

In [9]:
def get_label(Dir):
    for nextdir in os.listdir(Dir):
        if not nextdir.startswith('.'):
            if nextdir in ['NORMAL']:
                label = 0
            elif nextdir in ['PNEUMONIA']:
                label = 1
            else:
                label = 2
    return nextdir, label

In [10]:
def preprocessing_data(Dir):
    X = []
    y = []
    
    for nextdir in os.listdir(Dir):
        nextdir, label = get_label(Dir)
        temp = Dir + nextdir
        
        for image_filename in tqdm(os.listdir(temp)):
            path = os.path.join(temp + '/' , image_filename)
            img = cv2.imread(path,cv2.IMREAD_GRAYSCALE)
            if img is not None:
                img = skimage.transform.resize(img, (150, 150, 3))
                img = np.asarray(img)
                X.append(img)
                y.append(label)
            
    X = np.asarray(X)
    y = np.asarray(y)
    
    return X,y

In [11]:
def get_data(Dir):
    X = []
    y = []
    for nextDir in os.listdir(Dir):
        if not nextDir.startswith('.'):
            if nextDir in ['NORMAL']:
                label = 0
            elif nextDir in ['PNEUMONIA']:
                label = 1
            else:
                label = 2
                
            temp = Dir + nextDir
                
            for file in tqdm(os.listdir(temp)):
                img = cv2.imread(temp + '/' + file)
                if img is not None:
                    img = skimage.transform.resize(img, (150, 150, 3))
                    #img_file = scipy.misc.imresize(arr=img_file, size=(150, 150, 3))
                    img = np.asarray(img)
                    X.append(img)
                    y.append(label)
                    
    X = np.asarray(X)
    y = np.asarray(y)
    return X,y

In [12]:
X_train, y_train = get_data(TRAIN_DIR)

  warn("The default mode, 'constant', will be changed to 'reflect' in "
  warn("Anti-aliasing will be enabled by default in skimage 0.15 to "
100%|██████████| 3877/3877 [02:57<00:00, 21.88it/s]
100%|██████████| 1343/1343 [02:13<00:00, 10.08it/s]


In [13]:
X_test , y_test = get_data(TEST_DIR)


100%|██████████| 390/390 [00:15<00:00, 25.12it/s]
100%|██████████| 234/234 [00:22<00:00,  9.90it/s]


In [14]:
print(X_train.shape,'\n',X_test.shape)

(5216, 150, 150, 3) 
 (624, 150, 150, 3)


In [15]:
print(y_train.shape,'\n',y_test.shape)

(5216,) 
 (624,)


In [16]:

from keras.utils.np_utils import to_categorical

y_train = to_categorical(y_train, 2)
y_test = to_categorical(y_test, 2)

Using TensorFlow backend.


In [17]:

print(y_train.shape,'\n',y_test.shape)

(5216, 2) 
 (624, 2)


In [18]:
Pimages = os.listdir(TRAIN_DIR + "PNEUMONIA")
Nimages = os.listdir(TRAIN_DIR + "NORMAL")

In [19]:

from keras.callbacks import ReduceLROnPlateau , ModelCheckpoint
lr_reduce = ReduceLROnPlateau(monitor='val_acc', factor=0.1, epsilon=0.0001, patience=1, verbose=1)



In [20]:
filepath="weights.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')

In [21]:
from keras.models import Sequential
from keras.layers import Dense , Activation
from keras.layers import Dropout
from keras.layers import Flatten
from keras.constraints import maxnorm
from keras.optimizers import SGD , RMSprop
from keras.layers import Conv2D , BatchNormalization
from keras.layers import MaxPooling2D
from keras.utils import np_utils
from keras import backend as K
K.set_image_dim_ordering('th')
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier

In [22]:
X_train=X_train.reshape(5216,3,150,150)
X_test=X_test.reshape(624,3,150,150)

In [23]:

def swish_activation(x):
    return (K.sigmoid(x) * x)

model = Sequential()
model.add(Conv2D(16, (3, 3), activation='relu', padding="same", input_shape=(3,150,150)))
model.add(Conv2D(16, (3, 3), padding="same", activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(32, (3, 3), activation='relu', padding="same", input_shape=(3,150,150)))
model.add(Conv2D(32, (3, 3), padding="same", activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3), activation='relu', padding="same"))
model.add(Conv2D(64, (3, 3), padding="same", activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(96, (3, 3), dilation_rate=(2, 2), activation='relu', padding="same"))
model.add(Conv2D(96, (3, 3), padding="valid", activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(128, (3, 3), dilation_rate=(2, 2), activation='relu', padding="same"))
model.add(Conv2D(128, (3, 3), padding="valid", activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())

model.add(Dense(64, activation=swish_activation))
model.add(Dropout(0.4))
model.add(Dense(2 , activation='sigmoid'))

model.compile(loss='binary_crossentropy',
                  optimizer=RMSprop(lr=0.00005),
                  metrics=['accuracy'])

print(model.summary())

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 16, 150, 150)      448       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 16, 150, 150)      2320      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 16, 75, 75)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 32, 75, 75)        4640      
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 32, 75, 75)        9248      
_________________________________________________________________
max_pooling2d_2 (MaxP

In [25]:
batch_size = 256
epochs = 6

In [26]:

history = model.fit(X_train, y_train, validation_data = (X_test , y_test) ,callbacks=[lr_reduce,checkpoint] ,
          epochs=epochs)

Instructions for updating:
Use tf.cast instead.
Train on 5216 samples, validate on 624 samples
Epoch 1/6

Epoch 00001: val_acc improved from -inf to 0.67308, saving model to weights.hdf5
Epoch 2/6

Epoch 00002: val_acc improved from 0.67308 to 0.73798, saving model to weights.hdf5
Epoch 3/6

Epoch 00003: val_acc improved from 0.73798 to 0.74679, saving model to weights.hdf5
Epoch 4/6

Epoch 00004: val_acc improved from 0.74679 to 0.76442, saving model to weights.hdf5
Epoch 5/6

Epoch 00005: ReduceLROnPlateau reducing learning rate to 4.999999873689376e-06.

Epoch 00005: val_acc did not improve from 0.76442
Epoch 6/6

Epoch 00006: ReduceLROnPlateau reducing learning rate to 4.999999873689376e-07.

Epoch 00006: val_acc did not improve from 0.76442


In [27]:
from sklearn.metrics import confusion_matrix
pred = model.predict(X_test)
pred = np.argmax(pred,axis = 1) 
y_true = np.argmax(y_test,axis = 1)

In [29]:
CM = confusion_matrix(y_true, pred)

#fig, ax = plot_confusion_matrix(conf_mat=CM ,  figsize=(5, 5))
#plt.show()

In [30]:
CM

array([[102, 132],
       [ 15, 375]])

In [31]:
#recall

102/(102+15)

0.8717948717948718

In [28]:
from sklearn.metrics import f1_score
f1_score(y_true, pred, average='weighted') 


0.7405239687848383

In [35]:
def get_val_data(Dir):
    X = []
    y =[]
    for nextDir in os.listdir(Dir):
        if not nextDir.startswith('.'):
            if nextDir in ['NORMAL']:
                label = 0
            elif nextDir in ['PNEUMONIA']:
                label = 1
            else:
                label = 2
                
            temp = Dir + nextDir
                
            for file in tqdm(os.listdir(temp)):
                img = cv2.imread(temp + '/' + file)
                if img is not None:
                    img = skimage.transform.resize(img, (150, 150, 3))
                    #img_file = scipy.misc.imresize(arr=img_file, size=(150, 150, 3))
                    img = np.asarray(img)
                    X.append(img)
                    y.append(label)
                    
    X = np.asarray(X)
    y = np.asarray(y)
    return X,y

In [62]:
X_val, y_val =get_val_data(VAL_DIR)

100%|██████████| 11/11 [00:00<00:00, 24.17it/s]
100%|██████████| 10/10 [00:00<00:00,  9.66it/s]


In [63]:
X_val=X_val.reshape(17,3,150,150)


In [64]:
prediction = model.predict(X_val)


In [67]:
prediction = np.argmax(prediction,axis = 1) 
y_true_val = np.argmax(y_val_true,axis=1)


In [65]:
y_val_true = to_categorical(y_val, 2)

In [69]:
CM = confusion_matrix(y_true_val, prediction)


In [70]:
CM

array([[6, 2],
       [0, 9]])

In [130]:
test_image = cv2.imread('chest_xray/val/PNEUMONIA/puppy-1903313__340.JPG')

In [131]:
test_image = skimage.transform.resize(test_image, (150, 150, 3))
test_image = np.asarray(test_image)
test_image=test_image.reshape(1,3,150,150)

In [132]:
prediction = model.predict(test_image)

In [133]:
prediction

array([[0.00399561, 0.9958295 ]], dtype=float32)