In [1]:
import warnings
warnings.filterwarnings('ignore')

import os, glob
import cv2 as cv
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

from sklearn import preprocessing, metrics
from sklearn.preprocessing import LabelEncoder
from sklearn.manifold import TSNE
from sklearn.model_selection import train_test_split

# from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import RandomOverSampler

In [2]:
from tensorflow import keras
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout, Flatten, BatchNormalization, Conv2D, MaxPooling2D, SeparableConv2D, LeakyReLU
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adadelta, Adam
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.utils import to_categorical

In [3]:
class attrdict(dict):
    def __getattr__(self, name):
        return self[name]

    def __setattr__(self, name, value):
        self[name] = value

In [4]:
data_dir = 'data'
train_dir = os.path.join(data_dir, 'train')
test_dir = os.path.join(data_dir, 'test')
val_dir = os.path.join(data_dir, 'val')
model_dir = 'model'

width, height = 160, 160
resize = (width, height)
input_shape = (width, height, 1)
num_classes = 2

batch_size = 32
epochs = 10

In [5]:
data = attrdict()
classes = ['NORMAL', 'PNEUMONIA_BACTERIA', 'PNEUMONIA_VIRUS']
train_test_dir = ['train', 'test', 'val']

for class_ in classes: data[class_] = []

le = LabelEncoder()

print("[STATUS] Load images directory")
for dir_path in glob.glob(os.path.join(data_dir, '*')):
    kind_dir = os.path.split(dir_path)[-1]
    if kind_dir in train_test_dir:
        data[kind_dir] = attrdict()
        data[kind_dir]['x'], data[kind_dir]['y'] = [], []

        for path in glob.glob(os.path.join(dir_path, '*')):
            label = os.path.split(path)[-1]

            images = glob.glob(os.path.join(path, '*.jpeg'))
            if len(images) != 0:
                for image in tqdm(images, desc='Load ' + path):
                    if 'bacteria' in image:
                        data[kind_dir]['y'].append(classes[1])
                        if label != 'val':
                            data[classes[1]].append(image)
                    elif 'virus' in image:
                        data[kind_dir]['y'].append(classes[2])
                        if label != 'val':
                            data[classes[2]].append(image)
                    else:
                        data[kind_dir]['y'].append(label)
                        if label != 'val':
                            data[classes[0]].append(image)

                    image = cv.imread(image, 0)
                    image = cv.resize(image, resize)
                    data[kind_dir]['x'].append(image)

        data[kind_dir]['x'] = np.array(data[kind_dir]['x'], dtype='f') / 255
        data[kind_dir]['x'] = data[kind_dir]['x'].reshape(data[kind_dir]['x'].shape[0], width, height, 1)
        data[kind_dir]['y'] = np.array(data[kind_dir]['y'])

        le.fit(data[kind_dir]['y'])
        data[kind_dir]['y'] = le.transform(data[kind_dir]['y'])
        data[kind_dir]['y'] = to_categorical(data[kind_dir]['y'], len(classes))

for class_ in classes: data[class_] = np.array(data[class_])

print("[STATUS] Load images completed")

Load data\test\NORMAL:   0%|                                                                   | 0/234 [00:00<?, ?it/s]

[STATUS] Load images directory


Load data\test\NORMAL: 100%|█████████████████████████████████████████████████████████| 234/234 [00:09<00:00, 24.75it/s]
Load data\test\PNEUMONIA: 100%|██████████████████████████████████████████████████████| 390/390 [00:10<00:00, 36.12it/s]
Load data\train\NORMAL: 100%|██████████████████████████████████████████████████████| 1341/1341 [02:09<00:00, 10.36it/s]
Load data\train\PNEUMONIA: 100%|███████████████████████████████████████████████████| 3875/3875 [01:41<00:00, 38.15it/s]
Load data\val\NORMAL: 100%|██████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 10.56it/s]
Load data\val\PNEUMONIA: 100%|███████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 38.02it/s]

[STATUS] Load images completed





In [12]:
for kind in data:
    if kind in train_test_dir:
        print('%s\t: x%s\t| y%s' % 
              (kind, data[kind].x.shape, data[kind].y.shape))
    else:
        print('%s : %s' % (kind, data[kind].shape[0]))

NORMAL : 1583
PNEUMONIA_BACTERIA : 2780
PNEUMONIA_VIRUS : 1493
test	: x(832, 160, 160, 1)	| y(832, 3)
train	: x(7484, 160, 160, 1)	| y(7484, 3)
val	: x(16, 160, 160, 1)	| y(16, 3)


In [7]:
data.train.x = np.concatenate((data.train.x, data.test.x), axis=0)
data.train.y = np.concatenate((data.train.y, data.test.y), axis=0)

print('train :', data.train.x.shape)

train : (5840, 160, 160, 1)


In [8]:
over_sampling = RandomOverSampler(random_state=42)
x = data.train.x.reshape(data.train.x.shape[0], width*height)
data.train.x, data.train.y = over_sampling.fit_resample(x, data.train.y)
data.train.x = data.train.x.reshape(data.train.x.shape[0], width, height, 1)

In [9]:
data.train.x, data.test.x, data.train.y, data.test.y = \
train_test_split(data.train.x, data.train.y, test_size=0.10, random_state=42)

In [14]:
for kind in data:
    if kind in train_test_dir:
        print('%s\t: x%s\t| y%s' % 
              (kind, data[kind].x.shape, data[kind].y.shape))
#     else:
#         print('%s : %s' % (kind, data[kind].shape[0]))

test	: x(832, 160, 160, 1)	| y(832, 3)
train	: x(7484, 160, 160, 1)	| y(7484, 3)
val	: x(16, 160, 160, 1)	| y(16, 3)


In [23]:
K.clear_session()
model = Sequential([
    Conv2D(16, kernel_size=(3, 3), activation='relu', padding='same', input_shape=input_shape),
    Conv2D(16, kernel_size=(3, 3), activation='relu', padding='same'),
    MaxPooling2D(pool_size=(2, 2)),

    SeparableConv2D(32, kernel_size=(3, 3), activation='relu', padding='same'),
    SeparableConv2D(32, kernel_size=(3, 3), activation='relu', padding='same'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),

#     SeparableConv2D(64, kernel_size=(3, 3), activation='relu', padding='same'),
#     SeparableConv2D(64, kernel_size=(3, 3), activation='relu', padding='same'),
#     BatchNormalization(),
#     MaxPooling2D(pool_size=(2, 2)),

#     SeparableConv2D(128, kernel_size=(3, 3), activation='relu', padding='same'),
#     SeparableConv2D(128, kernel_size=(3, 3), activation='relu', padding='same'),
#     BatchNormalization(),
#     MaxPooling2D(pool_size=(2, 2)),
#     Dropout(0.2),

#     SeparableConv2D(256, kernel_size=(3, 3), activation='relu', padding='same'),
#     SeparableConv2D(256, kernel_size=(3, 3), activation='relu', padding='same'),
#     BatchNormalization(),
#     MaxPooling2D(pool_size=(2, 2)),
#     Dropout(0.2),

    Flatten(),
#     Dense(512, activation='relu'),
#     Dropout(0.7),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(len(classes), activation='softmax')
])
model.compile(optimizer = Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
checkpoint = ModelCheckpoint(filepath=os.path.join(model_dir, 'best_weights_three_class.hdf5'), save_best_only=True, save_weights_only=True)
lr_reduce = ReduceLROnPlateau(monitor='val_loss', factor=0.3, patience=2, verbose=2, mode='max')

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 160, 160, 16)      160       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 160, 160, 16)      2320      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 80, 80, 16)        0         
_________________________________________________________________
separable_conv2d (SeparableC (None, 80, 80, 32)        688       
_________________________________________________________________
separable_conv2d_1 (Separabl (None, 80, 80, 32)        1344      
_________________________________________________________________
batch_normalization (BatchNo (None, 80, 80, 32)        128       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 40, 40, 32)        0

In [21]:
data.test.y

array([[0, 0, 1],
       [1, 0, 0],
       [0, 0, 1],
       ...,
       [1, 0, 0],
       [0, 0, 1],
       [0, 1, 0]])

In [24]:
history = model.fit(data.train.x, data.train.y,
                    batch_size=batch_size, epochs=epochs, verbose=1,
                    validation_data=(data.test.x, data.test.y), callbacks=[checkpoint, lr_reduce])

score = model.evaluate(data.test.x, data.test.y, verbose=0)
print('\nEvaluate')
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Train on 7484 samples, validate on 832 samples
Epoch 1/10
  32/7484 [..............................] - ETA: 4:51:14

UnknownError:  Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
	 [[node sequential/conv2d/Conv2D (defined at <ipython-input-24-02d5524754a8>:3) ]] [Op:__inference_distributed_function_2523]

Function call stack:
distributed_function
