In [15]:
import logging
import os

# ---- logging setup -------------------------------------------
# Records look like "<date time> <LEVEL, padded to 8 chars> <message>";
# everything at INFO level and above is emitted.
LOG_FORMAT = '%(asctime)s %(levelname)-8s %(message)s'
LOG_DATE_FORMAT = '%Y-%m-%d %H:%M:%S'
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT, datefmt=LOG_DATE_FORMAT)
# ---------------------------------------------------------------

In [16]:
import numpy as np
import tensorflow as tf
import cv2
import matplotlib.pyplot as plt

In [17]:
# Dataset: chest x-ray images from
# https://www.kaggle.com/paultimothymooney/chest-xray-pneumonia/
# If the extracted 'chest_xray' folder is missing, unpack the downloaded
# archive; if the archive is missing as well, ask the user to download it.
import zipfile


def extract_dataset(archive_path, extract_to='.', remove_archive=True):
    """Unpack a zip archive and optionally delete it afterwards.

    Args:
        archive_path: path of the zip file to unpack.
        extract_to: folder the archive content is written into.
        remove_archive: delete the archive once it was unpacked successfully.

    Returns:
        True if the whole archive was extracted, False if it could not be
        read or written out. On failure the archive is never deleted, so the
        download is not lost.
    """
    try:
        with zipfile.ZipFile(archive_path) as archive:
            archive.extractall(extract_to)
    except (zipfile.BadZipFile, OSError) as err:
        logging.error('unzipping {} failed: {}'.format(archive_path, err))
        return False
    # only reached when extraction finished without an error
    if remove_archive:
        os.remove(archive_path)
    return True


if not os.path.exists('chest_xray'):
    if os.path.exists('chest-xray-pneumonia.zip'):
        logging.info('unzipping the dataset file')
        if extract_dataset('chest-xray-pneumonia.zip'):
            logging.info('unzipping is done')
    else:
        logging.warning('please download the dataset from https://www.kaggle.com/paultimothymooney/chest-xray-pneumonia/')

In [18]:
def _read_resized_gray(file_path, size):
    """Load file_path as a gray-scale image resized to size x size; None if that fails."""
    try:
        pixels = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
        # imread reports an unreadable file by returning None rather than raising
        if pixels is None:
            return None
        return cv2.resize(pixels, (size, size))
    except Exception:
        # best effort: one broken file must not abort the whole import.
        # KeyboardInterrupt / SystemExit are not Exception subclasses, so
        # interrupting the (slow) read loop still works.
        return None


def create_data_from_image(dir, data_type, main, sub, size):
    """Read every image below dir/data_type/<category> and label it.

    Labels: 0 for images of main[0]; for every other main category the file
    name decides: 1 if it contains sub[0], otherwise 2.

    Args:
        dir: root folder of the dataset.
        data_type: sub-folder of dir to read (e.g. 'train' or 'test').
        main: list of category folder names; the first one gets label 0.
        sub: list of sub-category markers; only sub[0] is looked up in the
            file names.
        size: edge length in pixels the images are resized to.

    Returns:
        (training_data, occurrence): training_data is a list of
        [image_array, label] pairs, occurrence a 3-element list with the
        number of images per label. Files that cannot be read are logged and
        skipped.

    Raises:
        OSError: if a category folder does not exist (from os.listdir).
    """
    training_data = []
    occurrence = [0, 0, 0]
    for cat in main:
        logging.info('reading images for category {}'.format(cat))
        path = os.path.join(dir, data_type, cat)
        category_label = main.index(cat)
        for img in os.listdir(path):
            # read and resize image
            img_array = _read_resized_gray(os.path.join(path, img), size)
            if img_array is None:
                logging.warning('error reading {}'.format(img))
                continue

            # assign label for virus and bacteria; kept in a per-image
            # variable so one image's label never leaks into the next one
            if category_label == 0:
                label = 0
            elif sub[0] in img:
                label = 1
            else:
                label = 2

            # append to training data
            occurrence[label] += 1
            training_data.append([img_array, label])

    logging.info('reading images done')
    return training_data, occurrence

            
# --- settings for reading the images ---
image_size = 200                          # images are resized to image_size x image_size
main_category = ['NORMAL', 'PNEUMONIA']   # category folders below <directory>/<data_type>
sub_category = ['bacteria', 'virus']      # markers looked up in the file names
data_type = 'train'   # choose between train and test
directory = 'chest_xray'

# create training data with labels: 0:normal, 1:bacterial 2:viral
training_data, occurrence = create_data_from_image(
    dir=directory,
    data_type=data_type,
    main=main_category,
    sub=sub_category,
    size=image_size,
)

2020-05-14 15:42:44 INFO     reading images for category NORMAL
2020-05-14 15:43:11 INFO     reading images for category PNEUMONIA
2020-05-14 15:43:35 INFO     reading images done


In [19]:
# number of images with labels 0, 1 and 2
print('Normal:', occurrence[0], 'Bacterial:', occurrence[1], 'Viral:', occurrence[2])

# relative frequency of each label in the training data
total_images = float(sum(occurrence))
weight = [count / total_images for count in occurrence]
print('Normal:', weight[0], 'Bacterial:', weight[1], 'Viral:', weight[2])

# class_weight scales the loss contribution of each class during fit().
# Using the frequency itself would make the already dominant class count
# even more, so weight each class by its inverse frequency instead
# (total / (n_classes * count)): a perfectly balanced class gets 1.0,
# rarer classes more, more frequent classes less.
# Labels without any image are left out to avoid a division by zero.
class_weight = {label: total_images / (len(occurrence) * count)
                for label, count in enumerate(occurrence)
                if count > 0}
print('Class weights:', class_weight)

# the images were appended category by category, so shuffle the training data
# otherwise the neural network model will be inefficient
import random
random.shuffle(training_data)

Normal: 1341 Bacterial: 2530 Viral: 1345
Normal: 0.2570935582822086 Bacterial: 0.48504601226993865 Viral: 0.25786042944785276


In [20]:
# split the [image, label] pairs into two parallel lists:
# X holds the images (features), y the matching labels
X = [pair[0] for pair in training_data]
y = [pair[1] for pair in training_data]

In [21]:
# we have to convert the lists to numpy arrays that are understandable for tensorflow
# X: -1 lets numpy infer the number of images, 1 is because the image is gray scale
X = np.array(X).reshape(-1, image_size, image_size, 1)
# y: Model.fit documents numpy arrays / tensors as targets, not plain Python
# lists, so convert the labels as well (a no-op if y already is an array)
y = np.array(y)


In [22]:
# save the training data
# you need at least 8GB of ram for this
import pickle

# open() does not create missing folders, so make sure the output folder exists
os.makedirs('trainings', exist_ok=True)

# the with-blocks close the files even if pickling fails part-way
with open('trainings/X_3labels.pickle', 'wb') as pickle_out:
    pickle.dump(X, pickle_out)

with open('trainings/y_3labels.pickle', 'wb') as pickle_out:
    pickle.dump(y, pickle_out)

In [23]:
# import libraries to create neural networks
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.callbacks import TensorBoard

In [24]:
# uncomment the following to read X and y in case we want to re run from here
# this avoids the need to re-read all images
# import pickle
# X = pickle.load(open('trainings/X_3labels.pickle', 'rb'))
# y = pickle.load(open('trainings/y_3labels.pickle', 'rb'))

# in case of gray scale image data, we normalize it by dividing by 255
# Only do so while X still holds integer pixel values: the division turns X
# into floats, so re-running this cell leaves the already normalized data
# untouched instead of dividing it by 255 a second time.
# NOTE(review): assumes the freshly read / unpickled X has an integer dtype
# (8-bit pixels from cv2.imread) - confirm if the loading code changes.
if np.issubdtype(X.dtype, np.integer):
    X = X / 255.0

In [25]:
import time

# unique name for this run: model description plus the current unix time in whole seconds
run_started = int(time.time())
NAME = 'xray-3_labels-4_layers-2x32-{}'.format(run_started)
# optional TensorBoard logging (disabled):
# tensorboard = TensorBoard(log_dir='trainings/{}'.format(NAME))
# os.system("tensorboard --logdir='trainings/'")

In [26]:
# create the model: two strided convolutions followed by a small dense classifier
model = Sequential()

# layer 1: 32 filters, 4x4 kernel moved in steps of 4 (patches do not overlap);
# the input shape is taken from X without its first (sample) axis
model.add(Conv2D(32, (4,4), strides=(4, 4), input_shape=X.shape[1:]))
model.add(Activation('relu'))
# model.add(MaxPooling2D(pool_size=(2,2)))

# layer 2: 32 filters, 4x4 kernel moved in steps of 2
model.add(Conv2D(32, (4, 4), strides=(2, 2)))
model.add(Activation('relu'))

# layer 3: flatten the feature maps and feed them to a 64-unit dense layer
model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))

# output layer, we use Dense(3) to have 3 labels 0,1,2
# softmax turns the 3 outputs into class probabilities
model.add(Dense(3))
model.add(Activation('softmax'))

# sparse_categorical_crossentropy matches integer labels (no one-hot encoding needed)
model.compile(loss='sparse_categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# summary() prints the table itself and returns None, so wrapping it in
# print() would only add a stray "None" line to the output
model.summary()

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 50, 50, 32)        544       
_________________________________________________________________
activation (Activation)      (None, 50, 50, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 24, 24, 32)        16416     
_________________________________________________________________
activation_1 (Activation)    (None, 24, 24, 32)        0         
_________________________________________________________________
flatten (Flatten)            (None, 18432)             0         
_________________________________________________________________
dense (Dense)                (None, 64)                1179712   
__

In [27]:
# training settings: 20 passes over the data in batches of 100 samples,
# per-class loss weights, and 10% of the samples held back for validation
fit_options = dict(
    batch_size=100,
    epochs=20,
    class_weight=class_weight,
    validation_split=0.1,
)
# to log to TensorBoard as well, add: fit_options['callbacks'] = [tensorboard]
model.fit(X, y, **fit_options)

Train on 4694 samples, validate on 522 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7fed0995eac8>

In [29]:
# store the trained network weights in an .h5 file named after this run
weights_file = "trainings/{}.h5".format(NAME)
model.save_weights(weights_file)