In [None]:
import keras
import os, shutil
#import random
import tensorflow
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.optimizers import RMSprop
from keras.layers import Conv2D, MaxPooling2D, Activation, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plot

# Random under-sampling

In [None]:
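# This cell is commented out on purpose: it only needs to run ONCE over the folder of
# pneumonia X-ray images. It randomly picks 2690 of those images and permanently deletes
# them from disk (random under-sampling). Re-enabling it also requires restoring the
# commented-out `import random` in the imports cell.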

#pneu_folder = '/Users/jakub/Desktop/pneu'

# create array with images without hidden files
#pneu_images_array = [file for file in os.listdir(pneu_folder) if not file.startswith('.')]
# set max range for the randint function
#random_max = 4272

#for image in range(2690):
#    index = random.randint(0, random_max)
    # decrease the max of the range since we are going to remove one image
#    random_max -= 1
    # delete the image
#    os.remove(os.path.join(pneu_folder, pneu_images_array[index]))
    # delete the image from the array of images
#    pneu_images_array.pop(index)



# Data Preprocessing

In [None]:
# Root folder of the dataset on the local machine
base_dir = '/Users/jakub/Documents/RUC/3rd_Semester/AI/project/pneumonia'

# One sub-directory per data split, each located directly under base_dir
train_dir, validation_dir, test_dir = (
    os.path.join(base_dir, split_name)
    for split_name in ('train', 'validation', 'test')
)

# Target height and width (in pixels) used when resizing the images
img_height = img_width = 128

def preprocess_images(directory):
    """Create a batch iterator over the images stored under ``directory``.

    Images are read as single-channel (grayscale), resized to
    ``(img_height, img_width)``, multiplied by 1/255 and served in shuffled
    batches of 32 with binary labels (``class_mode='binary'``).

    Args:
        directory: Path handed to ``flow_from_directory``.

    Returns:
        The iterator returned by ``ImageDataGenerator.flow_from_directory``.
    """
    # `rescale` must be passed by keyword: the first positional parameter of
    # ImageDataGenerator is `featurewise_center`, so the previous positional
    # `1./255` never rescaled the pixel values.
    datagen = ImageDataGenerator(rescale=1./255)
    return datagen.flow_from_directory(
        directory,
        color_mode = 'grayscale',
        target_size=(img_height, img_width),
        batch_size = 32,
        class_mode = 'binary',
        shuffle= True)

# Build the batch iterators for the training split first, then the validation split
train_generator, validation_generator = (
    preprocess_images(split_dir) for split_dir in (train_dir, validation_dir)
)

# Creating the network

In [None]:
# Convolutional network with a single sigmoid output for binary classification
# of (img_height, img_width, 1) inputs.
model = Sequential()

# Feature extractor: four stages of 3x3 ReLU convolution followed by 2x2 max-pooling,
# with the number of filters doubling at every stage (32 -> 64 -> 128 -> 256).
model.add(Conv2D(32, (3, 3), activation = 'relu', input_shape=(img_height, img_width, 1)))
model.add(MaxPooling2D((2,2)))

model.add(Conv2D(64, (3, 3), activation = 'relu'))
model.add(MaxPooling2D((2, 2)))

model.add(Conv2D(128, (3, 3), activation ='relu'))
model.add(MaxPooling2D((2, 2)))

model.add(Conv2D(256, (3, 3), activation ='relu'))
model.add(MaxPooling2D((2, 2)))

# Classifier head: flatten the feature maps, apply dropout (rate 0.5),
# then a 512-unit dense layer and the single sigmoid output unit.
model.add(Flatten())
model.add(Dropout(0.5))

model.add(Dense(512, activation = 'relu'))
model.add(Dense(1, activation = 'sigmoid'))

model.compile(
    loss = 'binary_crossentropy',
    # The bare name `Adam` was never imported (only RMSprop is), so the optimizer is
    # referenced through the already-imported `keras` package. `learning_rate`
    # replaces the deprecated `lr` alias.
    optimizer = keras.optimizers.Adam(learning_rate=1e-4),
    metrics = ['accuracy'])

model.summary()

In [None]:
# Callback that stops training once the validation loss has not improved for
# 3 consecutive epochs and restores the weights of the best epoch seen.
earlyStopping = EarlyStopping(
    monitor = 'val_loss',
    patience = 3,
    # Documented verbosity values are 0 (silent) and 1 (report when stopping)
    verbose = 1,
    mode = 'min',
    restore_best_weights = True)

In [None]:
# Train for at most 20 epochs; the earlyStopping callback may end the run sooner.
# The step counts keep their original upper bounds (100 training / 30 validation
# batches) but are capped at the number of batches each generator provides in one
# pass, so Keras never requests more batches than exist and aborts training with
# an "input ran out of data" warning.
history = model.fit(
    train_generator,
    steps_per_epoch = min(100, len(train_generator)),
    epochs = 20,
    validation_data = validation_generator,
    validation_steps = min(30, len(validation_generator)),
    callbacks = [earlyStopping])

In [None]:
# Per-epoch metric values recorded by model.fit
recorded = history.history
acc, val_acc = recorded['accuracy'], recorded['val_accuracy']
loss, val_loss = recorded['loss'], recorded['val_loss']

# x-axis: one entry per recorded epoch, starting at 0
epochs = range(len(acc))

# (training curve, training label, validation curve, validation label, figure title)
curve_specs = [
    (acc, 'Training accuracy', val_acc, 'Validation accuracy',
     'Training and validation accuracy'),
    (loss, 'Training loss', val_loss, 'Validation loss',
     'Training and validation loss'),
]

for spec_index, (train_curve, train_label, val_curve, val_label, figure_title) in enumerate(curve_specs):
    if spec_index > 0:
        # every metric after the first is drawn on its own new figure
        plot.figure()
    # training values as blue dots, validation values as a solid blue line
    plot.plot(epochs, train_curve, 'bo', label=train_label)
    plot.plot(epochs, val_curve, 'b', label=val_label)
    plot.title(figure_title)
    plot.legend()

plot.show()

In [None]:
# Batch iterator over the held-out test split
test_generator = preprocess_images(test_dir)

# Evaluate on exactly one pass over the test set. The previous hard-coded
# `steps=566` counted batches (of 32 images each), not images, and could request
# more batches than the generator holds.
test_loss, test_acc = model.evaluate(test_generator, steps=len(test_generator))
print('test accuracy:', test_acc)
print('test loss:', test_loss)

In [None]:
# Persist the trained model to an HDF5 file in the current working directory
saved_model_path = 'pneu8.h5'
model.save(saved_model_path)