I've preprocessed all the training pictures into subdirectories, one per class, so that the lung disease each picture refers to can be read from the directory it is stored in.

In [None]:
import numpy as np
import pandas as pd
from datetime import datetime
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt

# Mini-batch size shared by the directory iterators created in this notebook.
BATCH_SIZE = 16
# Root folder of the pre-processed training images.
DATASET_DIR = "../input/preprocessed-coronahack-train/train"

Prepare the data using `ImageDataGenerator` with some augmentation.

In [None]:
# Settings shared by the training and validation pipelines.
IMAGE_SIZE = (200, 200)     # (height, width) every image is resized to
VALIDATION_SPLIT = 0.3      # fraction of each class held out for validation
SEED = 42                   # fixes the shuffling order of the training iterator

# Training pipeline: rescale pixel values to [0, 1] and apply random augmentation.
# NOTE(review): brightness_range=[1, 2] only ever brightens, and vertical_flip
# turns the images upside down -- confirm both are intended for this dataset.
train_datagen = ImageDataGenerator(validation_split=VALIDATION_SPLIT,
                                   rescale=1./255,
                                   horizontal_flip=True,
                                   vertical_flip=True,
                                   zoom_range=0.2,
                                   brightness_range=[1, 2])

# Validation pipeline: rescale only. Augmenting held-out images would make the
# reported metrics noisy and not representative of real, unmodified inputs.
# The same validation_split keeps the two subsets disjoint, because the split
# is taken from the file listing rather than from the generator's random state.
eval_datagen = ImageDataGenerator(validation_split=VALIDATION_SPLIT,
                                  rescale=1./255)

# Kept under the original name in case other cells still refer to `datagen`.
datagen = train_datagen

train_generator = train_datagen.flow_from_directory(
    DATASET_DIR,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="categorical",
    subset="training",
    seed=SEED
)

# shuffle=False keeps predictions aligned with test_generator.classes, which is
# needed for any later per-class analysis (e.g. a confusion matrix).
test_generator = eval_datagen.flow_from_directory(
    DATASET_DIR,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="categorical",
    subset="validation",
    shuffle=False
)

Let's see which categories we got.

In [None]:
# Display the mapping from class name (sub-directory name) to the integer
# index used for that class in the one-hot encoded labels.
train_generator.class_indices

Let's fix some hyperparameters for the CNN.

In [None]:
# Hyper-parameters of the CNN built in the next cell.
DROPOUT = 0.5                 # dropout rate applied after each block
KERNEL_SIZE = 4               # side length of the square convolution kernels
POOL_SIZE, STRIDES = 4, 4     # pooling window and step (equal, so windows do not overlap)
INPUT_SHAPE = (200, 200, 3)   # must match the generators' 200x200 target size; 3 colour channels

Let's build the CNN model

In [None]:
# Size the output layer from the classes actually found on disk instead of
# hard-coding it, so the softmax width always matches the one-hot labels
# produced by the generator.
NUM_CLASSES = len(train_generator.class_indices)

# Two convolution -> max-pooling -> dropout blocks, followed by a dense
# classifier head ending in a softmax over the classes.
model = Sequential([
    Conv2D(filters=64, kernel_size=KERNEL_SIZE, padding='same',
           activation='relu', input_shape=INPUT_SHAPE),
    MaxPooling2D(pool_size=POOL_SIZE, strides=STRIDES),
    Dropout(DROPOUT),
    Conv2D(filters=32, kernel_size=KERNEL_SIZE,
           padding='same', activation='relu'),
    MaxPooling2D(pool_size=POOL_SIZE, strides=STRIDES),
    Dropout(DROPOUT),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(DROPOUT),
    Dense(NUM_CLASSES, activation='softmax'),
])

# categorical_crossentropy pairs with the generators' class_mode="categorical"
# (one-hot labels).
model.compile(optimizer="adam", loss="categorical_crossentropy",
              metrics=["accuracy"])

Let's see what's inside the model


In [None]:
# Print the layer-by-layer architecture of the model built above.
model.summary()

Now let's train the model and then plot the results

In [None]:
EPOCHS = 50  # number of full passes over the training subset

# Passing validation_data makes Keras record val_loss / val_accuracy after
# every epoch, so over-fitting is visible in the curves below.
history = model.fit(train_generator, epochs=EPOCHS,
                    validation_data=test_generator)

# Final metrics; evaluate() returns [loss, accuracy] in the order given to compile().
# NOTE: test_generator is the validation subset of the training directory, so
# "Test" below refers to held-out validation images, not an independent test set.
metrics_train = model.evaluate(train_generator)
metrics_test = model.evaluate(test_generator)

print("Train Accuracy = %.4f - Train loss = %.4f" %
      (metrics_train[1], metrics_train[0]))
print("Test Accuracy = %.4f - Test loss = %.4f" %
      (metrics_test[1], metrics_test[0]))

# Accuracy and loss live on different scales, so each gets its own panel with
# a matching title and y-label instead of sharing one axis labelled "accuracy".
fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(12, 4))
for ax, metric in ((ax_acc, 'accuracy'), (ax_loss, 'loss')):
    ax.plot(history.history[metric], label='train')
    val_key = 'val_' + metric
    if val_key in history.history:
        ax.plot(history.history[val_key], label='validation')
    ax.set_title('model ' + metric)
    ax.set_ylabel(metric)
    ax.set_xlabel('epoch')
    ax.legend(loc='upper left')
fig.tight_layout()
plt.show()
