In [1]:
class cfg:
    """Notebook-wide settings, grouped by the stage that reads them."""

    # --- Preprocessing ---
    # Side length in pixels; every image is resized to IMAGE_SIZE x IMAGE_SIZE.
    IMAGE_SIZE = 128

    # --- Data augmentation ---
    # (min, max) bounds of the randomly drawn enhancement factors.
    BRIGHTNESS = (0.64, 1.37)
    CONTRAST = (0.64, 1.37)

    # --- Train/validation split ---
    # Fraction of all samples used for training; the remainder is used for validation.
    TRAIN_VAL_SPLIT = 0.8

    # --- Model training ---
    BATCH_SIZE = 8
    EPOCHS = 20
    LEARNING_RATE = 8e-5

In [None]:
# Miscellaneous
from tqdm import tqdm
import random
import os
import time
# Turn off tensorflow warnings
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' 

# For Data Processing & ML Models
import numpy as np
import pandas as pd
from sklearn.utils import shuffle
from sklearn.metrics import classification_report
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras import optimizers
from tensorflow.keras.preprocessing.image import load_img
from PIL import Image, ImageEnhance
import cv2

# For Data Visualization
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# Enable offline usage of plotly
import plotly.offline as pyo
pyo.init_notebook_mode(connected=True)

from IPython.display import clear_output
clear_output()

In [None]:
# Seed every random number generator the notebook draws from, so that
# "Restart Kernel -> Run All" reproduces the same results.
SEED = 100
random.seed(SEED)         # Python's `random`: augmentation factors and the preview offset
np.random.seed(SEED)      # NumPy global state
tf.random.set_seed(SEED)  # TensorFlow's global seed

In [None]:
# Switch Keras to the global 'mixed_float16' dtype policy. This must run before
# the model is built so the layers created later are affected by it.
# NOTE(review): mixed precision usually only helps on accelerators with fast
# float16 support — confirm the target hardware.
tf.keras.mixed_precision.set_global_policy('mixed_float16')

In [None]:
# Root folders of the dataset, each laid out as <root>/<label>/<image file>.
# Fill this list in before running the rest of the notebook.
data_directories = []#dataset

all_paths = []
all_labels = []

for data_dir in data_directories:
    # os.listdir returns entries in arbitrary order; sorting keeps the file
    # order (and therefore the shuffle below) identical across machines.
    for label in sorted(os.listdir(data_dir)):
        label_dir = os.path.join(data_dir, label)
        if not os.path.isdir(label_dir):
            # Skip stray files that sit next to the class folders.
            continue
        # One source folder is misspelt; map it onto the correctly spelt class.
        clean_label = 'TUBERCULOSIS' if label == 'TURBERCULOSIS' else label
        for image in sorted(os.listdir(label_dir)):
            all_paths.append(os.path.join(label_dir, image))
            all_labels.append(clean_label)

# A fixed random_state makes this cell idempotent: re-running it yields the
# same order (and the same train/validation split) instead of depending on
# how much of the global NumPy random state has already been consumed.
all_paths, all_labels = shuffle(all_paths, all_labels, random_state=100)

pd.set_option('display.max_colwidth', None)
df = pd.DataFrame({'path':all_paths, 'label':all_labels})
df

In [None]:
# Donut chart showing the share of each class in the full dataset.
plt.rcParams.update({'font.size': 13})

label_counts = df.label.value_counts()

fig, ax = plt.subplots(figsize=(7,7))
ax.pie(label_counts.tolist(),
       labels=label_counts.index.tolist(),
       colors=['#43b0c1', '#368d9a', '#286a74', '#69c0cd'],
       autopct='%.0f%%',
       wedgeprops={'linewidth': 7, 'edgecolor': 'white'})

# A white disc over the centre turns the pie into a ring; the title is then
# positioned inside that ring.
ax.add_artist(plt.Circle((0,0), 0.675, color='white'))
ax.set_title('Dataset\nDistribution', x=0.5, y=0.45)
plt.show()

In [None]:
def augment_image(image):
    """Apply a random brightness and contrast change to a single image.

    The input is converted to a uint8 array and wrapped in a PIL image. Both
    factors are drawn uniformly from the ranges in ``cfg.BRIGHTNESS`` and
    ``cfg.CONTRAST``; brightness is applied first, then contrast.

    Returns:
        The enhanced PIL image.
    """
    pil_image = Image.fromarray(np.uint8(image))
    # Draw both factors before enhancing (brightness first, then contrast).
    brightness_factor = random.uniform(*cfg.BRIGHTNESS[:2])
    contrast_factor = random.uniform(*cfg.CONTRAST[:2])
    enhancements = (
        (ImageEnhance.Brightness, brightness_factor),
        (ImageEnhance.Contrast, contrast_factor),
    )
    for enhancer, factor in enhancements:
        pil_image = enhancer(pil_image).enhance(factor)
    return pil_image

In [None]:
def open_images(paths, augment=True):
    '''
    Load the images at `paths` and return them as one array of normalised images.

    Each image is loaded at (cfg.IMAGE_SIZE, cfg.IMAGE_SIZE), optionally passed
    through augment_image, and divided by its own maximum pixel value so that
    its brightest pixel becomes 1.0.

    Args:
        paths: iterable of image file paths.
        augment: when True, apply the random brightness/contrast augmentation.

    Returns:
        np.ndarray with one float image per path, in input order.
        NOTE(review): presumably shaped (len(paths), IMAGE_SIZE, IMAGE_SIZE, 3)
        given load_img's default colour mode — verify.
    '''
    images = []
    for path in paths:
        image = load_img(path, target_size=(cfg.IMAGE_SIZE,cfg.IMAGE_SIZE))
        if augment:
            image = augment_image(image)
        pixels = np.array(image, dtype=np.float64)
        peak = pixels.max()
        # An all-black image has a maximum of 0; dividing by it would fill the
        # image with NaNs, so such images are left as all zeros.
        if peak > 0:
            pixels = pixels / peak
        images.append(pixels)
    return np.array(images)

# Preview: plot a random window of up to PREVIEW_COUNT consecutive samples
# with augmentation applied.
PREVIEW_COUNT = 10
# Choose the window start from the rows that actually exist, so the slice is
# never empty or short just because the dataset is smaller than a fixed offset.
k = random.randint(0, max(0, len(df) - PREVIEW_COUNT))
image_paths = list(df.path[k:k+PREVIEW_COUNT])
labels = list(df.label[k:k+PREVIEW_COUNT])
images = open_images(image_paths, augment=True)

plt.rcParams.update({'font.size': 10})
fig = plt.figure(figsize=(20, 8))

# Iterate over what was actually loaded instead of assuming 10 images.
for i, (preview_image, preview_label) in enumerate(zip(images, labels)):
    fig.add_subplot(2, 5, i+1)
    plt.imshow(preview_image)
    plt.axis('off')
    plt.title(preview_label)
plt.show()

In [None]:
# Class names in id order: the position of a name in LABELS is its integer id.
LABELS = ['NORMAL', 'TUBERCULOSIS', 'PNEUMONIA', 'COVID19']
# id -> name
label_decoder = dict(enumerate(LABELS))
# name -> id
label_encoder = {name: index for index, name in label_decoder.items()}

In [None]:
# Positional split of the already shuffled frame: the first TRAIN_VAL_SPLIT
# share of the rows is used for training, the remaining rows for validation.
split_index = int(cfg.TRAIN_VAL_SPLIT * len(df))
df_train = df.iloc[:split_index]
df_val = df.iloc[split_index:]

In [None]:
def _plot_label_donut(ax, labels, title):
    """Draw a donut chart of the class shares in `labels` on `ax`.

    Args:
        ax: matplotlib Axes to draw on.
        labels: pandas Series of class names; wedges follow its
            value_counts() order.
        title: text placed in the centre of the ring.
    """
    counts = labels.value_counts()
    ax.pie(counts.tolist(),
           labels=counts.index.tolist(),
           colors=['#43b0c1', '#368d9a', '#286a74', '#69c0cd'],
           autopct='%.0f%%',
           wedgeprops={'linewidth': 7, 'edgecolor': 'white'})
    # A white disc over the centre turns the pie into a ring.
    ax.add_artist(plt.Circle((0,0), 0.675, color='white'))
    ax.axis('off')
    ax.set_title(title, x=0.5, y=0.5)


# Class distribution of the training and validation subsets, side by side.
plt.rcParams.update({'font.size': 13})
fig, (train_ax, val_ax) = plt.subplots(1, 2, figsize=(14, 7))

_plot_label_donut(train_ax, df_train.label, 'Training')
_plot_label_donut(val_ax, df_val.label, 'Validation')

plt.show()

In [None]:
def data_generator(df, batch_size=cfg.BATCH_SIZE, augment=True, epochs=cfg.EPOCHS):
    """Yield (images, labels) batches, walking `df` in row order `epochs` times.

    Args:
        df: frame with a `path` column (image files) and a `label` column
            (class names that are keys of `label_encoder`).
        batch_size: number of rows per batch; the last batch of a pass may be smaller.
        augment: forwarded to `open_images`.
        epochs: number of full passes over `df` before the generator is exhausted.

    Yields:
        Tuple of the array returned by `open_images` and a NumPy array of
        integer class ids for the same rows.
    """
    for _ in range(epochs):
        for start in range(0, len(df), batch_size):
            batch = df.iloc[start:start + batch_size]
            batch_images = open_images(batch.path, augment=augment)
            encoded = [label_encoder[name] for name in batch.label]
            yield batch_images, np.array(encoded)

In [None]:
# Build the training and validation generators from the values in cfg.
# Only the training generator augments its images. The step counts are the
# number of full batches in each subset.
train_steps = len(df_train) // cfg.BATCH_SIZE
val_steps = len(df_val) // cfg.BATCH_SIZE

train_data_generator = data_generator(
    df_train,
    batch_size=cfg.BATCH_SIZE,
    augment=True,
    epochs=cfg.EPOCHS,
)
val_data_generator = data_generator(
    df_val,
    batch_size=cfg.BATCH_SIZE,
    augment=False,
    epochs=cfg.EPOCHS,
)

In [None]:
# CNN classifier: three blocks of two 3x3 'same'-padded convolutions, each
# followed by 2x2 max-pooling (32 -> 64 -> 128 filters), then
# flatten -> dropout -> dense -> 4-way softmax.
model = models.Sequential(name='classifier')
model.add(layers.Input(shape=(cfg.IMAGE_SIZE, cfg.IMAGE_SIZE, 3), name='input'))
model.add(layers.Conv2D(32, (3,3), activation='relu', padding='same', name='block1_conv1'))
model.add(layers.Conv2D(32, (3,3), activation='relu', padding='same', name='block1_conv2'))
model.add(layers.MaxPool2D(pool_size=(2,2), name='pool1'))
model.add(layers.Conv2D(64, (3,3), activation='relu', padding='same', name='block2_conv1'))
model.add(layers.Conv2D(64, (3,3), activation='relu', padding='same', name='block2_conv2'))
model.add(layers.MaxPool2D(pool_size=(2,2), name='pool2'))
model.add(layers.Conv2D(128, (3,3), activation='relu', padding='same', name='block3_conv1'))
model.add(layers.Conv2D(128, (3,3), activation='relu', padding='same', name='block3_conv2'))
model.add(layers.MaxPool2D(pool_size=(2,2), name='pool3'))
model.add(layers.Flatten(name='flatten'))
model.add(layers.Dropout(0.3, name='dropout1'))
model.add(layers.Dense(128, activation='relu', name='dense1'))
# The notebook enables the global 'mixed_float16' policy, under which this
# layer would compute its softmax in float16. Forcing float32 on the output
# layer keeps the class probabilities (and the loss computed from them)
# numerically stable; it is a no-op when the policy is plain float32.
model.add(layers.Dense(4, activation='softmax', dtype='float32', name='final'))

model.summary()

In [None]:
# The generators yield integer class ids, hence the sparse variant of
# categorical cross-entropy.
adam = optimizers.Adam(learning_rate=cfg.LEARNING_RATE)
model.compile(
    optimizer=adam,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Class weights for the loss, keyed by integer class id.
# Weights must be INVERSELY proportional to class frequency so that rare
# classes count more; weighting by the raw frequency would do the opposite and
# amplify the imbalance. This uses the "balanced" heuristic:
#     weight = n_samples / (n_classes * class_count)
label_counts = df_train.label.value_counts()
class_weights = {
    label_encoder[label]: float(len(df_train) / (len(label_counts) * count))
    for label, count in label_counts.items()
}
class_weights

In [None]:
# Save the full model to 'model.hdf5' whenever validation accuracy reaches a
# new maximum.
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath='model.hdf5',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=False,
    verbose=1,
)

# Train from the generators; the step counts tell Keras how many batches make
# up one training epoch and one validation run.
fit_options = dict(
    epochs=cfg.EPOCHS,
    steps_per_epoch=train_steps,
    class_weight=class_weights,
    validation_data=val_data_generator,
    validation_steps=val_steps,
    callbacks=[model_checkpoint_callback],
)
history = model.fit(train_data_generator, **fit_options)

In [None]:
# Evaluate on the validation set with a single un-augmented pass.
val_data_generator = data_generator(df_val, batch_size=cfg.BATCH_SIZE, augment=False, epochs=1)
val_steps = int(len(df_val)/cfg.BATCH_SIZE)
# The generator also yields the final partial batch, so the progress bar total
# is the ceiling of len / batch size, not the floor.
total_batches = -(-len(df_val) // cfg.BATCH_SIZE)

y_pred = []
y_true = []

for x,y in tqdm(val_data_generator, total=total_batches):
    # predict_on_batch runs a single batch directly, avoiding the per-call
    # overhead and progress output of model.predict inside a loop.
    probabilities = model.predict_on_batch(x)
    y_pred.extend(label_decoder[i] for i in np.argmax(probabilities, axis=-1))
    y_true.extend(label_decoder[i] for i in y)
clear_output()
print(classification_report(y_true, y_pred, digits=4))