In [None]:
# Import libraries
import os
import random

import numpy as np
import pandas as pd
import matplotlib.pylab as plt
import albumentations as A

import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_addons as tfa

In [None]:
# Set seed
def seed_all(s):
    """Seed the random number generators used by this notebook.

    Args:
        s: Seed value handed to Python's `random`, NumPy and TensorFlow.

    Side effects:
        Sets the environment variables `TF_CUDNN_DETERMINISTIC` (to '1') and
        `PYTHONHASHSEED` (to `str(s)`) for the current process.
    """
    # Seed the three generators in the same order every time.
    for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
        seed_fn(s)

    os.environ.update({
        'TF_CUDNN_DETERMINISTIC': '1',
        'PYTHONHASHSEED': str(s),
    })
    
# Define the augmentation policies
def build_transforms(image_size):
    """Build the Albumentations pipeline used to augment training images.

    Args:
        image_size: Height and width (in pixels) that `RandomResizedCrop`
            resizes its crop to.

    Returns:
        An `A.Compose` that applies, each with probability 0.5, a horizontal
        flip, a rotation (limit=15), a brightness/contrast jitter and a random
        resized crop.
    """
    return A.Compose([
        A.HorizontalFlip(p=0.5),
        A.Rotate(p=0.5, limit=15),
        A.RandomBrightnessContrast(p=0.5, brightness_limit=(-0.2, 0.2), contrast_limit=(-0.1, 0.1), brightness_by_max=True),
        # NOTE(review): scale's upper bound (1.1) is above 1.0 and ratio's lower
        # bound (0.05) permits very narrow crops -- confirm both are intended.
        A.RandomResizedCrop(p=0.5, height=image_size, width=image_size, scale=(0.9, 1.1), ratio=(0.05, 1.1), interpolation=0),
    ])

# Pipelines are created on first use and cached by output size. Creating the
# pipeline at definition time would read IMAGE_SIZE before the configuration
# cell has assigned it, which raises NameError on a fresh kernel.
_TRANSFORMS_BY_SIZE = {}

# Apply augmentation policies.
def aug_fn(image):
    """Augment a single image with the pipeline for the current IMAGE_SIZE.

    Args:
        image: Image array passed to the Albumentations pipeline as `image`.

    Returns:
        The augmented image array produced by the pipeline.
    """
    size = IMAGE_SIZE  # resolved at call time, after the config cell has run
    pipeline = _TRANSFORMS_BY_SIZE.get(size)
    if pipeline is None:
        # setdefault keeps a single pipeline even if two callers race here.
        pipeline = _TRANSFORMS_BY_SIZE.setdefault(size, build_transforms(size))
    return pipeline(image=image)["image"]

# Augmentation policies
def apply_augmentation(image, label):
    """Augment `image` through `aug_fn` inside a tf.data pipeline.

    Args:
        image: Image tensor forwarded to `aug_fn` via `tf.numpy_function`.
        label: Label tensor; returned unchanged.

    Returns:
        Tuple `(augmented_image, label)` where the image is float32 with its
        static shape set to (IMAGE_SIZE, IMAGE_SIZE, 3).
    """
    augmented = tf.numpy_function(aug_fn, [image], tf.float32)
    # Declare the static shape explicitly for the stages that follow.
    augmented.set_shape([IMAGE_SIZE, IMAGE_SIZE, 3])
    return augmented, label

# Preprocess image
def preprocess_data(image, label):
    """Normalise an image and strip the leading size-1 batch axis.

    Args:
        image: Image tensor with a leading axis of size 1; pixel values are
            assumed to lie in the 0-255 range -- TODO confirm against the loader.
        label: Label tensor with a leading axis of size 1.

    Returns:
        Tuple `(image, label)`: the image cast to float32 and divided by 255,
        both tensors with axis 0 squeezed away.
    """
    # tf.cast never rescales, so the division below is the only normalisation.
    # tf.image.convert_image_dtype would itself rescale integer images to
    # [0, 1], and dividing by 255 afterwards would shrink them a second time.
    image = tf.cast(image, tf.float32) / 255.0
    image = tf.squeeze(image, 0)
    label = tf.squeeze(label, 0)
    return image, label

# Plot training history
def training_history(history):
    """Draw accuracy and loss curves for training and validation side by side.

    Args:
        history: Mapping with 'accuracy', 'val_accuracy', 'loss' and 'val_loss'
            entries, each a per-epoch sequence of values.

    Returns:
        None. The figure is shown with `plt.show()`.
    """
    # Read every series up front so a missing key fails before a figure exists.
    train_acc = history['accuracy']
    valid_acc = history['val_accuracy']
    train_loss = history['loss']
    valid_loss = history['val_loss']
    x_axis = range(len(history['loss']))

    # (train series, train label, validation series, validation label,
    #  legend location, panel title)
    panels = (
        (train_acc, 'Training Accuracy', valid_acc, 'Validation Accuracy',
         'lower right', 'Training and Validation Accuracy'),
        (train_loss, 'Training Loss', valid_loss, 'Validation Loss',
         'upper right', 'Training and Validation Loss'),
    )

    plt.figure(figsize=(32, 8))
    for position, panel in enumerate(panels, start=1):
        train_curve, train_label, valid_curve, valid_label, legend_loc, title = panel
        plt.subplot(1, 2, position)
        plt.plot(x_axis, train_curve, label=train_label)
        plt.plot(x_axis, valid_curve, label=valid_label)
        plt.legend(loc=legend_loc)
        plt.title(title)

    plt.show()
    return None

# Parse test images
def decode_test(path):
    """Read a PNG file and return it as a resized float32 image.

    Args:
        path: Path of the PNG file to read.

    Returns:
        The 3-channel image cast to float32, resized to
        (IMAGE_SIZE, IMAGE_SIZE) with antialiasing, then divided by 255.
    """
    raw_bytes = tf.io.read_file(path)
    pixels = tf.cast(tf.image.decode_png(raw_bytes, channels=3), tf.float32)
    target_size = [IMAGE_SIZE, IMAGE_SIZE]
    resized = tf.image.resize(pixels, target_size, antialias=True)
    return resized / 255

In [None]:
# Define variables

# Reproducibility
SEED = 124

# Run identifier, used to name the saved history and weight files
TRAINING_NAME = "CL_EffNetv2-B3"

# Backbone loaded from TensorFlow Hub
MODEL_NAME = "efficientnetv2-b3-21k-ft1k"
MODEL_HANDLE = "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_b3/feature_vector/2"

# Data pipeline
CLASS_NAMES = ['normal', 'pneumonia', 'COVID-19']
IMAGE_SIZE = 300
BATCH_SIZE = 16
AUTOTUNE = tf.data.experimental.AUTOTUNE

# Training length
EPOCHS = 20

seed_all(SEED)

In [None]:
# Load and preprocess the dataset
def _load_split(directory):
    """Load one image folder as a shuffled dataset of single-image batches.

    Args:
        directory: Folder holding one sub-folder per entry of CLASS_NAMES.

    Returns:
        The dataset built by `image_dataset_from_directory` with categorical
        labels, batch size 1 and images resized to (IMAGE_SIZE, IMAGE_SIZE).
    """
    return tf.keras.preprocessing.image_dataset_from_directory(
        directory,
        label_mode='categorical',
        class_names=CLASS_NAMES,
        batch_size=1,
        image_size=(IMAGE_SIZE, IMAGE_SIZE),
        shuffle=True,
        seed=SEED,
        interpolation='bilinear',
    )

train_ds = _load_split('../dataset/clahe/train/')
valid_ds = _load_split('../dataset/clahe/valid/')

# Training: normalise, augment, then batch and prefetch.
train_ds = train_ds.map(preprocess_data, num_parallel_calls=AUTOTUNE)
train_ds = train_ds.map(apply_augmentation, num_parallel_calls=AUTOTUNE)
train_ds = train_ds.batch(BATCH_SIZE).prefetch(AUTOTUNE)

# Validation: same pipeline without the augmentation step.
valid_ds = valid_ds.map(preprocess_data, num_parallel_calls=AUTOTUNE)
valid_ds = valid_ds.batch(BATCH_SIZE).prefetch(AUTOTUNE)

In [None]:
# Build model
# Hub backbone, created with trainable=True, followed by a dropout layer and a
# softmax layer with one unit per entry of CLASS_NAMES.
feature_extractor_layer = hub.KerasLayer(
    MODEL_HANDLE,
    trainable=True,
    input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3),
)
dropout_layer = tf.keras.layers.Dropout(rate=0.2)
classifier_layer = tf.keras.layers.Dense(len(CLASS_NAMES), activation='softmax')

model = tf.keras.Sequential([
    feature_extractor_layer,
    dropout_layer,
    classifier_layer,
])
model.summary()

In [None]:
# Set learning rate scheduler using CLR
def _clr_cycle_scale(x):
    """Return the scale factor for cycle `x`: 1 / 2**(x - 1)."""
    return 1 / (2.0 ** (x - 1))

# NOTE(review): 20994 is presumably the number of training images -- confirm
# and keep it in sync with the dataset.
clr_step_size = 3 * (20994 // BATCH_SIZE)

clr_scheduler = tfa.optimizers.CyclicalLearningRate(
    initial_learning_rate=3e-7,
    maximal_learning_rate=7e-3,
    step_size=clr_step_size,
    scale_fn=_clr_cycle_scale,
    scale_mode='cycle',
)

# Set the evaluation metrics
precision_metric = tf.keras.metrics.Precision(name='precision')
recall_metric = tf.keras.metrics.Recall(name='recall')
METRICS = ['accuracy', precision_metric, recall_metric]

#compile model
# SGD driven by the cyclical schedule, with categorical cross-entropy loss.
sgd_optimizer = tf.keras.optimizers.SGD(learning_rate=clr_scheduler)
crossentropy_loss = tf.keras.losses.CategoricalCrossentropy()
model.compile(optimizer=sgd_optimizer, loss=crossentropy_loss, metrics=METRICS)

# callback to stop training if the performance stop improving
# NOTE(review): this monitors the training loss even though validation data is
# passed to fit(); confirm that 'loss' rather than 'val_loss' is intended.
earlyStopping = tf.keras.callbacks.EarlyStopping(patience=5, monitor='loss', verbose=1, restore_best_weights=True)

# Create the output folders before training starts, so that a missing directory
# cannot make the save steps fail after the whole run has completed.
history_path = "../history/"+TRAINING_NAME+".csv"
weights_path = "../model_weight/"+TRAINING_NAME+"_weights.h5"
for output_path in (history_path, weights_path):
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

# train model
# train_ds is already batched with BATCH_SIZE, so batch_size is not passed to
# fit(); Keras documents that it must not be specified for dataset inputs.
history = model.fit(
    train_ds,
    epochs=EPOCHS,
    verbose=1,
    callbacks=[earlyStopping],
    validation_data=valid_ds,
)

# Persist the results first so a plotting problem cannot lose them.
# Save trained model weight
model.save_weights(weights_path)

# Save the training history
hist_df = pd.DataFrame(history.history)
hist_df.to_csv(history_path, index=False)

# Display the learning curve
training_history(history.history)