In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt

from dotenv import load_dotenv
from PIL import Image

# Load environment variables via python-dotenv. Presumably this is where the
# *_DATA_DIR settings read with os.getenv further down come from — TODO confirm.
# The trailing semicolon keeps the call's return value out of the cell output.
load_dotenv();

In [None]:
from data_generator import DataGenerator

# Build one DataGenerator per dataset split. Each directory is read from the
# environment (TRAIN_DATA_DIR / VAL_DATA_DIR / TEST_DATA_DIR) and every
# generator prints its summary right after it is constructed.
train_dir = os.getenv('TRAIN_DATA_DIR')
train_data_gen = DataGenerator(train_dir)
train_data_gen.summary()

val_dir = os.getenv('VAL_DATA_DIR')
val_data_gen = DataGenerator(val_dir)
val_data_gen.summary()

test_dir = os.getenv('TEST_DATA_DIR')
test_data_gen = DataGenerator(test_dir)
test_data_gen.summary()

In [None]:
from tensorflow import keras
from keras import Sequential
from keras.layers import RandomFlip, RandomRotation, RandomZoom

# Augmentation pipeline: a random flip, followed by a small random rotation
# (factor 0.05) and a small random zoom (factor 0.1), the latter two both
# configured with fill_mode='nearest'.
data_aug = Sequential(
    [
        RandomFlip(name='random_flip'),
        RandomRotation(0.05, fill_mode='nearest', name='random_rotation'),
        RandomZoom(0.1, fill_mode='nearest', name='random_zoom'),
    ],
    name='data_augmentation',
)

In [None]:
# Data augmentation examples: each row shows an original training image
# (left column) next to one augmented version of it (right column).
n_examples = 5
fig, axes = plt.subplots(n_examples, 2)

# Get the paths to all of our images from the data generator
paths = train_data_gen.get_paths()

for i in range(n_examples):
    with Image.open(paths[i]) as img:
        axes[i, 0].imshow(img)
        axes[i, 0].set_axis_off()

        pixels = np.array(img)
        # Only a single-channel (2-D) image needs a trailing channel axis.
        # Adding one to an image that already has channels would yield a 4-D
        # array, which the Keras image layers interpret as a batch of images.
        if pixels.ndim == 2:
            pixels = np.expand_dims(pixels, axis=-1)

        # Augment image with data_aug
        img_aug = data_aug(pixels).numpy()
        # Cast to integers so imshow reads the values on the 0-255 scale
        axes[i, 1].imshow(img_aug.astype('int32'))
        axes[i, 1].set_axis_off()

In [None]:
from keras import Input
from keras.applications import Xception
from keras.layers import GlobalMaxPooling2D, Dense

# Pretrained Xception base (ImageNet weights, without its top layers),
# marked as not trainable so its weights stay fixed during fitting
xception = Xception(weights='imagenet', include_top=False)
xception.trainable = False

# Model input shape (the generator's input_shape with a trailing 3 appended)
# and the number of classes (one per label known to the training generator)
input_shape = train_data_gen.input_shape + (3,)
num_classes = len(train_data_gen.labels)

def create_model(name):
    """Assemble a new classifier on top of the module-level Xception base.

    The layers are, in order: an input of shape `input_shape`, the shared
    `xception` model, a global max-pooling layer, and a softmax Dense layer
    with `num_classes` units.

    Args:
        name: Name given to the returned Sequential model.

    Returns:
        The assembled Sequential model (not yet compiled).
    """
    stack = [
        Input(input_shape),
        xception,
        GlobalMaxPooling2D(name='global_pooling'),
        Dense(num_classes, activation='softmax', name='output_layer'),
    ]
    return Sequential(stack, name=name)

# Instantiate the classifier and print its layer-by-layer summary
model = create_model('phneumonia_classifier')
model.summary()


In [None]:
from keras.optimizers import Adam
from keras.losses import CategoricalCrossentropy
from keras.metrics import CategoricalAccuracy

# Compile model: Adam optimizer with its default settings, categorical
# cross-entropy as the loss, and categorical accuracy as the tracked metric
model.compile(
    optimizer=Adam(),
    loss=CategoricalCrossentropy(),
    metrics=[CategoricalAccuracy()],
)

In [None]:
from keras.applications.xception import preprocess_input

# Attach Xception's preprocess_input to every data generator so that the
# train, validation, and test splits all use the same preprocessing function
for gen in (train_data_gen, val_data_gen, test_data_gen):
    gen.preproc_func = preprocess_input

In [None]:
from keras.callbacks import EarlyStopping

# EarlyStopping ends training once val_loss has gone 3 epochs without improving
early_stopping = EarlyStopping(monitor='val_loss', patience=3)

history = model.fit(
    x=train_data_gen,
    validation_data=val_data_gen,
    epochs=100,
    batch_size=train_data_gen.batch_size,
    validation_batch_size=val_data_gen.batch_size,
    callbacks=[early_stopping],
)

In [None]:
def plot_history(history, *args):
    """Plot one curve per requested metric against the epoch index.

    Args:
        history: Object whose `.history` mapping holds the per-epoch values
            (the value returned by `model.fit` in this notebook).
        *args: Keys of `history.history` to draw; each becomes one labelled
            line in the same figure.
    """
    recorded = history.history
    for arg in args:
        values = recorded[arg]
        plt.plot(values, label=f"{arg}")
    plt.xlabel('epoch')
    plt.legend()
    plt.show()

In [None]:
# Training loss vs. validation loss per epoch for the fit above
plot_history(history, 'loss', 'val_loss')

In [None]:
# Training accuracy vs. validation accuracy per epoch for the fit above
plot_history(history, 'categorical_accuracy', 'val_categorical_accuracy')

In [None]:
# Re-display the summary of the validation data generator
val_data_gen.summary()

# Remark:
The validation data generator only contains 16 data points. Let's create a more representative validation data generator by partitioning our training data generator.

In [None]:
# Create a new validation data generator with 20 percent of the training data
# NOTE(review): presumably partition_data_generator removes that share from
# train_data_gen, in which case re-running this cell would split it again —
# confirm against data_generator.py.
val_fraction = 0.2
val_data_gen = train_data_gen.partition_data_generator(val_fraction)

# Show both generators after the split
for gen in (train_data_gen, val_data_gen):
    gen.summary()

## DON'T LEAK DATA!
Since our model was fitted on some of the data that is now part of the new validation data generator, we must recreate the model so that its weights have not been fitted on any of the validation data.

In [None]:
# Recreate model: a new Sequential with a new output layer replaces the one
# fitted above (create_model reuses the same module-level Xception base)
model = create_model('phneumonia_classifier')
model.summary()

In [None]:
# Compile the recreated model with the same configuration as before:
# Adam optimizer, categorical cross-entropy loss, categorical accuracy metric
model.compile(
    optimizer=Adam(),
    loss=CategoricalCrossentropy(),
    metrics=[CategoricalAccuracy()],
)

In [None]:
history = model.fit(
    x=train_data_gen,
    epochs=100,
    batch_size=train_data_gen.batch_size,
    validation_data=val_data_gen,
    validation_batch_size=val_data_gen.batch_size,
    # Earlystopping stops training when val_loss stops improving.
    # restore_best_weights=True rolls the model back to the epoch with the
    # lowest val_loss; without it the model evaluated on the test set would
    # keep the weights from `patience` epochs after that best epoch.
    callbacks=[
        EarlyStopping(
            monitor='val_loss',
            patience=3,
            restore_best_weights=True,
        )
    ]
)

In [None]:
# Training loss vs. validation loss per epoch for the refitted model
plot_history(history, 'loss', 'val_loss')

In [None]:
# Training accuracy vs. validation accuracy per epoch for the refitted model
plot_history(history, 'categorical_accuracy', 'val_categorical_accuracy')

In [None]:
# Evaluate the model on the test data generator; the trailing semicolon keeps
# the returned values out of the cell output
model.evaluate(test_data_gen);

In [None]:
# Create the data augmentation layers once, up front. Building them inside
# the preprocessing function would construct three new Keras layers for every
# call, i.e. for every batch the generator preprocesses.
_train_aug_layers = (
    RandomFlip(name='random_flip'),
    RandomRotation(0.05, fill_mode='nearest', name='random_rotation'),
    RandomZoom(0.1, fill_mode='nearest', name='random_zoom'),
)

def data_aug_and_preproc(inputs):
    """Randomly augment `inputs`, then apply Xception's `preprocess_input`.

    The augmentation is a random flip, a random rotation (factor 0.05) and a
    random zoom (factor 0.1), applied in that order.

    Args:
        inputs: Image data handed over by the data generator.
            NOTE(review): assumed to be a batch of images — confirm against
            how DataGenerator calls `preproc_func`.

    Returns:
        The augmented data after `preprocess_input`.
    """
    z = inputs
    for layer in _train_aug_layers:
        # training=True is passed explicitly because the random layers only
        # transform their input in training mode.
        z = layer(z, training=True)
    return preprocess_input(z)

# Add data augmentation and preprocessing function to the training data generator
train_data_gen.preproc_func = data_aug_and_preproc

In [None]:
history = model.fit(
    x=train_data_gen,
    epochs=100,
    batch_size=train_data_gen.batch_size,
    validation_data=val_data_gen,
    validation_batch_size=val_data_gen.batch_size,
    # Changed patience to 10 so that our model is trained on more
    # augmented data. restore_best_weights=True rolls the model back to the
    # epoch with the lowest val_loss; without it the model evaluated on the
    # test set would keep the weights from 10 epochs after that best epoch.
    callbacks=[
        EarlyStopping(
            monitor='val_loss',
            patience=10,
            restore_best_weights=True,
        )
    ]
)

In [None]:
# Training loss vs. validation loss per epoch for the fit on augmented data
plot_history(history, 'loss', 'val_loss')

In [None]:
# Evaluate on the test data generator again, now after the fit on augmented
# data; the trailing semicolon keeps the returned values out of the cell output
model.evaluate(test_data_gen);