In [None]:
import numpy as np
import pandas as pd
import os
import random
import keras_tuner as kt
import matplotlib.pyplot as plt
import tensorflow as tf
from pathlib import Path
from keras_tuner import Hyperband
from keras_tuner.tuners import RandomSearch
from tensorflow.keras.optimizers.legacy import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, precision_score, f1_score
import seaborn as sns
from tensorflow.keras.layers import GRU, Input, Dense, Flatten, Conv2D, MaxPooling2D, Dropout, BatchNormalization, Reshape, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.utils.class_weight import compute_class_weight
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import cv2

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

In [None]:
# Root folder that holds one sub-folder per dataset split.
DATA_PATH = './data/images'

# Output folders for figures, training histories, text reports and saved models.
RNN_IMAGE_RESULTS = './results/rnn/images'
RNN_HISTORY_RESULTS = './results/rnn/history'
RNN_REPORT_RESULTS = './results/rnn/reports'
RNN_MODEL_RESULTS = './results/rnn/models'

# Names of the split sub-folders inside DATA_PATH.
TRAIN_DIRECTORY = 'train'
VALIDATION_DIRECTORY = 'val'
TEST_DIRECTORY = 'test'

SUB_DIRECTORIES = [TRAIN_DIRECTORY, TEST_DIRECTORY, VALIDATION_DIRECTORY]
# Class names; the length of this list sets the width of the one-hot targets
# and of the softmax output layer.
CATEGORY_DIRECTORIES = ['Chickenpox', 'Cowpox', 'Healthy', 'HFMD', 'Measles', 'Monkeypox']

# Later cells write into the result folders with savefig / to_csv / open /
# model.save, none of which create missing parent directories. Create them
# here so the notebook survives Restart & Run All on a fresh checkout.
for _results_dir in (RNN_IMAGE_RESULTS, RNN_HISTORY_RESULTS, RNN_REPORT_RESULTS, RNN_MODEL_RESULTS):
    os.makedirs(_results_dir, exist_ok=True)

In [None]:
def count_images(dir_name, dataset_types=None, extensions=('.jpg', '.jpeg')):
    """Print the number of images per category, and in total, for each dataset split.

    Args:
        dir_name: Root folder containing one sub-folder per dataset split.
        dataset_types: Names of the split sub-folders to scan. Defaults to the
            module-level ``SUB_DIRECTORIES`` when omitted.
        extensions: File suffixes (including the dot) that count as images.
            Matching is case-insensitive, so ``photo.JPG`` is counted.

    Returns:
        None. The counts are written to stdout.

    Raises:
        FileNotFoundError: If a split folder does not exist under ``dir_name``.
    """
    if dataset_types is None:
        dataset_types = SUB_DIRECTORIES
    suffixes = tuple(ext.lower() for ext in extensions)

    for dataset_type in dataset_types:
        total = 0
        dir_type = os.path.join(dir_name, dataset_type)
        print(f"============ {dataset_type} dataset ===========")
        # os.listdir order is arbitrary; sort so the report is stable between runs.
        for category in sorted(os.listdir(dir_type)):
            category_path = os.path.join(dir_type, category)
            if not os.path.isdir(category_path):
                continue
            n_images = sum(
                1 for file_name in os.listdir(category_path)
                if file_name.lower().endswith(suffixes)
            )
            # Use the folder name directly instead of splitting the path on '/',
            # which does not work with Windows path separators.
            print(f"Number of images in {category}: {n_images}")
            total += n_images
        print(f"Total image count: {total}")
        print("")

In [None]:
# Print per-category and total image counts for every split under DATA_PATH.
count_images(DATA_PATH)

In [None]:
def load_images_to_df(base_path, dataset_type, extensions=('.jpg', '.jpeg')):
    """Index the images of one dataset split as a shuffled DataFrame.

    Every image file found (recursively) under ``base_path/dataset_type`` becomes
    one row. The label is the name of the folder that directly contains the file.

    Args:
        base_path: Root folder containing the dataset split sub-folders.
        dataset_type: Name of the split sub-folder to index.
        extensions: File suffixes (including the dot) treated as images.
            Matching is case-insensitive.

    Returns:
        pandas.DataFrame with string columns ``Path`` and ``Label``, shuffled
        with a fixed seed and re-indexed from 0. Empty (but with both columns)
        when the split folder contains no matching files.

    Raises:
        FileNotFoundError: If ``base_path/dataset_type`` is not a directory.
    """
    image_dir = Path(base_path) / dataset_type
    if not image_dir.is_dir():
        raise FileNotFoundError(f"Dataset directory not found: {image_dir}")

    wanted_suffixes = {ext.lower() for ext in extensions}
    # Directory traversal order depends on the filesystem. Sorting first makes
    # the seeded shuffle below yield the same row order on every machine.
    image_paths = sorted(
        candidate for candidate in image_dir.rglob('*')
        if candidate.is_file() and candidate.suffix.lower() in wanted_suffixes
    )

    if not image_paths:
        return pd.DataFrame(columns=['Path', 'Label'])

    image_df = pd.DataFrame({
        'Path': [str(image_path) for image_path in image_paths],
        'Label': [image_path.parent.name for image_path in image_paths],
    })

    return image_df.sample(frac=1.0, random_state=42).reset_index(drop=True)

In [None]:
# Index the training split as a shuffled (Path, Label) table and preview the first rows.
df_train = load_images_to_df(DATA_PATH, TRAIN_DIRECTORY)
df_train.head(10)

In [None]:
# Index the validation split as a shuffled (Path, Label) table and preview the first rows.
df_val = load_images_to_df(DATA_PATH, VALIDATION_DIRECTORY)
df_val.head(10)

In [None]:
# Index the test split as a shuffled (Path, Label) table and preview the first rows.
df_test = load_images_to_df(DATA_PATH, TEST_DIRECTORY)
df_test.head(10)

In [None]:
def pie_plot_from_df(df, title):
    """Draw a pie chart of the class distribution found in ``df["Label"]``.

    Args:
        df: DataFrame with a ``Label`` column.
        title: Caption for the chart; it is placed as the x-axis label, so it
            appears underneath the pie.
    """
    ax = df["Label"].value_counts().plot.pie(autopct="%1.2f%%")
    # Blank out the y-axis label that pandas adds to pie charts by default.
    ax.set_ylabel("")
    ax.set_xlabel(title)
    plt.show()

In [None]:
def show_images_from_df(df, nrows, ncols):
    """Display the first ``nrows * ncols`` images of ``df`` in a grid, titled by label.

    Args:
        df: DataFrame with ``Path`` (image file path) and ``Label`` columns.
        nrows: Number of grid rows.
        ncols: Number of grid columns.

    If ``df`` has fewer rows than grid cells, the surplus cells are left blank.
    """
    # squeeze=False keeps `axes` a 2-D array even for a 1x1 or single-row grid,
    # so `.flat` is always available.
    fig, axes = plt.subplots(
        nrows=nrows,
        ncols=ncols,
        figsize=(15, 8),
        subplot_kw={"xticks": [], "yticks": []},
        squeeze=False,
    )

    n_available = len(df)
    for position, ax in enumerate(axes.flat):
        if position >= n_available:
            # Fewer images than cells: hide the unused axes instead of raising.
            ax.axis("off")
            continue
        # Positional lookup, so this also works when the index is not 0..n-1.
        record = df.iloc[position]
        # The second argument of plt.imread is a format string, not a read flag,
        # so the stray -1 is dropped and the format comes from the file suffix.
        ax.imshow(plt.imread(record["Path"]))
        ax.set_title(record["Label"])
    plt.tight_layout()
    plt.show()

In [None]:
# Class balance of the training split.
pie_plot_from_df(df_train, "Image Percentages from Train Dataset")

In [None]:
# Class balance of the validation split.
pie_plot_from_df(df_val, "Image Percentages from Val Dataset")

In [None]:
# Class balance of the test split.
pie_plot_from_df(df_test, "Image Percentages from Test Dataset")

In [None]:
# Preview a 6x5 grid of training images with their labels.
show_images_from_df(df_train, 6, 5)

In [None]:
# Preview a 6x5 grid of validation images with their labels.
show_images_from_df(df_val, 6, 5)

In [None]:
# Preview a 6x5 grid of test images with their labels.
show_images_from_df(df_test, 6, 5)

In [None]:
def preprocess_images(df, target_size=(224, 224)):
    """Load every image listed in ``df`` as a row-sequence array for the GRU.

    Each image is resized to ``target_size``, scaled by 1/255, and flattened so
    that every pixel row becomes one timestep whose features are that row's
    channel values: shape ``(height, width * channels)``.

    Args:
        df: DataFrame with ``Path`` (image file path) and ``Label`` columns.
        target_size: ``(height, width)`` the images are resized to. The default
            matches the ``(224, 224 * 3)`` input shape used by the models in
            this notebook.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays: the stacked image sequences
        and the corresponding raw label values, in the row order of ``df``.
    """
    n_timesteps = target_size[0]
    images = []
    # Iterate the column directly; iterrows() builds a Series per row for no benefit.
    for image_path in df['Path']:
        img = tf.keras.utils.load_img(image_path, target_size=target_size)
        pixels = tf.keras.utils.img_to_array(img) / 255.0
        # (height, width, channels) -> (height, width * channels)
        images.append(pixels.reshape(n_timesteps, -1))
    labels = df['Label'].tolist()
    return np.array(images), np.array(labels)

In [None]:
# Load all three splits into memory as (image sequence array, raw label array) pairs.
train_images, train_labels = preprocess_images(df_train)
val_images, val_labels = preprocess_images(df_val)
test_images, test_labels = preprocess_images(df_test)

In [None]:
# Map label values to integer class ids. The encoder is fitted on the training
# labels only and then applied unchanged to the validation and test labels.
# NOTE: this cell overwrites the *_labels variables, so it is not safe to re-run
# on its own without re-running the preprocessing cell first.
label_encoder = LabelEncoder()
train_labels = label_encoder.fit_transform(train_labels)
val_labels = label_encoder.transform(val_labels)
test_labels = label_encoder.transform(test_labels)

In [None]:
# One-hot encode the integer class ids; the vector width is the number of
# entries in CATEGORY_DIRECTORIES.
# NOTE: overwrites the *_labels variables in place, so re-running this cell
# alone would one-hot encode already one-hot encoded arrays.
train_labels = to_categorical(train_labels, len(CATEGORY_DIRECTORIES))
val_labels = to_categorical(val_labels, len(CATEGORY_DIRECTORIES))
test_labels = to_categorical(test_labels, len(CATEGORY_DIRECTORIES))

## Baseline Model with Manually Chosen Hyperparameters

In [None]:
# Seed the Python, NumPy and TensorFlow random generators before any weights
# are created, so that weight initialisation, dropout masks and batch shuffling
# are repeatable across Restart & Run All.
tf.keras.utils.set_random_seed(42)

# Baseline GRU classifier: each image is read as a sequence of 224 pixel rows,
# every row being one timestep with 224 * 3 features.
model = Sequential()

# The first GRU returns the full sequence so the second GRU can consume it.
model.add(GRU(units=128, return_sequences=True, input_shape=(224, 224 * 3)))
model.add(GRU(units=64))
model.add(Dropout(0.3))
model.add(Dense(32, activation="relu"))
# One softmax output per class listed in CATEGORY_DIRECTORIES.
model.add(Dense(len(CATEGORY_DIRECTORIES), activation="softmax"))

# Write a diagram of the architecture (with tensor shapes) next to the other figures.
tf.keras.utils.plot_model(model, f"{RNN_IMAGE_RESULTS}/gru_model_summary.png", show_shapes=True, dpi=50)

In [None]:
# Compile for multi-class classification on one-hot targets.
model.compile(
    optimizer = Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy']
)

# Stop once validation loss has not improved for 20 epochs and roll the model
# back to the weights of the best epoch.
early_stopping = EarlyStopping(
    monitor="val_loss",
    patience=20,
    mode="min",
    restore_best_weights=True,
    verbose=1
)

In [None]:
# Train the baseline model for up to 100 epochs (early stopping may end it
# sooner) and keep the per-epoch metrics in `history` for the plots below.
history = model.fit(train_images, train_labels, validation_data=(val_images, val_labels),
                    batch_size=32, epochs=100, callbacks=[early_stopping])

In [None]:
# Training vs. validation accuracy per epoch for the baseline model.
pd.DataFrame(history.history)[['categorical_accuracy','val_categorical_accuracy']].plot()
plt.title("Accuracy")
# Save BEFORE showing: once plt.show() has rendered the figure in the notebook
# it is no longer the current figure, so a later savefig would write an empty
# canvas to disk.
plt.savefig(f"{RNN_IMAGE_RESULTS}/gru_accuracy_plot.png")
plt.show()

In [None]:
# Training vs. validation loss per epoch for the baseline model.
pd.DataFrame(history.history)[['loss','val_loss']].plot()
plt.title("Loss")
# Save BEFORE showing: once plt.show() has rendered the figure in the notebook
# it is no longer the current figure, so a later savefig would write an empty
# canvas to disk.
plt.savefig(f"{RNN_IMAGE_RESULTS}/gru_loss_plot.png")
plt.show()

In [None]:
# Side-by-side loss and accuracy curves for the baseline model, saved as one
# figure, followed by a CSV dump of the raw per-epoch metrics.
plt.figure(figsize=(12, 6))

# Left panel: loss curves.
plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], 'red', label='Training Loss')
plt.plot(history.history['val_loss'], 'green', label='Validation Loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

# Right panel: accuracy curves.
plt.subplot(1, 2, 2)
plt.plot(history.history['categorical_accuracy'], 'orange', label='Training Accuracy')
plt.plot(history.history['val_categorical_accuracy'], 'blue', label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

plt.tight_layout()

plt.savefig(f"{RNN_IMAGE_RESULTS}/gru_training_plots.png")
# Persist the per-epoch metrics so the curves can be redrawn without retraining.
history_df = pd.DataFrame(history.history)
history_csv = f"{RNN_HISTORY_RESULTS}/gru_history.csv"
history_df.to_csv(history_csv)

In [None]:
# Reduce the one-hot targets and the predicted class probabilities to integer
# class indices so they can be compared directly.
y_true = test_labels.argmax(axis=1)
y_pred = model.predict(test_images).argmax(axis=1)

In [None]:
# Per-class precision/recall/F1 on the test split, labelled with the class
# names learned by the label encoder.
report = classification_report(y_true, y_pred, target_names=label_encoder.classes_)
print("Classification Report:\n", report)

# Keep a plain-text copy of the report alongside the other results.
report_file = f"{RNN_REPORT_RESULTS}/gru_classification_report.txt"
with open(report_file, 'w') as file:
    file.write(report)

In [None]:
# Confusion matrix on the test split, drawn as an annotated heatmap and saved.
conf_matrix = confusion_matrix(y_true, y_pred)

plt.figure(figsize=(10, 8))
# fmt='d' prints the cell counts as integers; tick labels are the class names.
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='coolwarm', xticklabels=label_encoder.classes_, yticklabels=label_encoder.classes_)
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('True')

heatmap_file = f'{RNN_IMAGE_RESULTS}/gru_confusion_matrix.png'
plt.savefig(heatmap_file)

In [None]:
# Persist the trained baseline model as gru.h5 under RNN_MODEL_RESULTS.
model.save(f"{RNN_MODEL_RESULTS}/gru.h5")

## Hyper-parameter Tuned Model

In [None]:
def build_model_gru(hp):
    """Build and compile a two-layer GRU classifier from a tuner search space.

    Args:
        hp: Keras Tuner hyperparameter container. The following entries are
            requested from it: ``units_gru_1``, ``units_gru_2``,
            ``dropout_rate``, ``units_dense`` and ``learning_rate``.

    Returns:
        A compiled ``Sequential`` model taking ``(224, 224 * 3)`` sequences and
        producing one softmax output per entry of ``CATEGORY_DIRECTORIES``.
    """
    # Request the hyperparameters in the order of the layers they configure.
    gru_1_units = hp.Int('units_gru_1', min_value=64, max_value=256, step=64)
    gru_2_units = hp.Int('units_gru_2', min_value=32, max_value=128, step=32)
    dropout_rate = hp.Float('dropout_rate', min_value=0.2, max_value=0.5, step=0.1)
    dense_units = hp.Int('units_dense', min_value=16, max_value=64, step=16)
    learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

    n_classes = len(CATEGORY_DIRECTORIES)

    model = Sequential([
        # The first GRU returns the whole sequence so the second one can read it.
        GRU(units=gru_1_units, input_shape=(224, 224 * 3), return_sequences=True),
        GRU(units=gru_2_units),
        Dropout(dropout_rate),
        Dense(units=dense_units, activation='relu'),
        Dense(n_classes, activation='softmax'),
    ])

    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='categorical_crossentropy',
        metrics=['categorical_accuracy'],
    )

    return model

In [None]:
# Early stopping shared by the tuner search and the final training run of the
# tuned model: halt after 50 epochs without val_loss improvement and restore
# the best weights.
# NOTE(review): patience=50 is larger than the epochs=20 passed to
# tuner.search, so this callback cannot trigger during the search itself.
stop_early=EarlyStopping(monitor='val_loss', patience=50, verbose=1, mode='min', restore_best_weights=True)

In [None]:
# Random search over the space defined in build_model_gru: 5 trials, each
# trained once, ranked by validation categorical accuracy. Tuner state is kept
# under hyperparameter_tuning/GRU.
tuner = RandomSearch(
        build_model_gru,
        objective="val_categorical_accuracy",
        max_trials=5,
        executions_per_trial=1,
        directory="hyperparameter_tuning",
        project_name='GRU'
    )

# Each trial trains for at most 20 epochs on the in-memory train/val arrays.
tuner.search(train_images, train_labels, validation_data=(val_images, val_labels), epochs=20, callbacks=[stop_early])
# Keep only the single best hyperparameter set.
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
    
print(f"""
Best hyperparameters:
- Units in GRU Layer 1: {best_hps.get('units_gru_1')}
- Units in GRU Layer 2: {best_hps.get('units_gru_2')}
- Dropout Rate: {best_hps.get('dropout_rate')}
- Units in Dense Layer 1: {best_hps.get('units_dense')}
- Learning Rate: {best_hps.get('learning_rate')}
""")

In [None]:
# Build a fresh model from the best hyperparameters. This rebinds `model`, so
# the baseline model is no longer reachable under that name.
model = tuner.hypermodel.build(best_hps)

# Write a diagram of the tuned architecture (with tensor shapes).
tf.keras.utils.plot_model(model, f"{RNN_IMAGE_RESULTS}/gru_model_hp_summary.png", show_shapes=True, dpi=50)

In [None]:
# Train the tuned model for up to 100 epochs with the shared early-stopping
# callback. This rebinds `history`, replacing the baseline training history.
history = model.fit(train_images, train_labels, validation_data=(val_images, val_labels),
                    batch_size=32, epochs=100, callbacks=[stop_early])

In [None]:
# Training vs. validation accuracy per epoch for the tuned model.
pd.DataFrame(history.history)[['categorical_accuracy','val_categorical_accuracy']].plot()
plt.title("Accuracy")
# Save BEFORE showing: once plt.show() has rendered the figure in the notebook
# it is no longer the current figure, so a later savefig would write an empty
# canvas to disk.
plt.savefig(f"{RNN_IMAGE_RESULTS}/gru_hp_accuracy_plot.png")
plt.show()

In [None]:
# Training vs. validation loss per epoch for the tuned model.
pd.DataFrame(history.history)[['loss','val_loss']].plot()
plt.title("Loss")
# Save BEFORE showing: once plt.show() has rendered the figure in the notebook
# it is no longer the current figure, so a later savefig would write an empty
# canvas to disk.
plt.savefig(f"{RNN_IMAGE_RESULTS}/gru_hp_loss_plot.png")
plt.show()

In [None]:
# Side-by-side loss and accuracy curves for the tuned model, saved as one
# figure, followed by a CSV dump of the raw per-epoch metrics.
plt.figure(figsize=(12, 6))

# Left panel: loss curves.
plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], 'red', label='Training Loss')
plt.plot(history.history['val_loss'], 'green', label='Validation Loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

# Right panel: accuracy curves.
plt.subplot(1, 2, 2)
plt.plot(history.history['categorical_accuracy'], 'orange', label='Training Accuracy')
plt.plot(history.history['val_categorical_accuracy'], 'blue', label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

plt.tight_layout()

plt.savefig(f"{RNN_IMAGE_RESULTS}/gru_training_hp_plots.png")
# Persist the per-epoch metrics so the curves can be redrawn without retraining.
history_df = pd.DataFrame(history.history)
history_csv = f"{RNN_HISTORY_RESULTS}/gru_hp_history.csv"
history_df.to_csv(history_csv)

In [None]:
# Reduce the one-hot targets and the tuned model's predicted class
# probabilities to integer class indices so they can be compared directly.
y_true = test_labels.argmax(axis=1)
y_pred = model.predict(test_images).argmax(axis=1)

In [None]:
# Per-class precision/recall/F1 of the tuned model on the test split, labelled
# with the class names learned by the label encoder.
report = classification_report(y_true, y_pred, target_names=label_encoder.classes_)
print("Classification Report:\n", report)

# Keep a plain-text copy of the report alongside the other results.
report_file = f"{RNN_REPORT_RESULTS}/gru_hp_classification_report.txt"
with open(report_file, 'w') as file:
    file.write(report)

In [None]:
# Confusion matrix of the tuned model on the test split, drawn as an annotated
# heatmap and saved.
conf_matrix = confusion_matrix(y_true, y_pred)

plt.figure(figsize=(10, 8))
# fmt='d' prints the cell counts as integers; tick labels are the class names.
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='coolwarm', xticklabels=label_encoder.classes_, yticklabels=label_encoder.classes_)
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('True')

heatmap_file = f'{RNN_IMAGE_RESULTS}/gru_hp_confusion_matrix.png'
plt.savefig(heatmap_file)

In [None]:
# Persist the trained tuned model as gru_hp.h5 under RNN_MODEL_RESULTS.
model.save(f"{RNN_MODEL_RESULTS}/gru_hp.h5")