# Dense Layer Unit Count Comparison

Standard VGG-Net architectures end with two hidden dense layers of 4096
units each. This notebook compares different unit counts for those layers,
namely the powers of 2 from 128 to 4096, to find the best-performing value
for our model.


In [None]:
from gc import collect
from typing import Tuple

import matplotlib.pyplot as plt
import numpy as np
from h5py import File
from pandas import read_csv
from sklearn.metrics import accuracy_score, classification_report, \
    confusion_matrix
from tensorflow.config.experimental import list_physical_devices, \
    set_memory_growth
from tensorflow.keras.backend import clear_session
from tensorflow.keras.callbacks import CSVLogger, LearningRateScheduler, \
    ModelCheckpoint, EarlyStopping
from tensorflow.keras.layers import Conv2D, Dense, Flatten, Input, MaxPool2D
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.random import set_random_seed


### Model Creation Function


In [None]:
# VGG-13
def create_model(input_shape: Tuple[int, int, int], num_classes: int,
                 num_dense_units: int = 4096) -> Model:
    """Build and compile the VGG-13 style classifier.

    The network stacks five convolutional stages with 64, 128, 256, 512 and
    512 filters. Each stage is two 3x3, stride-1, "same"-padded ReLU
    convolutions followed by a 2x2 max-pool with stride 2. The result is
    flattened and passed through two ReLU dense layers of
    ``num_dense_units`` units and a softmax layer of ``num_classes`` units.

    :param input_shape: shape handed to the ``Input`` layer
    :param num_classes: number of units in the softmax output layer
    :param num_dense_units: number of units in each hidden dense layer
    :return: the model, compiled with Adam (learning rate 0.0001),
        categorical cross-entropy loss and the accuracy metric
    """
    # Every hidden layer shares the same activation and initialisers.
    hidden_kwargs = dict(activation="relu", kernel_initializer="he_normal",
                         bias_initializer="he_normal")

    inputs = Input(shape=input_shape)
    tensor = inputs

    # One iteration per convolutional stage: conv, conv, pool.
    for stage_filters in (64, 128, 256, 512, 512):
        for _ in range(2):
            tensor = Conv2D(filters=stage_filters, kernel_size=(3, 3),
                            strides=1, padding="same",
                            **hidden_kwargs)(tensor)
        tensor = MaxPool2D(pool_size=(2, 2), strides=2)(tensor)

    # Classifier head: two hidden dense layers, then the softmax output.
    tensor = Flatten()(tensor)
    for _ in range(2):
        tensor = Dense(units=num_dense_units, **hidden_kwargs)(tensor)
    outputs = Dense(num_classes, activation="softmax")(tensor)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer=Adam(learning_rate=0.0001),
                  loss="categorical_crossentropy", metrics=["accuracy"])

    return model


### Other Functions


In [None]:
def refresh_session():
    """Release the previous model before a new one is trained or loaded.

    Clears the Keras backend session, unbinds the module-level ``model``
    name if it exists, and then runs the garbage collector.
    """
    clear_session()
    # A bare ``del model`` inside this function makes ``model`` a local
    # name, so it always raised UnboundLocalError (a NameError subclass)
    # that was silently swallowed, and the global model was never released.
    # Remove the name from the module namespace instead; a missing name is
    # not an error.
    globals().pop("model", None)
    collect()
    
    
def import_dataset(filepath: str = "./dataset.hdf5") \
        -> Tuple[np.ndarray, np.ndarray, np.ndarray,
                 np.ndarray, np.ndarray, np.ndarray]:
    """Load the train, validation and test splits from an HDF5 file.

    Reads the datasets ``tr_data``, ``val_data``, ``ts_data``,
    ``tr_labels``, ``val_labels`` and ``ts_labels`` from ``filepath``.

    :param filepath: path of the HDF5 file to open read-only
    :return: ``(train_data, val_data, test_data, train_labels, val_labels,
        test_labels)``
    """
    # Indexing a dataset with ``[()]`` reads its full contents into memory
    # (per the h5py documentation), so the file can be closed before
    # returning. The original left the handle open.
    with File(filepath, "r") as file:
        train_data = file.get("tr_data")[()]
        val_data = file.get("val_data")[()]
        test_data = file.get("ts_data")[()]
        train_labels = file.get("tr_labels")[()]
        val_labels = file.get("val_labels")[()]
        test_labels = file.get("ts_labels")[()]

    return train_data, val_data, test_data, \
           train_labels, val_labels, test_labels


def get_test_results(test_model: Model, test_data: np.ndarray, 
                     test_labels: np.ndarray) -> Tuple:
    """Score ``test_model`` against a labelled data set.

    Predicted and true class indices are both taken as the arg-max along
    axis 1 of the model output and of ``test_labels`` respectively.

    :param test_model: model whose ``predict`` method is called
    :param test_data: inputs passed to ``predict``
    :param test_labels: reference labels, reduced with arg-max on axis 1
    :return: ``(accuracy score, confusion matrix, classification report)``
    """
    predicted_classes = np.argmax(test_model.predict(test_data), axis=1)
    true_classes = np.argmax(test_labels, axis=1)
    class_names = ["car", "heavy vehicles", "motorcycle"]

    accuracy = accuracy_score(true_classes, predicted_classes)
    matrix = confusion_matrix(true_classes, predicted_classes)
    report = classification_report(true_classes, predicted_classes,
                                   target_names=class_names)
    return accuracy, matrix, report


def get_learn_rate(epoch: int) -> float:
    """Return the learning rate for ``epoch`` and print it.

    The rate steps down at fixed thresholds: 1e-4 up to and including
    epoch 10, 5e-5 up to 20, 1e-5 up to 30, 5e-6 up to 40, and 1e-6 for
    anything later.

    :param epoch: epoch index to look up
    :return: learning rate for that epoch
    """
    # (last epoch the rate applies to, rate), in ascending epoch order
    schedule = ((10, 1e-4), (20, 5e-5), (30, 1e-5), (40, 5e-6))
    lr = next((rate for last_epoch, rate in schedule if epoch <= last_epoch),
              1e-6)
    print(f"Learning rate: {lr}")

    return lr


### Initialise Environment

In [None]:
# Enable memory growth on every GPU that tensorflow reports
for gpu in list_physical_devices("GPU"):
    set_memory_growth(gpu, True)

# Fix tensorflow random seed
set_random_seed(324)

tr_dat, val_dat, ts_dat, tr_lbls, val_lbls, ts_lbls = import_dataset()

# Model input shape is axes 1-3 of the training data; the class count is
# axis 1 of the training labels.
in_shape = tuple(tr_dat.shape[axis] for axis in (1, 2, 3))
num_cls = tr_lbls.shape[1]

# Callbacks shared by every training run in this notebook
lr_scheduler = LearningRateScheduler(get_learn_rate)
early_stopper = EarlyStopping(
    monitor="val_loss", patience=10, verbose=1, mode="min",
    restore_best_weights=True)

# Test data is not needed in this notebook, so free up the memory
del ts_dat, ts_lbls
collect()


### Model Creation and Training

In [None]:
# 128 dense units
refresh_session()
# Augment training images with random rotations, shifts, zooms and flips
data_gen = ImageDataGenerator(
    rotation_range=45, width_shift_range=0.2, height_shift_range=0.2,
    zoom_range=0.2, horizontal_flip=True)

model_dense_units = 128
model = create_model(in_shape, num_cls, model_dense_units)
# summary() prints the table itself and returns None, so wrapping it in
# print() emitted an extra "None" line.
model.summary()

# Derive the artefact names from the unit count so the checkpoint, the log
# and the model always refer to the same configuration.
checkpoint = ModelCheckpoint(
    f"./trained_models/{model_dense_units}denseunits_best.hdf5",
    monitor="val_loss", verbose=0, save_best_only=True, mode="min")
logger = CSVLogger(f"./training_logs/{model_dense_units}denseunits_log.csv")

# Keras documents steps_per_epoch as an integer; the original passed the
# float len(tr_dat) / 32. Round up so the final partial batch is included.
batch_size = 32
model.fit_generator(
    data_gen.flow(tr_dat, tr_lbls, batch_size=batch_size, shuffle=True),
    steps_per_epoch=int(np.ceil(len(tr_dat) / batch_size)), epochs=50,
    verbose=2,
    callbacks=[checkpoint, logger, lr_scheduler, early_stopper],
    validation_data=(val_dat, val_lbls))


In [None]:
# 256 dense units
refresh_session()
# Augment training images with random rotations, shifts, zooms and flips
data_gen = ImageDataGenerator(
    rotation_range=45, width_shift_range=0.2, height_shift_range=0.2,
    zoom_range=0.2, horizontal_flip=True)

model_dense_units = 256
model = create_model(in_shape, num_cls, model_dense_units)
# summary() prints the table itself and returns None, so wrapping it in
# print() emitted an extra "None" line.
model.summary()

# Derive the artefact names from the unit count so the checkpoint, the log
# and the model always refer to the same configuration.
checkpoint = ModelCheckpoint(
    f"./trained_models/{model_dense_units}denseunits_best.hdf5",
    monitor="val_loss", verbose=0, save_best_only=True, mode="min")
logger = CSVLogger(f"./training_logs/{model_dense_units}denseunits_log.csv")

# Keras documents steps_per_epoch as an integer; the original passed the
# float len(tr_dat) / 32. Round up so the final partial batch is included.
batch_size = 32
model.fit_generator(
    data_gen.flow(tr_dat, tr_lbls, batch_size=batch_size, shuffle=True),
    steps_per_epoch=int(np.ceil(len(tr_dat) / batch_size)), epochs=50,
    verbose=2,
    callbacks=[checkpoint, logger, lr_scheduler, early_stopper],
    validation_data=(val_dat, val_lbls))


In [None]:
# 512 dense units
refresh_session()
# Augment training images with random rotations, shifts, zooms and flips
data_gen = ImageDataGenerator(
    rotation_range=45, width_shift_range=0.2, height_shift_range=0.2,
    zoom_range=0.2, horizontal_flip=True)

model_dense_units = 512
model = create_model(in_shape, num_cls, model_dense_units)
# summary() prints the table itself and returns None, so wrapping it in
# print() emitted an extra "None" line.
model.summary()

# Derive the artefact names from the unit count so the checkpoint, the log
# and the model always refer to the same configuration.
checkpoint = ModelCheckpoint(
    f"./trained_models/{model_dense_units}denseunits_best.hdf5",
    monitor="val_loss", verbose=0, save_best_only=True, mode="min")
logger = CSVLogger(f"./training_logs/{model_dense_units}denseunits_log.csv")

# Keras documents steps_per_epoch as an integer; the original passed the
# float len(tr_dat) / 32. Round up so the final partial batch is included.
batch_size = 32
model.fit_generator(
    data_gen.flow(tr_dat, tr_lbls, batch_size=batch_size, shuffle=True),
    steps_per_epoch=int(np.ceil(len(tr_dat) / batch_size)), epochs=50,
    verbose=2,
    callbacks=[checkpoint, logger, lr_scheduler, early_stopper],
    validation_data=(val_dat, val_lbls))


In [None]:
# 1024 dense units
refresh_session()
# Augment training images with random rotations, shifts, zooms and flips
data_gen = ImageDataGenerator(
    rotation_range=45, width_shift_range=0.2, height_shift_range=0.2,
    zoom_range=0.2, horizontal_flip=True)

model_dense_units = 1024
model = create_model(in_shape, num_cls, model_dense_units)
# summary() prints the table itself and returns None, so wrapping it in
# print() emitted an extra "None" line.
model.summary()

# Derive the artefact names from the unit count so the checkpoint, the log
# and the model always refer to the same configuration.
checkpoint = ModelCheckpoint(
    f"./trained_models/{model_dense_units}denseunits_best.hdf5",
    monitor="val_loss", verbose=0, save_best_only=True, mode="min")
logger = CSVLogger(f"./training_logs/{model_dense_units}denseunits_log.csv")

# Keras documents steps_per_epoch as an integer; the original passed the
# float len(tr_dat) / 32. Round up so the final partial batch is included.
batch_size = 32
model.fit_generator(
    data_gen.flow(tr_dat, tr_lbls, batch_size=batch_size, shuffle=True),
    steps_per_epoch=int(np.ceil(len(tr_dat) / batch_size)), epochs=50,
    verbose=2,
    callbacks=[checkpoint, logger, lr_scheduler, early_stopper],
    validation_data=(val_dat, val_lbls))


In [None]:
# 2048 dense units
refresh_session()
# Augment training images with random rotations, shifts, zooms and flips
data_gen = ImageDataGenerator(
    rotation_range=45, width_shift_range=0.2, height_shift_range=0.2,
    zoom_range=0.2, horizontal_flip=True)

model_dense_units = 2048
model = create_model(in_shape, num_cls, model_dense_units)
# summary() prints the table itself and returns None, so wrapping it in
# print() emitted an extra "None" line.
model.summary()

# Derive the artefact names from the unit count so the checkpoint, the log
# and the model always refer to the same configuration.
checkpoint = ModelCheckpoint(
    f"./trained_models/{model_dense_units}denseunits_best.hdf5",
    monitor="val_loss", verbose=0, save_best_only=True, mode="min")
logger = CSVLogger(f"./training_logs/{model_dense_units}denseunits_log.csv")

# Keras documents steps_per_epoch as an integer; the original passed the
# float len(tr_dat) / 32. Round up so the final partial batch is included.
batch_size = 32
model.fit_generator(
    data_gen.flow(tr_dat, tr_lbls, batch_size=batch_size, shuffle=True),
    steps_per_epoch=int(np.ceil(len(tr_dat) / batch_size)), epochs=50,
    verbose=2,
    callbacks=[checkpoint, logger, lr_scheduler, early_stopper],
    validation_data=(val_dat, val_lbls))


In [None]:
# Training is finished, so the training arrays are no longer needed:
# unbind them and refresh the session.
del tr_dat, tr_lbls
refresh_session()


### Data Visualisation

In [None]:
# Per-model results, filled in by the evaluation cells below and keyed by
# the number of dense units.
acc_scores, conf_matrices, class_reports = {}, {}, {}

plt.style.use("ggplot")


In [None]:
# 128 dense units
refresh_session()
model = load_model("./trained_models/128denseunits_best.hdf5")
# Score the saved checkpoint on the validation data and file each result
# under its unit count.
(acc_scores[128], conf_matrices[128],
 class_reports[128]) = get_test_results(model, val_dat, val_lbls)

print(f"Validation accuracy for 128 dense units: {acc_scores[128]}")
print("Confusion Matrix:", conf_matrices[128], class_reports[128], sep="\n")

log_dense128 = read_csv("./training_logs/128denseunits_log.csv")

# Every legend sits just outside the top-right corner of its axes.
legend_opts = {"loc": "upper left", "bbox_to_anchor": (1.0, 1.0)}
plt.figure(figsize=(12.5, 12.5))

# Top panel: validation loss against validation accuracy
plt.subplot(3, 1, 1)
plt.plot(log_dense128["val_loss"], label="loss")
plt.plot(log_dense128["val_acc"], label="accuracy")
plt.title("Validation")
plt.legend(**legend_opts)

# Middle panel: training loss against validation loss
plt.subplot(3, 1, 2)
plt.plot(log_dense128["loss"], label="train loss")
plt.plot(log_dense128["val_loss"], label="validation loss")
plt.title("Loss")
plt.legend(**legend_opts)

# Bottom panel: training accuracy against validation accuracy
plt.subplot(3, 1, 3)
plt.plot(log_dense128["acc"], label="train accuracy")
plt.plot(log_dense128["val_acc"], label="validation accuracy")
plt.title("Accuracy")
plt.legend(**legend_opts)


In [None]:
# 256 dense units
refresh_session()
model = load_model("./trained_models/256denseunits_best.hdf5")
# Score the saved checkpoint on the validation data and file each result
# under its unit count.
(acc_scores[256], conf_matrices[256],
 class_reports[256]) = get_test_results(model, val_dat, val_lbls)

print(f"Validation accuracy for 256 dense units: {acc_scores[256]}")
print("Confusion Matrix:", conf_matrices[256], class_reports[256], sep="\n")

log_dense256 = read_csv("./training_logs/256denseunits_log.csv")

# Every legend sits just outside the top-right corner of its axes.
legend_opts = {"loc": "upper left", "bbox_to_anchor": (1.0, 1.0)}
plt.figure(figsize=(12.5, 12.5))

# Top panel: validation loss against validation accuracy
plt.subplot(3, 1, 1)
plt.plot(log_dense256["val_loss"], label="loss")
plt.plot(log_dense256["val_acc"], label="accuracy")
plt.title("Validation")
plt.legend(**legend_opts)

# Middle panel: training loss against validation loss
plt.subplot(3, 1, 2)
plt.plot(log_dense256["loss"], label="train loss")
plt.plot(log_dense256["val_loss"], label="validation loss")
plt.title("Loss")
plt.legend(**legend_opts)

# Bottom panel: training accuracy against validation accuracy
plt.subplot(3, 1, 3)
plt.plot(log_dense256["acc"], label="train accuracy")
plt.plot(log_dense256["val_acc"], label="validation accuracy")
plt.title("Accuracy")
plt.legend(**legend_opts)



In [None]:
# 512 dense units
refresh_session()
model = load_model("./trained_models/512denseunits_best.hdf5")
# Score the saved checkpoint on the validation data and file each result
# under its unit count.
(acc_scores[512], conf_matrices[512],
 class_reports[512]) = get_test_results(model, val_dat, val_lbls)

print(f"Validation accuracy for 512 dense units: {acc_scores[512]}")
print("Confusion Matrix:", conf_matrices[512], class_reports[512], sep="\n")

log_dense512 = read_csv("./training_logs/512denseunits_log.csv")

# Every legend sits just outside the top-right corner of its axes.
legend_opts = {"loc": "upper left", "bbox_to_anchor": (1.0, 1.0)}
plt.figure(figsize=(12.5, 12.5))

# Top panel: validation loss against validation accuracy
plt.subplot(3, 1, 1)
plt.plot(log_dense512["val_loss"], label="loss")
plt.plot(log_dense512["val_acc"], label="accuracy")
plt.title("Validation")
plt.legend(**legend_opts)

# Middle panel: training loss against validation loss
plt.subplot(3, 1, 2)
plt.plot(log_dense512["loss"], label="train loss")
plt.plot(log_dense512["val_loss"], label="validation loss")
plt.title("Loss")
plt.legend(**legend_opts)

# Bottom panel: training accuracy against validation accuracy
plt.subplot(3, 1, 3)
plt.plot(log_dense512["acc"], label="train accuracy")
plt.plot(log_dense512["val_acc"], label="validation accuracy")
plt.title("Accuracy")
plt.legend(**legend_opts)



In [None]:
# 1024 dense units
refresh_session()
model = load_model("./trained_models/1024denseunits_best.hdf5")
# Score the saved checkpoint on the validation data and file each result
# under its unit count.
(acc_scores[1024], conf_matrices[1024],
 class_reports[1024]) = get_test_results(model, val_dat, val_lbls)

print(f"Validation accuracy for 1024 dense units: {acc_scores[1024]}")
print("Confusion Matrix:", conf_matrices[1024], class_reports[1024],
      sep="\n")

log_dense1024 = read_csv("./training_logs/1024denseunits_log.csv")

# Every legend sits just outside the top-right corner of its axes.
legend_opts = {"loc": "upper left", "bbox_to_anchor": (1.0, 1.0)}
plt.figure(figsize=(12.5, 12.5))

# Top panel: validation loss against validation accuracy
plt.subplot(3, 1, 1)
plt.plot(log_dense1024["val_loss"], label="loss")
plt.plot(log_dense1024["val_acc"], label="accuracy")
plt.title("Validation")
plt.legend(**legend_opts)

# Middle panel: training loss against validation loss
plt.subplot(3, 1, 2)
plt.plot(log_dense1024["loss"], label="train loss")
plt.plot(log_dense1024["val_loss"], label="validation loss")
plt.title("Loss")
plt.legend(**legend_opts)

# Bottom panel: training accuracy against validation accuracy
plt.subplot(3, 1, 3)
plt.plot(log_dense1024["acc"], label="train accuracy")
plt.plot(log_dense1024["val_acc"], label="validation accuracy")
plt.title("Accuracy")
plt.legend(**legend_opts)



In [None]:
# 2048 dense units
refresh_session()
model = load_model("./trained_models/2048denseunits_best.hdf5")
# Score the saved checkpoint on the validation data and file each result
# under its unit count.
(acc_scores[2048], conf_matrices[2048],
 class_reports[2048]) = get_test_results(model, val_dat, val_lbls)

print(f"Validation accuracy for 2048 dense units: {acc_scores[2048]}")
print("Confusion Matrix:", conf_matrices[2048], class_reports[2048],
      sep="\n")

log_dense2048 = read_csv("./training_logs/2048denseunits_log.csv")

# Every legend sits just outside the top-right corner of its axes.
legend_opts = {"loc": "upper left", "bbox_to_anchor": (1.0, 1.0)}
plt.figure(figsize=(12.5, 12.5))

# Top panel: validation loss against validation accuracy
plt.subplot(3, 1, 1)
plt.plot(log_dense2048["val_loss"], label="loss")
plt.plot(log_dense2048["val_acc"], label="accuracy")
plt.title("Validation")
plt.legend(**legend_opts)

# Middle panel: training loss against validation loss
plt.subplot(3, 1, 2)
plt.plot(log_dense2048["loss"], label="train loss")
plt.plot(log_dense2048["val_loss"], label="validation loss")
plt.title("Loss")
plt.legend(**legend_opts)

# Bottom panel: training accuracy against validation accuracy
plt.subplot(3, 1, 3)
plt.plot(log_dense2048["acc"], label="train accuracy")
plt.plot(log_dense2048["val_acc"], label="validation accuracy")
plt.title("Accuracy")
plt.legend(**legend_opts)



In [None]:
# 4096 dense units
# No training run in this notebook: the checkpoint and log of the existing
# VGG-13 model are reused.
refresh_session()
model = load_model("./trained_models/vgg13_best.hdf5")
# Score the saved checkpoint on the validation data and file each result
# under its unit count.
(acc_scores[4096], conf_matrices[4096],
 class_reports[4096]) = get_test_results(model, val_dat, val_lbls)

print(f"Validation accuracy for 4096 dense units: {acc_scores[4096]}")
print("Confusion Matrix:", conf_matrices[4096], class_reports[4096],
      sep="\n")

log_dense4096 = read_csv("./training_logs/vgg13_log.csv")

# Every legend sits just outside the top-right corner of its axes.
legend_opts = {"loc": "upper left", "bbox_to_anchor": (1.0, 1.0)}
plt.figure(figsize=(12.5, 12.5))

# Top panel: validation loss against validation accuracy
plt.subplot(3, 1, 1)
plt.plot(log_dense4096["val_loss"], label="loss")
plt.plot(log_dense4096["val_acc"], label="accuracy")
plt.title("Validation")
plt.legend(**legend_opts)

# Middle panel: training loss against validation loss
plt.subplot(3, 1, 2)
plt.plot(log_dense4096["loss"], label="train loss")
plt.plot(log_dense4096["val_loss"], label="validation loss")
plt.title("Loss")
plt.legend(**legend_opts)

# Bottom panel: training accuracy against validation accuracy
plt.subplot(3, 1, 3)
plt.plot(log_dense4096["acc"], label="train accuracy")
plt.plot(log_dense4096["val_acc"], label="validation accuracy")
plt.title("Accuracy")
plt.legend(**legend_opts)


#### Cross-Model Comparison

In [None]:
# Training logs to overlay, in plotting order, paired with their legend
# labels.
comparison_logs = [
    ("128 units", log_dense128),
    ("256 units", log_dense256),
    ("512 units", log_dense512),
    ("1024 units", log_dense1024),
    ("2048 units", log_dense2048),
    ("4096 units", log_dense4096),
]
# Every legend sits just outside the top-right corner of its axes.
legend_opts = {"loc": "upper left", "bbox_to_anchor": (1.0, 1.0)}

plt.figure(figsize=(10, 7.5))

# Upper panel: validation loss of every model
plt.subplot(2, 1, 1)
for curve_label, training_log in comparison_logs:
    plt.plot(training_log["val_loss"], label=curve_label)
plt.title("Validation Loss")
plt.legend(**legend_opts)

# Lower panel: validation accuracy of every model
plt.subplot(2, 1, 2)
for curve_label, training_log in comparison_logs:
    plt.plot(training_log["val_acc"], label=curve_label)
plt.title("Validation Accuracy")
plt.legend(**legend_opts)



