# Dense Layer Unit Count Comparison

In standard VGG-Net architectures, the classifier has two hidden dense
layers of 4096 units each. This notebook explores different unit counts
to find the best-performing value for our model.


In [None]:
from gc import collect
from typing import Tuple

import numpy as np
from h5py import File
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from tensorflow import ConfigProto, Session
from tensorflow.keras.backend import set_session, clear_session
from tensorflow.keras.callbacks import CSVLogger, LearningRateScheduler, \
    ModelCheckpoint
from tensorflow.keras.layers import Conv2D, Dense, Flatten, Input, MaxPool2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.random import set_random_seed
from pandas import read_csv
import matplotlib.pyplot as plt


In [None]:
# Session options: log the device each op is placed on, and let the GPU
# allocation grow on demand.
config = ConfigProto(log_device_placement=True)
config.gpu_options.allow_growth = True

# Hand the configured session to the Keras backend
sess = Session(config=config)
set_session(sess)

# Seed TensorFlow's random number generator
set_random_seed(324)



### Model Creation Function


In [None]:
def create_model(input_shape: Tuple[int, int, int], num_classes: int,
                 num_dense_units: int = 4096) -> Model:
    """Build and compile the VGG-style convolutional classifier.

    The network is five convolutional blocks, each closed by a 2x2 max-pool,
    followed by two ReLU dense layers of ``num_dense_units`` units and a
    softmax output of ``num_classes`` units. The last three blocks end with
    a 1x1 convolution; every other convolution is 3x3.

    :param input_shape: shape of a single input sample.
    :param num_classes: number of units in the softmax output layer.
    :param num_dense_units: number of units in each hidden dense layer.
    :return: the model, compiled with Adam (learning rate 1e-4),
        categorical cross-entropy loss and the accuracy metric.
    """
    # (filters, kernel side of each convolution in the block), in network order
    conv_blocks = (
        (64, (3, 3)),
        (128, (3, 3)),
        (256, (3, 3, 1)),
        (512, (3, 3, 1)),
        (512, (3, 3, 1)),
    )
    # Initialiser settings shared by every conv layer and hidden dense layer
    he_init = {"kernel_initializer": "he_normal",
               "bias_initializer": "he_normal"}

    inputs = Input(shape=input_shape)
    tensor = inputs

    for n_filters, kernel_sides in conv_blocks:
        for side in kernel_sides:
            tensor = Conv2D(filters=n_filters, kernel_size=(side, side),
                            strides=1, padding="same", activation="relu",
                            **he_init)(tensor)
        tensor = MaxPool2D(pool_size=(2, 2), strides=2)(tensor)

    tensor = Flatten()(tensor)
    for _ in range(2):
        tensor = Dense(units=num_dense_units, activation="relu",
                       **he_init)(tensor)
    outputs = Dense(num_classes, activation="softmax")(tensor)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer=Adam(learning_rate=0.0001),
                  loss="categorical_crossentropy", metrics=["accuracy"])

    return model


### Other Functions


In [None]:
def import_dataset(filepath: str = "./dataset.hdf5") \
        -> Tuple[np.ndarray, np.ndarray, np.ndarray, 
                 np.ndarray, np.ndarray, np.ndarray]:
    """Load the train/validation/test arrays from an HDF5 file.

    :param filepath: path of the HDF5 file to read.
    :return: ``(train_data, val_data, test_data,
        train_labels, val_labels, test_labels)``, read from the datasets
        ``tr_data``, ``val_data``, ``ts_data``, ``tr_labels``,
        ``val_labels`` and ``ts_labels`` respectively.
    :raises KeyError: if one of those datasets is missing from the file.
    """
    dataset_names = ("tr_data", "val_data", "ts_data",
                     "tr_labels", "val_labels", "ts_labels")

    # The context manager closes the file handle even when a read fails.
    # "[()]" reads each dataset fully into memory, so the returned arrays
    # stay valid after the file is closed.
    with File(filepath, "r") as file:
        return tuple(file[name][()] for name in dataset_names)


def get_test_results(test_model: Model, test_data: np.ndarray, 
                     test_labels: np.ndarray) -> Tuple:
    """Score a model's predictions against the given labels.

    Both the predictions and the label rows are reduced to class indices
    with an argmax along axis 1 before scoring.

    :param test_model: model whose ``predict`` output is scored.
    :param test_data: inputs passed to ``test_model.predict``.
    :param test_labels: label rows matching ``test_data``.
    :return: ``(accuracy, confusion matrix, classification report)``.
    """
    class_names = ["car", "heavy vehicles", "motorcycle"]

    predicted = np.argmax(test_model.predict(test_data), axis=1)
    expected = np.argmax(test_labels, axis=1)

    accuracy = accuracy_score(expected, predicted)
    matrix = confusion_matrix(expected, predicted)
    report = classification_report(expected, predicted,
                                   target_names=class_names)
    return accuracy, matrix, report


def get_learn_rate(epoch: int) -> float:
    """Return the learning rate to use for the given epoch.

    The rate starts at 1e-4 and drops by a factor of ten once the epoch
    number exceeds each of 10, 20, 30, 40 and 50.

    :param epoch: epoch number supplied by the scheduler callback.
    :return: learning rate for that epoch.
    """
    # Thresholds are tested from highest to lowest so the first match is the
    # most specific one. Testing them in ascending order makes every branch
    # after "epoch > 10" unreachable.
    schedule = (
        (50, 1e-9),
        (40, 1e-8),
        (30, 1e-7),
        (20, 1e-6),
        (10, 1e-5),
    )

    lr = 1e-4
    for threshold, rate in schedule:
        if epoch > threshold:
            lr = rate
            break

    print(f"Learning rate: {lr}")

    return lr


### Data Initialisation

In [None]:
tr_dat, val_dat, ts_dat, tr_lbls, val_lbls, ts_lbls = import_dataset()

# Model input shape: axes 1-3 of the training array
in_shape = tuple(tr_dat.shape[axis] for axis in (1, 2, 3))
# Number of classes: length of axis 1 of the training labels
num_cls = tr_lbls.shape[1]

model = None
lr_scheduler = LearningRateScheduler(get_learn_rate)

# The test split is not used during fine-tuning, so release it now
del ts_dat, ts_lbls
collect()


### Model Creation and Training

We compare powers of 2 (chosen for computational efficiency) from 128 up
to 4096. Models with 128 to 2048 units are trained below; the 4096-unit
results are reused from an existing model.

In [None]:
# 128 dense units
model_dense_units = 128
# One tag shared by every artefact of this run, so the checkpoint, log and
# final-model file names cannot drift from the unit count above.
run_tag = f"{model_dense_units}denseunits"

model = create_model(in_shape, num_cls, model_dense_units)
# summary() prints the table itself and returns None; wrapping it in print()
# emits a stray "None" line.
model.summary()

# Keep only the model from the epoch with the lowest validation loss
checkpoint = ModelCheckpoint(f"./trained_models/{run_tag}_best.hdf5",
                             monitor="val_loss", verbose=0,
                             save_best_only=True, mode="min")
logger = CSVLogger(f"./training_logs/{run_tag}_log.csv")

model.fit(tr_dat, tr_lbls, batch_size=32, validation_data=(val_dat, val_lbls),
          epochs=20, verbose=2, shuffle=True,
          callbacks=[checkpoint, logger, lr_scheduler])

# Also save the model as it stands after the final epoch
model.save(f"./trained_models/{run_tag}_20epoch.hdf5")


In [None]:
# 256 dense units
model_dense_units = 256
# One tag shared by every artefact of this run, so the checkpoint, log and
# final-model file names cannot drift from the unit count above.
run_tag = f"{model_dense_units}denseunits"

model = create_model(in_shape, num_cls, model_dense_units)
# summary() prints the table itself and returns None; wrapping it in print()
# emits a stray "None" line.
model.summary()

# Keep only the model from the epoch with the lowest validation loss
checkpoint = ModelCheckpoint(f"./trained_models/{run_tag}_best.hdf5",
                             monitor="val_loss", verbose=0,
                             save_best_only=True, mode="min")
logger = CSVLogger(f"./training_logs/{run_tag}_log.csv")

model.fit(tr_dat, tr_lbls, batch_size=32, validation_data=(val_dat, val_lbls),
          epochs=20, verbose=2, shuffle=True,
          callbacks=[checkpoint, logger, lr_scheduler])

# Also save the model as it stands after the final epoch
model.save(f"./trained_models/{run_tag}_20epoch.hdf5")


In [None]:
# 512 dense units
model_dense_units = 512
# One tag shared by every artefact of this run, so the checkpoint, log and
# final-model file names cannot drift from the unit count above.
run_tag = f"{model_dense_units}denseunits"

model = create_model(in_shape, num_cls, model_dense_units)
# summary() prints the table itself and returns None; wrapping it in print()
# emits a stray "None" line.
model.summary()

# Keep only the model from the epoch with the lowest validation loss
checkpoint = ModelCheckpoint(f"./trained_models/{run_tag}_best.hdf5",
                             monitor="val_loss", verbose=0,
                             save_best_only=True, mode="min")
logger = CSVLogger(f"./training_logs/{run_tag}_log.csv")

model.fit(tr_dat, tr_lbls, batch_size=32, validation_data=(val_dat, val_lbls),
          epochs=20, verbose=2, shuffle=True,
          callbacks=[checkpoint, logger, lr_scheduler])

# Also save the model as it stands after the final epoch
model.save(f"./trained_models/{run_tag}_20epoch.hdf5")


In [None]:
# 1024 dense units
model_dense_units = 1024
# One tag shared by every artefact of this run, so the checkpoint, log and
# final-model file names cannot drift from the unit count above.
run_tag = f"{model_dense_units}denseunits"

model = create_model(in_shape, num_cls, model_dense_units)
# summary() prints the table itself and returns None; wrapping it in print()
# emits a stray "None" line.
model.summary()

# Keep only the model from the epoch with the lowest validation loss
checkpoint = ModelCheckpoint(f"./trained_models/{run_tag}_best.hdf5",
                             monitor="val_loss", verbose=0,
                             save_best_only=True, mode="min")
logger = CSVLogger(f"./training_logs/{run_tag}_log.csv")

model.fit(tr_dat, tr_lbls, batch_size=32, validation_data=(val_dat, val_lbls),
          epochs=20, verbose=2, shuffle=True,
          callbacks=[checkpoint, logger, lr_scheduler])

# Also save the model as it stands after the final epoch
model.save(f"./trained_models/{run_tag}_20epoch.hdf5")


In [None]:
# 2048 dense units
model_dense_units = 2048
# One tag shared by every artefact of this run, so the checkpoint, log and
# final-model file names cannot drift from the unit count above.
run_tag = f"{model_dense_units}denseunits"

model = create_model(in_shape, num_cls, model_dense_units)
# summary() prints the table itself and returns None; wrapping it in print()
# emits a stray "None" line.
model.summary()

# Keep only the model from the epoch with the lowest validation loss
checkpoint = ModelCheckpoint(f"./trained_models/{run_tag}_best.hdf5",
                             monitor="val_loss", verbose=0,
                             save_best_only=True, mode="min")
logger = CSVLogger(f"./training_logs/{run_tag}_log.csv")

model.fit(tr_dat, tr_lbls, batch_size=32, validation_data=(val_dat, val_lbls),
          epochs=20, verbose=2, shuffle=True,
          callbacks=[checkpoint, logger, lr_scheduler])

# Also save the model as it stands after the final epoch
model.save(f"./trained_models/{run_tag}_20epoch.hdf5")


In [None]:
# Training is finished: drop the training arrays and run the garbage collector
del tr_dat, tr_lbls
collect()


### Data Visualisation

In [None]:
# Validation results per model, keyed by dense-unit count
acc_scores, conf_matrices, class_reports = {}, {}, {}


In [None]:
# 128 dense units
# Rebuild the architecture and restore the checkpoint saved during training
model_128 = create_model(in_shape, num_cls, 128)
model_128.load_weights("./trained_models/128denseunits_best.hdf5")
model_128.compile(optimizer=Adam(learning_rate=0.0001),
                  loss="categorical_crossentropy", metrics=["accuracy"])

# Scored on the validation split
acc_scores[128], conf_matrices[128], class_reports[128] \
    = get_test_results(model_128, val_dat, val_lbls)

print(f"Validation accuracy for 128 dense units: {acc_scores[128]}")
print("Confusion Matrix:")
print(conf_matrices[128])
print(class_reports[128])

dense128_log = read_csv("./training_logs/128denseunits_log.csv")

# One subplot per metric: (title, [(log column, legend label), ...])
panels = [
    ("Loss", [("loss", "train loss"), ("val_loss", "validation loss")]),
    ("Accuracy", [("acc", "train accuracy"),
                  ("val_acc", "validation accuracy")]),
]

plt.style.use("ggplot")
plt.figure(figsize=[10, 7.5])
for row, (title, curves) in enumerate(panels, start=1):
    plt.subplot(2, 1, row)
    plt.xticks([0, 5, 10, 15, 20])
    for column, label in curves:
        plt.plot(dense128_log[column], label=label)
    plt.title(title)
    plt.legend(loc="upper left", bbox_to_anchor=(1.0, 1.0))

# Reset the Keras backend session. dense128_log is deliberately kept: the
# cross-model comparison cell plots it again, so deleting it here would make
# that cell fail with a NameError.
clear_session()
collect()


In [None]:
# 256 dense units
# Rebuild the architecture and restore the checkpoint saved during training
model_256 = create_model(in_shape, num_cls, 256)
model_256.load_weights("./trained_models/256denseunits_best.hdf5")
model_256.compile(optimizer=Adam(learning_rate=0.0001),
                  loss="categorical_crossentropy", metrics=["accuracy"])

# Scored on the validation split
acc_scores[256], conf_matrices[256], class_reports[256] \
    = get_test_results(model_256, val_dat, val_lbls)

print(f"Validation accuracy for 256 dense units: {acc_scores[256]}")
print("Confusion Matrix:")
print(conf_matrices[256])
print(class_reports[256])

dense256_log = read_csv("./training_logs/256denseunits_log.csv")

# One subplot per metric: (title, [(log column, legend label), ...])
panels = [
    ("Loss", [("loss", "train loss"), ("val_loss", "validation loss")]),
    ("Accuracy", [("acc", "train accuracy"),
                  ("val_acc", "validation accuracy")]),
]

plt.style.use("ggplot")
plt.figure(figsize=[10, 7.5])
for row, (title, curves) in enumerate(panels, start=1):
    plt.subplot(2, 1, row)
    plt.xticks([0, 5, 10, 15, 20])
    for column, label in curves:
        plt.plot(dense256_log[column], label=label)
    plt.title(title)
    plt.legend(loc="upper left", bbox_to_anchor=(1.0, 1.0))

# Reset the Keras backend session. dense256_log is deliberately kept: the
# cross-model comparison cell plots it again, so deleting it here would make
# that cell fail with a NameError.
clear_session()
collect()



In [None]:
# 512 dense units
# Rebuild the architecture and restore the checkpoint saved during training
model_512 = create_model(in_shape, num_cls, 512)
model_512.load_weights("./trained_models/512denseunits_best.hdf5")
model_512.compile(optimizer=Adam(learning_rate=0.0001),
                  loss="categorical_crossentropy", metrics=["accuracy"])

# Scored on the validation split
acc_scores[512], conf_matrices[512], class_reports[512] \
    = get_test_results(model_512, val_dat, val_lbls)

print(f"Validation accuracy for 512 dense units: {acc_scores[512]}")
print("Confusion Matrix:")
print(conf_matrices[512])
print(class_reports[512])

dense512_log = read_csv("./training_logs/512denseunits_log.csv")

# One subplot per metric: (title, [(log column, legend label), ...])
panels = [
    ("Loss", [("loss", "train loss"), ("val_loss", "validation loss")]),
    ("Accuracy", [("acc", "train accuracy"),
                  ("val_acc", "validation accuracy")]),
]

plt.style.use("ggplot")
plt.figure(figsize=[10, 7.5])
for row, (title, curves) in enumerate(panels, start=1):
    plt.subplot(2, 1, row)
    plt.xticks([0, 5, 10, 15, 20])
    for column, label in curves:
        plt.plot(dense512_log[column], label=label)
    plt.title(title)
    plt.legend(loc="upper left", bbox_to_anchor=(1.0, 1.0))

# Reset the Keras backend session. dense512_log is deliberately kept: the
# cross-model comparison cell plots it again, so deleting it here would make
# that cell fail with a NameError.
clear_session()
collect()



In [None]:
# 1024 dense units
# Rebuild the architecture and restore the checkpoint saved during training
model_1024 = create_model(in_shape, num_cls, 1024)
model_1024.load_weights("./trained_models/1024denseunits_best.hdf5")
model_1024.compile(optimizer=Adam(learning_rate=0.0001),
                   loss="categorical_crossentropy", metrics=["accuracy"])

# Scored on the validation split
acc_scores[1024], conf_matrices[1024], class_reports[1024] \
    = get_test_results(model_1024, val_dat, val_lbls)

print(f"Validation accuracy for 1024 dense units: {acc_scores[1024]}")
print("Confusion Matrix:")
print(conf_matrices[1024])
print(class_reports[1024])

dense1024_log = read_csv("./training_logs/1024denseunits_log.csv")

# One subplot per metric: (title, [(log column, legend label), ...])
panels = [
    ("Loss", [("loss", "train loss"), ("val_loss", "validation loss")]),
    ("Accuracy", [("acc", "train accuracy"),
                  ("val_acc", "validation accuracy")]),
]

plt.style.use("ggplot")
plt.figure(figsize=[10, 7.5])
for row, (title, curves) in enumerate(panels, start=1):
    plt.subplot(2, 1, row)
    plt.xticks([0, 5, 10, 15, 20])
    for column, label in curves:
        plt.plot(dense1024_log[column], label=label)
    plt.title(title)
    plt.legend(loc="upper left", bbox_to_anchor=(1.0, 1.0))

# Reset the Keras backend session. dense1024_log is deliberately kept: the
# cross-model comparison cell plots it again, so deleting it here would make
# that cell fail with a NameError.
clear_session()
collect()



In [None]:
# 2048 dense units
# Rebuild the architecture and restore the checkpoint saved during training
model_2048 = create_model(in_shape, num_cls, 2048)
model_2048.load_weights("./trained_models/2048denseunits_best.hdf5")
model_2048.compile(optimizer=Adam(learning_rate=0.0001),
                   loss="categorical_crossentropy", metrics=["accuracy"])

# Scored on the validation split
acc_scores[2048], conf_matrices[2048], class_reports[2048] \
    = get_test_results(model_2048, val_dat, val_lbls)

print(f"Validation accuracy for 2048 dense units: {acc_scores[2048]}")
print("Confusion Matrix:")
print(conf_matrices[2048])
print(class_reports[2048])

dense2048_log = read_csv("./training_logs/2048denseunits_log.csv")

# One subplot per metric: (title, [(log column, legend label), ...])
panels = [
    ("Loss", [("loss", "train loss"), ("val_loss", "validation loss")]),
    ("Accuracy", [("acc", "train accuracy"),
                  ("val_acc", "validation accuracy")]),
]

plt.style.use("ggplot")
plt.figure(figsize=[10, 7.5])
for row, (title, curves) in enumerate(panels, start=1):
    plt.subplot(2, 1, row)
    plt.xticks([0, 5, 10, 15, 20])
    for column, label in curves:
        plt.plot(dense2048_log[column], label=label)
    plt.title(title)
    plt.legend(loc="upper left", bbox_to_anchor=(1.0, 1.0))

# Reset the Keras backend session. dense2048_log is deliberately kept: the
# cross-model comparison cell plots it again, so deleting it here would make
# that cell fail with a NameError.
clear_session()
collect()



In [None]:
# 4096 dense units
# No model is trained for this size: the weights and training log of an
# existing model are loaded instead.
model_4096 = create_model(in_shape, num_cls, 4096)
model_4096.load_weights("./trained_models/vgg16conv1_best.hdf5")
model_4096.compile(loss="categorical_crossentropy", metrics=["accuracy"],
                   optimizer=Adam(learning_rate=0.0001))
(acc_scores[4096], conf_matrices[4096],
 class_reports[4096]) = get_test_results(model_4096, val_dat, val_lbls)

print(f"Validation accuracy for 4096 dense units: {acc_scores[4096]}")
print("Confusion Matrix:")
print(conf_matrices[4096])
print(class_reports[4096])

dense4096_log = read_csv("./training_logs/vgg16conv1_log.csv")

# One subplot per metric: (title, [(log column, legend label), ...])
panels = [
    ("Loss", [("loss", "train loss"), ("val_loss", "validation loss")]),
    ("Accuracy", [("acc", "train accuracy"),
                  ("val_acc", "validation accuracy")]),
]

plt.style.use("ggplot")
plt.figure(figsize=[10, 7.5])
for row, (title, curves) in enumerate(panels, start=1):
    plt.subplot(2, 1, row)
    plt.xticks([0, 5, 10, 15, 20])
    for column, label in curves:
        plt.plot(dense4096_log[column], label=label)
    plt.title(title)
    plt.legend(loc="upper left", bbox_to_anchor=(1.0, 1.0))

# Reset the Keras backend session and drop the log
clear_session()
collect()
del dense4096_log


#### Cross-Model Comparison

In [None]:
# Training log of each model, in plotting order: (dense units, CSV path).
# The logs are re-read here rather than taken from the per-model cells,
# because those cells delete their log variables once they have plotted.
# The 4096-unit entry is the reused baseline model.
log_paths = (
    (128, "./training_logs/128denseunits_log.csv"),
    (256, "./training_logs/256denseunits_log.csv"),
    (512, "./training_logs/512denseunits_log.csv"),
    (1024, "./training_logs/1024denseunits_log.csv"),
    (2048, "./training_logs/2048denseunits_log.csv"),
    (4096, "./training_logs/vgg16conv1_log.csv"),
)
logs = [(units, read_csv(path)) for units, path in log_paths]

plt.style.use("ggplot")
plt.figure(figsize=[10, 7.5])
# One subplot per validation metric, one curve per model
for row, (column, title) in enumerate(
        [("val_loss", "Validation Loss"),
         ("val_acc", "Validation Accuracy")], start=1):
    plt.subplot(2, 1, row)
    plt.xticks([0, 5, 10, 15, 20])
    for units, log in logs:
        plt.plot(log[column], label=f"{units} units")
    plt.title(title)
    plt.legend(loc="upper left", bbox_to_anchor=(1.0, 1.0))


### Conclusion

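**TO BE UPDATED:** the conclusion below appears to be carried over from an earlier architecture comparison and does not yet discuss the dense-unit results above.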

From the above, we can observe that overfitting occurs in all models
between the 5th and 10th epochs, and that the maximum validation accuracy
is about 65%. However, this is before any regularisation measures are taken.

Based on the cross-model comparison, VGG-16 and VGG-16(Conv1) achieved
the lowest validation loss, and at those points VGG-16(Conv1) achieved
the higher validation accuracy.

Therefore, we shall proceed to optimise the VGG-16(Conv1) model with
hyperparameter tuning, followed by regularisation.