In [None]:
# import libraries

import os

import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.utils.vis_utils import plot_model

In [None]:
# run the data_processing notebook in this kernel's namespace to get the
# training, validation and test sets; the cells below read x_train, y_train,
# x_valid, y_valid, x_test and y_test
# NOTE(review): presumably those six names are created by data_processing.ipynb - confirm

%run ../data_processing.ipynb

In [None]:
# compile parameters (can be updated for different variations and test)

# data splits used for fitting and evaluation
# NOTE(review): x_*/y_* are presumably defined by the data_processing run above - confirm
X_TRAIN = x_train
Y_TRAIN = y_train

X_VALID = x_valid
Y_VALID = y_valid

X_TEST = x_test
Y_TEST = y_test

# validation data handed to model.fit; built from the aliases above so that
# pointing X_VALID / Y_VALID at another split also changes the data used
# for validation during training
VALIDATION = (X_VALID, Y_VALID)

ACTIVATION_FUNCTION = 'relu'              # activation of every layer except the output layer
ACTIVATION_FUNCTION_OUTPUT = 'softmax'    # activation of the output layer
BATCH_SIZE = 32
EPOCHS = 50
# passed as input_dim to the first Dense layer
# NOTE(review): assumes X_TRAIN has 12 feature columns - confirm
INPUT_DIM = 12
KERNEL_INITIALIZER = 'he_uniform'
LEARNING_RATE = 0.03
# NOTE(review): categorical_crossentropy presumably means the labels are one-hot encoded - confirm
LOSS_FUNCTION = 'categorical_crossentropy'
METRICS = ['accuracy']
# NOTE(review): this is a single optimizer instance; a Keras optimizer keeps
# state for the model it trains, so code that trains several models should
# clone it (get_config / from_config) instead of sharing this object
OPTIMIZER = tf.optimizers.Adam(learning_rate=LEARNING_RATE)
VERBOSE = 0

# list of architectures to test (can be updated to test different architectures)
# each entry lists the layer widths, first layer to output layer, separated by '-'
architectures = ['128-2', '256-2', '256-128-2', '128-256-128-2', '512-256-128-96-2']

# dataframe to hold the loss and accuracy for each architecture
# (created empty here, so re-running this cell discards earlier results)
df_result = pd.DataFrame(columns=['architecture', 'training_loss',
                                  'training_accuracy', 'validation_loss',
                                  'validation_accuracy', 'test_loss',
                                  'test_accuracy'])

In [None]:
# create a baseline classifier
def create_baseline(arch):
    """Build an uncompiled, fully connected Sequential classifier.

    Parameters
    ----------
    arch : sequence of int or numeric str
        Layer widths in order. The first entry is the width of the layer
        that receives the input, the last entry is the width of the output
        layer, and any entries in between become hidden layers.

    Returns
    -------
    Sequential
        The model with all layers added; compiling is left to the caller.

    Raises
    ------
    ValueError
        If ``arch`` is empty or an entry cannot be converted to ``int``.
    """
    # widths may arrive as strings (e.g. from '256-128-2'.split('-')), so
    # convert them explicitly instead of relying on Dense to cast them
    layer_units = [int(units) for units in arch]
    if not layer_units:
        raise ValueError('arch must contain at least one layer width')

    # init a sequential NN
    classifier = Sequential()

    # 1st layer - takes in input
    classifier.add(Dense(units=layer_units[0],
                         kernel_initializer=KERNEL_INITIALIZER,
                         activation=ACTIVATION_FUNCTION, input_dim=INPUT_DIM))

    # hidden layers - every width between the first and the last entry
    # (the slice is empty when arch has two entries or fewer)
    for units in layer_units[1:-1]:
        classifier.add(Dense(units=units,
                             kernel_initializer=KERNEL_INITIALIZER,
                             activation=ACTIVATION_FUNCTION))

    # output layer
    classifier.add(Dense(units=layer_units[-1],
                         kernel_initializer=KERNEL_INITIALIZER,
                         activation=ACTIVATION_FUNCTION_OUTPUT))

    return classifier

In [None]:
# loop through the architectures list to test all the architectures

# the plots below are saved under figures/, so make sure the folder exists
os.makedirs('figures', exist_ok=True)

# one record per architecture; the results dataframe is built once after the
# loop, so re-running this cell replaces earlier results instead of adding
# duplicate rows
records = []

for ARCHITECTURE in architectures:

    # layer widths as integers, e.g. '256-128-2' -> [256, 128, 2]
    arch = [int(units) for units in ARCHITECTURE.split('-')]

    # create a baseline model and save a diagram of its layers
    model = create_baseline(arch)
    plot_model(model, to_file='figures/model_' + ARCHITECTURE + '_plot.png',
               show_shapes=True, show_layer_names=True)

    # compile the model with values defined above; each model gets its own
    # optimizer cloned from the configured one, because an optimizer instance
    # keeps state for the model it has trained and must not be shared
    optimizer = type(OPTIMIZER).from_config(OPTIMIZER.get_config())
    model.compile(optimizer=optimizer,
                  loss=LOSS_FUNCTION, metrics=METRICS)

    # train the model
    history = model.fit(X_TRAIN, Y_TRAIN, validation_data=VALIDATION,
                        epochs=EPOCHS, batch_size=BATCH_SIZE,
                        verbose=VERBOSE)

    # plot the per-epoch training history
    # (the grid is enabled on the axes; assigning to plt.grid would overwrite
    # the pyplot function rather than draw a grid)
    ax = pd.DataFrame(history.history).plot(figsize=(8, 5))
    ax.grid(True)
    ax.set_ylim(0, 1)
    ax.set_title('Architecture: ' + str(ARCHITECTURE))
    ax.figure.savefig('figures/architecture_' + str(ARCHITECTURE) + '.png')
    plt.show()
    # release the figure so figures do not pile up across iterations
    plt.close(ax.figure)

    # evaluation - loss and accuracy
    model_train_loss, model_train_accuracy = model.evaluate(X_TRAIN, Y_TRAIN,
                                                            verbose=VERBOSE)
    model_valid_loss, model_valid_accuracy = model.evaluate(X_VALID, Y_VALID,
                                                            verbose=VERBOSE)
    model_test_loss, model_test_accuracy = model.evaluate(X_TEST, Y_TEST,
                                                          verbose=VERBOSE)

    # remember the evaluation results for this architecture
    records.append({'architecture': ARCHITECTURE,
                    'training_loss': model_train_loss,
                    'training_accuracy': model_train_accuracy,
                    'validation_loss': model_valid_loss,
                    'validation_accuracy': model_valid_accuracy,
                    'test_loss': model_test_loss,
                    'test_accuracy': model_test_accuracy})

# build the results dataframe in one go, rows in the order of `architectures`
df_result = pd.DataFrame(records, columns=df_result.columns)

In [None]:
# display the evaluation results dataframe
# (one row per trained architecture with its loss and accuracy on the
# training, validation and test sets)

df_result

In [None]:
# visualize loss for training, validation and test sets

# x values - the three loss columns, in the order they appear on the x axis
x = ['training_loss', 'validation_loss', 'test_loss']

fig, ax = plt.subplots()

# one line per architecture: its training, validation and test loss
for _, row in df_result.iterrows():
    ax.plot(x, [row[column] for column in x], label=str(row['architecture']))

# plot
# (the grid is enabled on the axes; assigning to plt.grid would overwrite the
# pyplot function rather than draw a grid)
ax.grid(True)
# NOTE: losses above 1 fall outside this fixed range and are cut off
ax.set_ylim(0, 1)
ax.set_ylabel('loss')
ax.set_title('Model Loss with varying Architectures')
ax.legend(loc='upper right')
fig.savefig('figures/loss.png')
plt.show()

In [None]:
# visualize accuracy for training, validation and test sets

# x values - the three accuracy columns, in the order they appear on the x axis
x = ['training_accuracy', 'validation_accuracy', 'test_accuracy']

fig, ax = plt.subplots()

# one line per architecture: its training, validation and test accuracy
for _, row in df_result.iterrows():
    ax.plot(x, [row[column] for column in x], label=str(row['architecture']))

# plot
# (the grid is enabled on the axes; assigning to plt.grid would overwrite the
# pyplot function rather than draw a grid)
ax.grid(True)
ax.set_ylim(0, 1)
ax.set_ylabel('accuracy')
ax.set_title('Model Accuracy with varying Architectures')
ax.legend(loc='upper right')
fig.savefig('figures/accuracy.png')
plt.show()