# Deep Learning with the Fashion-MNIST dataset

Inspired by: https://www.tensorflow.org/tutorials/keras/classification

## Facts about the dataset

### User Input

In [None]:
# Ratio of training images to validation images. It has to be set because the dataset does not provide
# separate validation images, e.g. 0.8 -> 80% training images, 20% validation images.
# The validation_split parameter of the fit() method is deliberately not used, so that the validation image set
# is controlled by ourselves and can be used for further computation (e.g. showing images).
TRAIN_TO_VALID_RATIO = 0.8

### Definitions and load dataset

In [None]:
import numpy as np
from tensorflow import keras

# Load the train and test part of the Fashion-MNIST dataset through Keras.
fashion_mnist = keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

# Index of the first validation sample; everything before it remains training data.
split_num = int(np.floor(len(train_images) * TRAIN_TO_VALID_RATIO))

# Cut the validation set off the end of the loaded training data.
train_images, validation_images = train_images[:split_num], train_images[split_num:]
train_labels, validation_labels = train_labels[:split_num], train_labels[split_num:]

print(f"#Training-Images:\t{len(train_images)}",
      f"#Validation-Images:\t{len(validation_images)}",
      f"#Testing-Images:\t{len(test_images)}",
      sep="\n")

# All three sets have to agree on the image height and width.
assert train_images.shape[1:3] == test_images.shape[1:3] == validation_images.shape[1:3]
input_shape = tuple(train_images.shape[1:3])

# Class names, ordered so that a label value can be used as index into this list.
class_names = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]

## Visualization of the dataset

### Helper functions

In [None]:
from matplotlib import pyplot as plt, colormaps
from typing import Union, List

# Accepted ways of selecting images: a list of indices, a single index or a range of indices.
NUM_TYPE = Union[List[int], int, range]


def show_images(dataset: np.ndarray, labels: np.ndarray, classes: List[str],
                num: NUM_TYPE) -> None:
    """
    Plot the selected images of a dataset in a square grid, each with a colorbar and its class name.
    Internal helper: it is meant to be called by the API functions only, never directly.

    :param dataset: Images to pick from.
    :param labels: Class indices of the images in dataset (must have the same length as dataset).
    :param classes: Class names, indexed by the values stored in labels.
    :param num: Integer, range or list of integers selecting the images by their index in dataset and labels.
    """
    total = len(dataset)
    assert total == len(labels)

    selected = [num] if isinstance(num, int) else num

    # Every index has to be a valid (possibly negative) position inside the dataset.
    assert all(-total <= position < total for position in selected)

    # Smallest square grid that offers one cell per selected image.
    grid_size = int(np.ceil(np.sqrt(len(selected))))

    plt.figure(figsize=(grid_size * 3, grid_size * 3))
    for cell, position in enumerate(selected, start=1):
        image = dataset[position]

        plt.subplot(grid_size, grid_size, cell)
        # Images whose maximum value is at most 1 are drawn in grayscale, all others with the default colormap.
        plt.imshow(image, cmap=colormaps["gray"] if np.max(image) <= 1 else None)
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        plt.colorbar()
        plt.xlabel(classes[labels[position]])

    plt.show()


def show_train_images(num: NUM_TYPE) -> None:
    """
    API function plotting the selected images of the train dataset, each with its colorbar and class name.

    :param num: Integer, range or list of integers selecting the images by their index in the train dataset.
    """
    show_images(dataset=train_images, labels=train_labels, classes=class_names, num=num)


def show_validation_images(num: NUM_TYPE) -> None:
    """
    API function plotting the selected images of the validation dataset, each with its colorbar and class name.

    :param num: Integer, range or list of integers selecting the images by their index in the validation dataset.
    """
    show_images(dataset=validation_images, labels=validation_labels, classes=class_names, num=num)


def show_test_images(num: NUM_TYPE) -> None:
    """
    API function plotting the selected images of the test dataset, each with its colorbar and class name.

    :param num: Integer, range or list of integers selecting the images by their index in the test dataset.
    """
    show_images(dataset=test_images, labels=test_labels, classes=class_names, num=num)

### Visualize train/validation/test images

In [None]:
# Preview the first five raw images of the test, train and validation set (in this order).
for show_raw_images in (show_test_images, show_train_images, show_validation_images):
    show_raw_images(range(5))

## Data Preparation

### Scaling of image pixel values

The image pixel values have to be scaled (normalized) to the range 0-1. This prevents large numeric values from being computed inside the neural network, which keeps the training numerically well-behaved.
The Fashion-MNIST dataset uses 8-bit images, which means the pixels have values between 0 and 255; each pixel value is therefore divided by 255.

In [None]:
# Scale the pixel values of every set by 1/255; the labels are taken over unchanged.
prepared_train_images, prepared_train_labels = train_images / 255.0, train_labels
prepared_validation_images, prepared_validation_labels = validation_images / 255.0, validation_labels
prepared_test_images, prepared_test_labels = test_images / 255.0, test_labels

### Helper functions

In [None]:
def show_prepared_train_images(num: NUM_TYPE) -> None:
    """
    API function to show images of the prepared (scaled) train dataset with their colorbar.

    :param num: Integer, range or list of integers specifying the index of the images in the dataset and labels list.
    """
    # Use the prepared labels (like show_prepared_validation_images does) so that images and labels
    # always come from the same preparation step.
    show_images(prepared_train_images, prepared_train_labels, class_names, num)


def show_prepared_validation_images(num: NUM_TYPE) -> None:
    """
    API function plotting the selected images of the prepared validation dataset, each with its colorbar.

    :param num: Integer, range or list of integers selecting the images by their index in the prepared
                validation dataset.
    """
    show_images(dataset=prepared_validation_images, labels=prepared_validation_labels,
                classes=class_names, num=num)


def show_prepared_test_images(num: NUM_TYPE) -> None:
    """
    API function to show images of the prepared (scaled) test dataset with their colorbar.

    :param num: Integer, range or list of integers specifying the index of the images in the dataset and labels list.
    """
    # Use the prepared labels (like show_prepared_validation_images does) so that images and labels
    # always come from the same preparation step.
    show_images(prepared_test_images, prepared_test_labels, class_names, num)

### Visualize prepared train/validation/test images

In [None]:
# Preview the first five prepared images of the test, train and validation set (in this order).
for show_prepared in (show_prepared_test_images, show_prepared_train_images, show_prepared_validation_images):
    show_prepared(range(5))

## Model-Building

### Helper functions

In [None]:
from keras import layers, optimizers, losses, metrics, activations


def build_model(num_hidden_layers: int, num_hidden_layers_neurons: Union[List[int], int],
                hidden_activation_functions, optimizer, loss_function,
                metrics_list) -> keras.models.Model:
    """
    API function for building and compiling a fully connected (dense) sequential model.

    The model consists of an input of shape input_shape, a Flatten layer, num_hidden_layers Dense hidden layers
    and a final Dense layer with one neuron per entry of class_names and no activation function.

    :param num_hidden_layers: Number of hidden layers.
    :param num_hidden_layers_neurons: Integer or list/tuple of integers, specifying the number of neurons in the
                                      hidden layers. If this is only an integer, all hidden layers will have the
                                      same number of neurons.
    :param hidden_activation_functions: Activation function as string, from keras.activations, or a list/tuple of
                                        these. If no list/tuple is passed, all hidden layers will have the same
                                        activation function.
    :param optimizer: Optimizer used for compiling the model.
    :param loss_function: Loss function used for compiling the model.
    :param metrics_list: Metric or list/tuple of metrics used for compiling the model.
    :return: The built and compiled model.
    :raises AssertionError: If a list/tuple is passed whose length differs from num_hidden_layers.
    """
    if isinstance(num_hidden_layers_neurons, (list, tuple)):
        assert num_hidden_layers == len(num_hidden_layers_neurons), \
            "num_hidden_layers_neurons needs exactly one entry per hidden layer"
        num_hidden_layers_neurons = list(num_hidden_layers_neurons)
    else:
        num_hidden_layers_neurons = [num_hidden_layers_neurons] * num_hidden_layers

    if isinstance(hidden_activation_functions, (list, tuple)):
        assert num_hidden_layers == len(hidden_activation_functions), \
            "hidden_activation_functions needs exactly one entry per hidden layer"
        hidden_activation_functions = list(hidden_activation_functions)
    else:
        hidden_activation_functions = [hidden_activation_functions] * num_hidden_layers

    if isinstance(metrics_list, tuple):
        metrics_list = list(metrics_list)
    elif not isinstance(metrics_list, list):
        metrics_list = [metrics_list]

    ret_model = keras.models.Sequential(
        name=f"model_sequential_{num_hidden_layers}_{'_'.join(str(neurons) for neurons in num_hidden_layers_neurons)}")
    # Use the image shape derived from the loaded dataset instead of a hard-coded (28, 28).
    ret_model.add(keras.Input(shape=input_shape))

    # Use layers.Flatten to flatten the input n-dimensional array into a one-dimensional array
    ret_model.add(layers.Flatten())

    for neurons, activation in zip(num_hidden_layers_neurons, hidden_activation_functions):
        ret_model.add(layers.Dense(neurons, activation=activation))

    # Output layer without activation: the model emits one raw score (logit) per class.
    ret_model.add(layers.Dense(len(class_names)))

    ret_model.summary()

    ret_model.compile(optimizer=optimizer,
                      loss=loss_function,
                      metrics=metrics_list)

    return ret_model

### Build model

Build a model with 2 hidden layers of 128 neurons each. The hidden layers use the ReLU activation function. The model is optimized with the Adam optimizer, the loss function is SparseCategoricalCrossentropy (computed from logits, because the output layer has no activation function) and the only metric is SparseCategoricalAccuracy.

In [None]:
# Architecture: two hidden layers with 128 neurons each and ReLU activation.
num_hidden_layer, num_hidden_neurons_per_layer = 2, 128
activation_function = activations.relu

# Compile settings: the loss works on logits, as the output layer of the model has no activation.
optimizer = optimizers.Adam()
loss_function = losses.SparseCategoricalCrossentropy(from_logits=True)
metrics_function = metrics.SparseCategoricalAccuracy()

model = build_model(
    num_hidden_layers=num_hidden_layer,
    num_hidden_layers_neurons=num_hidden_neurons_per_layer,
    hidden_activation_functions=activation_function,
    optimizer=optimizer,
    loss_function=loss_function,
    metrics_list=metrics_function,
)

## Model-Training

### Helper functions

In [None]:
def train(model_train: keras.models.Model, batch_size: int, epochs: int):
    """
    API function fitting a model on the prepared train dataset, validated on the prepared validation dataset.

    :param model_train: Model to train.
    :param batch_size: Size of a batch.
    :param epochs: Epochs to train.
    :return: The object returned by model_train.fit().
    """
    validation_set = (prepared_validation_images, prepared_validation_labels)
    return model_train.fit(
        x=prepared_train_images,
        y=prepared_train_labels,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=validation_set,
    )

### Training

In [None]:
# Training hyperparameters
batch_size, epochs = 32, 10

model_history = train(model_train=model, batch_size=batch_size, epochs=epochs)

## Model-Evaluation

### Helper functions

In [None]:
def evaluate(model_evaluate: keras.models.Model, model_history, metric: str) -> None:
    """
    API function for evaluating a trained model.

    Evaluates the model on the prepared test dataset, prints the resulting loss and metric value and adds the
    train and validation curve of the given metric to the current matplotlib figure. The figure is neither
    created nor shown here; that is left to the caller, so several models can share one plot.

    :param model_evaluate: Trained model to evaluate.
    :param model_history: Training history of the model (as returned by train()).
    :param metric: Key of the training history to plot, e.g. "loss". The validation curve is read from the
                   key "val_" + metric.
    """
    # evaluate() yields the loss followed by one value per compiled metric; only the first metric is reported,
    # so models compiled with more than one metric can be evaluated as well.
    loss, accuracy, *_ = model_evaluate.evaluate(prepared_test_images, prepared_test_labels)
    print(f"\nAccuracy on test images: {accuracy}, Loss: {loss}")

    plt.plot(model_history.epoch, model_history.history[metric],
             label=f"Train - {model_evaluate.name}")
    plt.plot(model_history.epoch, model_history.history["val_" + metric],
             label=f"Validation - {model_evaluate.name}", linestyle="--")
    plt.xlabel('Epoch')
    plt.ylabel(metric)
    plt.legend()

### Evaluation

In [None]:
# One plot per history key: first the loss, then the accuracy metric.
for metric_name in ("loss", "sparse_categorical_accuracy"):
    plt.figure(1, figsize=(10, 5))
    evaluate(model, model_history, metric_name)
    plt.show()

## Predictions

### Predictable data preparation

To make the data of the dataset easy to use, the original (unprepared) training and test images and their labels are concatenated and scaled, providing images which can be predicted together with their corresponding labels.
The prediction can be executed on custom 8-bit 28x28 images, too (but there is no direct support for this in the notebook - it can be built with tkinter's filechooser, for example).

In [None]:
# Work on copies of the raw train and test data.
# Note: train_images/train_labels only hold the training split at this point, so the validation
# images are not part of the predictable data.
tmp_train_images, tmp_train_labels = np.copy(train_images), np.copy(train_labels)
tmp_test_images, tmp_test_labels = np.copy(test_images), np.copy(test_labels)

# Train data first, test data appended; the images are scaled by 1/255 like the prepared datasets.
predictable_images = np.concatenate((tmp_train_images, tmp_test_images)) / 255.0
predictable_labels = np.concatenate((tmp_train_labels, tmp_test_labels))

### Helper functions

In [None]:
from typing import Tuple

def show_images_prediction(predictions_result_data: List[Tuple[int, list]], text: str) -> None:
    """
    Internal function for showing multiple images in one plot, including predicted class-dependent probability-bar.
    This should only be called inside the API functions, never directly.

    For every prediction the image is plotted next to a bar chart of the predicted class probabilities.
    The image caption shows the predicted class, its probability in percent and the correct class in
    parentheses; it is green for a correct and red for a wrong prediction. In the bar chart the bar of the
    predicted class is red and the bar of the correct class is blue (blue wins if both are the same bar).

    :param predictions_result_data: Results of the prediction as a list of tuples with format: (image_num, prediction)
    :param text: Text that should be displayed in the result plots.
    """
    if not predictions_result_data:
        # Nothing to plot; an empty grid would request a figure of size 0.
        return

    # Every prediction occupies two neighbouring cells: the image and its probability bar chart.
    num_cells = 2 * len(predictions_result_data)
    len_sqrt = int(np.ceil(np.sqrt(num_cells)))
    # An even number of columns keeps each image and its bar chart in the same row.
    len_sqrt += len_sqrt % 2

    plt.figure(figsize=(len_sqrt * 3, len_sqrt * 3))
    plt.suptitle(text, fontsize=14)
    for index, (predicted_image_num, prediction_result) in enumerate(predictions_result_data):
        predicted_class_num = np.argmax(prediction_result)
        correct_class_num = predictable_labels[predicted_image_num]

        prediction_probability = np.max(prediction_result)

        predicted_class = class_names[predicted_class_num]
        correct_class = class_names[correct_class_num]

        # Image
        plt.subplot(len_sqrt, len_sqrt, 2 * index + 1)

        image = predictable_images[predicted_image_num]
        if np.max(image) <= 1:
            plt.imshow(image, cmap=colormaps["gray"])
        else:
            plt.imshow(image)

        plt.xticks([])
        plt.yticks([])
        plt.grid(False)

        color = "green" if predicted_class == correct_class else "red"

        plt.xlabel(f"{predicted_class} {int(prediction_probability * 100)}% ({correct_class})",
                   color=color)

        # Probability Bar Chart
        plt.subplot(len_sqrt, len_sqrt, 2 * index + 2)

        plt.xticks(range(len(class_names)))
        plt.grid(False)
        plt.ylim([0, 1])

        bar_plt = plt.bar(range(len(class_names)), prediction_result, color="#777777")

        # The correct class is colored last, so it stays blue when the prediction is correct.
        bar_plt[predicted_class_num].set_color('red')
        bar_plt[correct_class_num].set_color('blue')

    plt.show()


def predict_images(prediction_model: keras.models.Model, num: NUM_TYPE) -> List[Tuple[int, list]]:
    """
    API function for predicting images of the predictable_images dataset and the corresponding labels.

    :param prediction_model: Trained model.
    :param num: Integer, range or list of integers specifying the index of the images in the dataset and labels list.
    :returns: Prediction results for further processing, as a list of tuples (image index, prediction),
              in the order given by num.
    """
    # Normalize the selection to a list of indices, so that a single integer is handled like a
    # one-element selection (previously num[index] failed for an integer).
    indices = [num] if isinstance(num, int) else list(num)
    if not indices:
        return []

    # Indexing with a list always yields a batch (one leading sample axis), even for a single image,
    # and returns a new array, so predictable_images itself is never reshaped.
    prediction_data = predictable_images[indices]

    predictions = prediction_model.predict(prediction_data)

    return list(zip(indices, predictions))

def build_prediction_model(input_model: keras.models.Model) -> keras.models.Model:
    """
    API function wrapping a trained model into a model that outputs class-dependent probabilities.
    Precondition: the last layer of the input model is a dense layer with linear activation.

    :param input_model: Trained model.
    :returns: Prediction model, named "prediction_" followed by the name of the input model.
    """
    # Keep the trained model as it is and append a Softmax layer converting its output to probabilities.
    stacked_layers = [input_model, layers.Softmax()]
    prediction_name = f"prediction_{input_model.name}"
    return keras.models.Sequential(stacked_layers, name=prediction_name)


### Build prediction model

In [None]:
# Wrap the trained model so that it outputs class probabilities.
prediction_model = build_prediction_model(input_model=model)

### Execute prediction

In [None]:
# Predict the first five predictable images and plot them next to their class probabilities.
prediction_result = predict_images(prediction_model=prediction_model, num=range(5))
show_images_prediction(predictions_result_data=prediction_result, text=prediction_model.name)

## Further experiments

### Hidden layer variations

#### Training

In [None]:
# Hyperparameters - Build
num_hidden_neurons_per_layer = 128
activation_function = activations.relu
optimizer = optimizers.Adam  # class, not instance: every model gets its own optimizer below
loss_function = losses.SparseCategoricalCrossentropy(from_logits=True)
metrics_function = metrics.SparseCategoricalAccuracy()

# Hyperparameters - Training
batch_size, epochs = 32, 10

# Train one model per number of hidden layers (1 to 5) and keep it together with its history.
models = []
for num_hidden_layer in range(1, 6):
    model = build_model(
        num_hidden_layers=num_hidden_layer,
        num_hidden_layers_neurons=num_hidden_neurons_per_layer,
        hidden_activation_functions=activation_function,
        optimizer=optimizer(),
        loss_function=loss_function,
        metrics_list=metrics_function,
    )
    model_history = train(model_train=model, batch_size=batch_size, epochs=epochs)
    models.append(dict(num_hidden_layer=num_hidden_layer, model=model, history=model_history))

#### Evaluation

In [None]:
# One shared plot per history key, containing the curves of all trained models.
for metric_name in ("loss", "sparse_categorical_accuracy"):
    plt.figure(len(models), figsize=(15, 10))
    for model in models:
        evaluate(model["model"], model["history"], metric_name)
    plt.show()

#### Prediction

In [None]:
# Predict the first five predictable images with every trained model.
for model in models:
    prediction_model = build_prediction_model(input_model=model["model"])
    prediction_result = predict_images(prediction_model=prediction_model, num=range(5))
    show_images_prediction(predictions_result_data=prediction_result,
                           text=f"Hidden Layers: {model['num_hidden_layer']}")

### Hidden Neurons variations on 2 hidden layer model

#### Training

In [None]:
from itertools import product

# Hyperparameters - Build
num_hidden_layers = 2
activation_function = activations.relu
optimizer = optimizers.Adam  # class, not instance: every model gets its own optimizer below
loss_function = losses.SparseCategoricalCrossentropy(from_logits=True)
metrics_function = metrics.SparseCategoricalAccuracy()

# Hyperparameters - Training
batch_size, epochs = 32, 10

# Train one model for every combination of neuron counts 2**2 ... 2**9 in the two hidden layers.
models = []
for num_hidden_layer_neurons in ([2 ** first, 2 ** second]
                                 for first, second in product(range(2, 10), repeat=2)):
    model = build_model(
        num_hidden_layers=num_hidden_layers,
        num_hidden_layers_neurons=num_hidden_layer_neurons,
        hidden_activation_functions=activation_function,
        optimizer=optimizer(),
        loss_function=loss_function,
        metrics_list=metrics_function,
    )
    model_history = train(model_train=model, batch_size=batch_size, epochs=epochs)
    models.append(dict(num_hidden_layer_neurons=num_hidden_layer_neurons, model=model, history=model_history))

#### Evaluation

In [None]:
# One shared plot per history key, containing the curves of all trained models.
for metric_name in ("loss", "sparse_categorical_accuracy"):
    plt.figure(len(models), figsize=(15, 10))
    for model in models:
        evaluate(model["model"], model["history"], metric_name)
    plt.show()

#### Prediction

In [None]:
# Predict the first five predictable images with every trained model.
for model in models:
    prediction_model = build_prediction_model(input_model=model["model"])
    prediction_result = predict_images(prediction_model=prediction_model, num=range(5))
    show_images_prediction(predictions_result_data=prediction_result,
                           text=f"Hidden Neurons: {model['num_hidden_layer_neurons']}")

### Activation function variation

#### Training

In [None]:
models = []

# Hyperparameters - Build
num_hidden_layers, num_hidden_neurons_per_layer = 2, 128
optimizer = optimizers.Adam  # class, not instance: every model gets its own optimizer below
loss_function = losses.SparseCategoricalCrossentropy(from_logits=True)
metrics_function = metrics.SparseCategoricalAccuracy()

# Hyperparameters - Training
batch_size, epochs = 32, 10

# Train one model per activation function used in the hidden layers.
for activation_function in (activations.relu, activations.sigmoid, activations.tanh):
    model = build_model(
        num_hidden_layers=num_hidden_layers,
        num_hidden_layers_neurons=num_hidden_neurons_per_layer,
        hidden_activation_functions=activation_function,
        optimizer=optimizer(),
        loss_function=loss_function,
        metrics_list=metrics_function,
    )
    model_history = train(model_train=model, batch_size=batch_size, epochs=epochs)
    models.append(dict(activation_function=activation_function, model=model, history=model_history))

#### Evaluation

In [None]:
# One shared plot per history key, containing the curves of all trained models.
for metric_name in ("loss", "sparse_categorical_accuracy"):
    plt.figure(len(models), figsize=(15, 10))
    for model in models:
        evaluate(model["model"], model["history"], metric_name)
    plt.show()

#### Prediction

In [None]:
# Predict the first five predictable images with every trained model.
for model in models:
    prediction_model = build_prediction_model(input_model=model["model"])
    prediction_result = predict_images(prediction_model=prediction_model, num=range(5))
    show_images_prediction(predictions_result_data=prediction_result,
                           text=f"Activation function: {model['activation_function']}")

### Different Architectures

In [None]:
# Hyperparameters - Training
batch_size = 32
epochs = 10

# Each architecture is built and then trained itself. Previously every train() call received `model`,
# a leftover variable of the preceding cells, so model_1 ... model_4 were never trained.

# Model 1: 8 hidden layers with 16 neurons each, sigmoid activation, mean squared error loss.
# NOTE(review): MeanSquaredError is applied to integer class labels and the raw model outputs here -
# confirm that this is the intended experiment.
model_1 = build_model(8, 16, activations.sigmoid, optimizers.Adam(), losses.MeanSquaredError(), metrics.SparseCategoricalAccuracy())
model_1_history = train(model_1, batch_size, epochs)

# Model 2: a single hidden layer with 256 neurons and tanh activation.
model_2 = build_model(1, 256, activations.tanh, optimizers.Adam(), losses.SparseCategoricalCrossentropy(from_logits=True), metrics.SparseCategoricalAccuracy())
model_2_history = train(model_2, batch_size, epochs)

# Model 3: 5 hidden layers of very different sizes (down to a single neuron), trained with a learning rate of 0.5.
model_3 = build_model(5, [200,150,8,39,1], activations.tanh, optimizers.Adam(learning_rate=0.5), losses.SparseCategoricalCrossentropy(from_logits=True), metrics.SparseCategoricalAccuracy())
model_3_history = train(model_3, batch_size, epochs)

# Model 4: 8 hidden layers with 50000 neurons each, trained with a learning rate of 0.00005.
# NOTE(review): a model of this size presumably needs a very large amount of memory - confirm it is feasible.
model_4 = build_model(8, 50000, activations.relu, optimizers.Adam(learning_rate=0.00005), losses.SparseCategoricalCrossentropy(from_logits=True), metrics.SparseCategoricalAccuracy())
model_4_history = train(model_4, batch_size, epochs)

In [None]:
# Pair every architecture with its training history.
trained_architectures = (
    (model_1, model_1_history),
    (model_2, model_2_history),
    (model_3, model_3_history),
    (model_4, model_4_history),
)

# One shared plot per history key: first the accuracy metric, then the loss.
for metric_name in ("sparse_categorical_accuracy", "loss"):
    plt.figure(4, figsize=(10, 5))
    for architecture_model, architecture_history in trained_architectures:
        evaluate(architecture_model, architecture_history, metric_name)
    plt.show()



In [None]:
# For every architecture: build the prediction model, predict the first 26 predictable images
# and plot the results.
prediction_model_1 = build_prediction_model(input_model=model_1)
prediction_result_1 = predict_images(prediction_model=prediction_model_1, num=range(26))
show_images_prediction(predictions_result_data=prediction_result_1, text="Model 1")

prediction_model_2 = build_prediction_model(input_model=model_2)
prediction_result_2 = predict_images(prediction_model=prediction_model_2, num=range(26))
show_images_prediction(predictions_result_data=prediction_result_2, text="Model 2")

prediction_model_3 = build_prediction_model(input_model=model_3)
prediction_result_3 = predict_images(prediction_model=prediction_model_3, num=range(26))
show_images_prediction(predictions_result_data=prediction_result_3, text="Model 3")

prediction_model_4 = build_prediction_model(input_model=model_4)
prediction_result_4 = predict_images(prediction_model=prediction_model_4, num=range(26))
show_images_prediction(predictions_result_data=prediction_result_4, text="Model 4")