# <center>Recognizing Digits using Neural Networks</center>
#### <center>Academic Year 2020 - 2021</center>
#### <center>Group 15</center>

<center>
João Vitor VARGAS SOARES <br>
Nicolau PEREIRA ALFF
</center>

---

This is a project assignment for the course Intelligent Systems: Recognition and Reasoning at ENSIMAG.

We will program a neural network in Python 3 and Keras to recognize digits in the MNIST (Modified National Institute of Standards and Technology) dataset.





[Link to the report](https://docs.google.com/document/d/1Bdze_nderfIXkC_ewwc2OzQ3IVtiaJ2Qy45FCWcxR6Q/edit?usp=sharing)

[Link to the given example](https://colab.research.google.com/drive/1MSoUL8oucsJOT4CaEcELDJ67YrEBjCBi?usp=sharing)



# Fully connected multi-layer network

In [None]:
#Import all libraries we will need

import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import math
from sklearn.model_selection import train_test_split
from keras.layers.core import Dense, Dropout, Activation
from keras.models import Sequential
from keras.callbacks import EarlyStopping
from matplotlib import pyplot
from sklearn.metrics import accuracy_score, roc_curve
from sklearn.metrics import precision_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
def prepareData():
    """Load MNIST, flatten the images and split them into train/validation/test sets.

    The official train and test partitions are merged and re-split with a
    fixed random seed: 10% test, then 1/9 of the remaining 90% (i.e. 10% of
    the total) for validation, leaving 80% for training.

    Returns:
        Tuple ``(x_test, x_train, x_valid, y_test, y_train, y_valid,
        y_test_classes)``. The ``x_*`` arrays are float32 images scaled to
        [0, 1] and reshaped to 784 values per sample, the ``y_*`` arrays are
        one-hot encoded over 10 classes, and ``y_test_classes`` holds the
        test labels as they were before one-hot encoding.
    """
    class_count = 10  # digits 0 to 9

    # Load the dataset
    (train_images, train_labels), (test_images, test_labels) = keras.datasets.mnist.load_data()

    # Merge both official partitions so that they can be re-divided below
    images = np.concatenate((train_images, test_images))
    labels = np.concatenate((train_labels, test_labels))

    # Scale pixels to [0, 1], then flatten each image: (N, 28, 28) -> (N, 784)
    images = images.astype("float32") / 255
    images = images.reshape(images.shape[0], 784)

    # First cut: 10% of everything becomes the test set
    x_rest, x_test, y_rest, y_test = train_test_split(
        images, labels, test_size=0.1, random_state=42)
    # Second cut: 1/9 of the remaining 90% becomes the validation set
    x_train, x_valid, y_train, y_valid = train_test_split(
        x_rest, y_rest, test_size=1/9, random_state=42)

    # Keep the plain class labels of the test set for the sklearn metrics
    y_test_classes = y_test

    # One-hot encode the targets used by the network
    y_train = keras.utils.to_categorical(y_train, class_count)
    y_valid = keras.utils.to_categorical(y_valid, class_count)
    y_test = keras.utils.to_categorical(y_test, class_count)

    return x_test, x_train, x_valid, y_test, y_train, y_valid, y_test_classes

def fullyConnectedMultiLayerNetwork(layers, x_train, y_train, x_valid, y_valid, optimizer):
    """Build, compile and train a fully connected softmax classifier.

    Args:
        layers: Sequence of dicts, each with the keys "units", "activation"
            and "dropout". ``layers[0]`` configures the first Dense layer
            (the one receiving the input); every entry of ``layers[1:-1]``
            adds one further Dense + Dropout pair.
            NOTE(review): because of the ``[1:-1]`` slice, the last entry of
            a sequence holding two or more entries is never turned into a
            layer. Confirm this is intended before changing it: the number
            of layers built for the callers in this file depends on it.
        x_train: Training samples; ``x_train.shape[1]`` is used as input size.
        y_train: Training targets for the 10-way categorical cross-entropy
            (presumably one-hot vectors -- verify against the caller).
        x_valid: Validation samples.
        y_valid: Validation targets, same encoding as ``y_train``.
        optimizer: Whatever ``model.compile(optimizer=...)`` accepts.

    Returns:
        Tuple ``(model, history)``: the trained model and the value returned
        by ``model.fit``.
    """
    # Initializing model
    model = Sequential()

    # First Dense layer: it also declares the input shape of the network
    model.add(Dense(units=layers[0]["units"],
                    input_shape=(x_train.shape[1],),
                    activation=layers[0]["activation"]))

    # Using dropout to avoid overfitting
    model.add(Dropout(layers[0]["dropout"]))

    # Adding the remaining hidden layers (see the NOTE on `layers` above)
    for layer in layers[1:-1]:
        model.add(Dense(units=layer["units"], activation=layer["activation"]))
        model.add(Dropout(layer["dropout"]))

    # Adding output layer: one unit per digit class, softmax probabilities
    model.add(Dense(10))
    model.add(Activation('softmax'))

    # Compiling model
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer=optimizer)

    # Stop early once the validation accuracy has not improved by at least
    # 0.001 for 8 consecutive epochs.
    # NOTE(review): restore_best_weights keeps its default, so the returned
    # model holds the weights of the last epoch run, not of the best one.
    es = EarlyStopping(monitor='val_accuracy',
                       patience=8,
                       min_delta=0.001,
                       mode='max')

    # model fit with maximum of 500 epochs; `callbacks` is documented as a
    # list of callbacks, so the single callback is wrapped in one
    history = model.fit(x_train, y_train,
                        batch_size=128, epochs=500,
                        verbose=2,
                        validation_data=(x_valid, y_valid),
                        callbacks=[es])
    return model, history

def predictionsAndMetrics(model, x_test, y_test_classes):
    """Evaluate a trained model on the test set and print/return five metrics.

    Args:
        model: Object exposing ``predict(x, verbose=...)`` that returns one
            score per class for every sample.
        x_test: Test samples passed to ``model.predict``.
        y_test_classes: True class labels of the test samples (class
            indices, not one-hot vectors).

    Returns:
        Tuple ``(accuracy, precision, recall, f1, auc)``. Precision, recall
        and F1 are macro-averaged; the ROC AUC is computed one-vs-rest.
    """
    # predict probabilities for test set (single inference pass)
    y_prediction_probs = model.predict(x_test, verbose=0)
    # derive crisp classes from the same probabilities instead of running
    # the model a second time
    y_prediction_classes = np.argmax(y_prediction_probs, axis=-1)

    # accuracy: (tp + tn) / (p + n)
    accuracy = accuracy_score(y_test_classes, y_prediction_classes)
    print('Accuracy: %f' % accuracy)
    # precision tp / (tp + fp)
    precision = precision_score(y_test_classes, y_prediction_classes, average='macro')
    print('Precision: %f' % precision)
    # recall: tp / (tp + fn)
    recall = recall_score(y_test_classes, y_prediction_classes, average='macro')
    print('Recall: %f' % recall)
    # f1: 2 tp / (2 tp + fp + fn)
    f1 = f1_score(y_test_classes, y_prediction_classes, average='macro')
    print('F1 score: %f' % f1)
    # ROC AUC
    auc = roc_auc_score(y_test_classes, y_prediction_probs, multi_class='ovr')
    print('ROC AUC: %f' % auc)
    return accuracy, precision, recall, f1, auc

def varyingLayers(x_test, x_train, x_valid, y_test, y_train, y_valid, y_test_classes, layers, optimizer):
    """Train five networks, appending one more layer specification after each run.

    After every run a new spec (512 units, relu, dropout 0.2) is appended
    with ``np.append``, which returns a new array: the name ``layers`` is
    only rebound locally and the caller's array keeps its length.
    How many layers each run really contains is decided by
    ``fullyConnectedMultiLayerNetwork``.

    ``y_test``, ``y_train`` and ``y_valid`` are forwarded as given;
    ``y_test`` itself is not used here.

    Returns:
        Dict with the keys "accuracy", "precision", "recall", "f1" and
        "auc", each mapping to an array of 5 values (one per run).
    """
    run_count = 5
    metric_names = ("accuracy", "precision", "recall", "f1", "auc")
    metrics = {name: np.zeros(run_count) for name in metric_names}

    for run in range(run_count):
        model, history = fullyConnectedMultiLayerNetwork(
            layers, x_train, y_train, x_valid, y_valid, optimizer)
        acc, prec, rec, f1_value, auc_value = predictionsAndMetrics(model, x_test, y_test_classes)
        for name, value in zip(metric_names, (acc, prec, rec, f1_value, auc_value)):
            metrics[name][run] = value
        # Grow the configuration for the next run
        layers = np.append(layers, [{"units": 512, "activation": 'relu', "dropout": 0.2}])

    return metrics

# This function varies the number of units from 32 to 512 for comparison
def varyingUnits(x_test, x_train, x_valid, y_test, y_train, y_valid, y_test_classes, layers, optimizer):
    """Train five networks, doubling the "units" of the first two layer specs each run.

    The first run uses the unit counts found in ``layers``; each following
    run uses twice the previous ones (32, 64, 128, 256, 512 when the caller
    starts at 32).

    Args:
        layers: Sequence of at least two layer dicts (keys "units",
            "activation", "dropout"). It is copied, so the caller's dicts
            are left untouched.
        optimizer: Forwarded to ``fullyConnectedMultiLayerNetwork``.
        (remaining arguments): data splits, forwarded unchanged; ``y_test``
            itself is not used here.

    Returns:
        Dict with the keys "accuracy", "precision", "recall", "f1" and
        "auc", each mapping to an array of 5 values (one per run).
    """
    # Work on private copies of the layer dicts: doubling "units" in place
    # would leak the final doubled values into every later experiment that
    # shares the caller's `layers`.
    layers = [dict(layer) for layer in layers]

    accuracy = np.zeros(5)
    precision = np.zeros(5)
    recall = np.zeros(5)
    f1 = np.zeros(5)
    auc = np.zeros(5)
    for i in range(5):
        model, history = fullyConnectedMultiLayerNetwork(layers, x_train, y_train, x_valid, y_valid, optimizer)
        accuracy[i], precision[i], recall[i], f1[i], auc[i] = predictionsAndMetrics(model, x_test, y_test_classes)
        # Double the width of both configured layers for the next run
        layers[0]["units"] = layers[0]["units"] * 2
        layers[1]["units"] = layers[1]["units"] * 2
    metrics = {"accuracy": accuracy, "precision": precision, "recall": recall, "f1": f1, "auc": auc}
    return metrics

def varyingLearningRate(x_test, x_train, x_valid, y_test, y_train, y_valid, y_test_classes, layers, optimizer):
    """Train five networks with Adam, dividing the learning rate by 10 after each run.

    The learning rates used are 1, 0.1, 0.01, 0.001 and 0.0001.

    The ``optimizer`` argument is ignored: a new Adam optimizer is created
    for every run. ``y_test`` itself is not used here.

    Returns:
        Dict with the keys "accuracy", "precision", "recall", "f1" and
        "auc", each mapping to an array of 5 values (one per run).
    """
    run_count = 5
    metric_names = ("accuracy", "precision", "recall", "f1", "auc")
    metrics = {name: np.zeros(run_count) for name in metric_names}

    lr = 1
    for run in range(run_count):
        # Fresh optimizer for this run, overriding the one received as argument
        optimizer = keras.optimizers.Adam(learning_rate=lr)
        model, history = fullyConnectedMultiLayerNetwork(
            layers, x_train, y_train, x_valid, y_valid, optimizer)
        acc, prec, rec, f1_value, auc_value = predictionsAndMetrics(model, x_test, y_test_classes)
        for name, value in zip(metric_names, (acc, prec, rec, f1_value, auc_value)):
            metrics[name][run] = value
        lr = lr/10

    return metrics

def varyingOptimizer(x_test, x_train, x_valid, y_test, y_train, y_valid, y_test_classes, layers):
    """Train one network per optimizer and collect the test metrics.

    The optimizers are selected by name, in this order: 'rmsprop', 'adam',
    'sgd', 'adadelta', 'adamax'. The name is handed to
    ``fullyConnectedMultiLayerNetwork`` as its ``optimizer`` argument.

    ``y_test`` itself is not used here.

    Returns:
        Dict with the keys "accuracy", "precision", "recall", "f1" and
        "auc", each mapping to an array with one value per optimizer, in
        the order listed above.
    """
    # Plain Python strings: no NumPy string array is needed just to iterate
    optimizer_names = ('rmsprop', 'adam', 'sgd', 'adadelta', 'adamax')
    run_count = len(optimizer_names)

    accuracy = np.zeros(run_count)
    precision = np.zeros(run_count)
    recall = np.zeros(run_count)
    f1 = np.zeros(run_count)
    auc = np.zeros(run_count)
    for i, optimizer_name in enumerate(optimizer_names):
        model, history = fullyConnectedMultiLayerNetwork(layers, x_train, y_train, x_valid, y_valid, optimizer_name)
        accuracy[i], precision[i], recall[i], f1[i], auc[i] = predictionsAndMetrics(model, x_test, y_test_classes)
    metrics = {"accuracy": accuracy, "precision": precision, "recall": recall, "f1": f1, "auc": auc}
    return metrics

def varyingDropout(x_test, x_train, x_valid, y_test, y_train, y_valid, y_test_classes, layers, optimizer):
    """Train five networks with dropout rates 0.1, 0.2, 0.3, 0.4 and 0.5.

    For each run the rate is written into the "dropout" key of the first
    two layer specs.

    Args:
        layers: Sequence of at least two layer dicts (keys "units",
            "activation", "dropout"). It is copied, so the caller's dicts
            are left untouched.
        optimizer: Forwarded to ``fullyConnectedMultiLayerNetwork``.
        (remaining arguments): data splits, forwarded unchanged; ``y_test``
            itself is not used here.

    Returns:
        Dict with the keys "accuracy", "precision", "recall", "f1" and
        "auc", each mapping to an array of 5 values (one per run).
    """
    # Private copies: overwriting "dropout" in place would change the
    # caller's configuration as a side effect.
    layers = [dict(layer) for layer in layers]

    accuracy = np.zeros(5)
    precision = np.zeros(5)
    recall = np.zeros(5)
    f1 = np.zeros(5)
    auc = np.zeros(5)
    for i in range(5):
        # Computed from the index rather than by repeated `+= 0.1`, which
        # accumulates float error (0.1 + 0.1 + 0.1 != 0.3)
        dropout = (i + 1) / 10
        layers[0]["dropout"] = dropout
        layers[1]["dropout"] = dropout
        model, history = fullyConnectedMultiLayerNetwork(layers, x_train, y_train, x_valid, y_valid, optimizer)
        accuracy[i], precision[i], recall[i], f1[i], auc[i] = predictionsAndMetrics(model, x_test, y_test_classes)
    metrics = {"accuracy": accuracy, "precision": precision, "recall": recall, "f1": f1, "auc": auc}
    return metrics

# --- Experiment driver for the fully connected network ---
# Load and split the data once; every experiment below reuses the same splits.
x_test, x_train, x_valid, y_test, y_train, y_valid, y_test_classes = prepareData()
# Base layer configuration shared by all experiments.
layers = np.array([
                    {"units": 32, "activation": 'relu', "dropout": 0.2},
                    {"units": 32, "activation": 'relu', "dropout": 0.2},
                    ])
# Base optimizer, passed to every experiment that takes one.
optimizer = keras.optimizers.RMSprop(learning_rate=0.001)

# NOTE: the same `layers` array and `optimizer` instance are handed to each
# experiment in turn, so any in-place change a function makes to them carries
# over to the calls that follow. The order of these calls therefore matters.
metrics_layers = varyingLayers(x_test, x_train, x_valid, y_test, y_train, y_valid, y_test_classes, layers, optimizer)
metrics_units = varyingUnits(x_test, x_train, x_valid, y_test, y_train, y_valid, y_test_classes, layers, optimizer)
metrics_learningRate = varyingLearningRate(x_test, x_train, x_valid, y_test, y_train, y_valid, y_test_classes, layers, optimizer)
metrics_optimizer = varyingOptimizer(x_test, x_train, x_valid, y_test, y_train, y_valid, y_test_classes, layers)
metrics_dropout = varyingDropout(x_test, x_train, x_valid, y_test, y_train, y_valid, y_test_classes, layers, optimizer)

# Each result is a dict of metric name -> array with one value per run.
print(metrics_layers)
print(metrics_units)
print(metrics_learningRate)
print(metrics_optimizer)
print(metrics_dropout)

Notes:

# CNN Implementation

In [None]:
# Libraries imports
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import math
from glob import glob

from matplotlib import pyplot
from sklearn.model_selection import KFold
from keras.datasets import mnist
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import AveragePooling2D
from keras.layers import MaxPooling1D
from keras.layers import AveragePooling1D
from keras.layers import Dense
from keras.layers import Flatten
from keras.optimizers import SGD

from sklearn.model_selection import train_test_split
from keras.layers.core import Dense, Dropout, Activation
from keras.models import Sequential
from keras.callbacks import EarlyStopping
from matplotlib import pyplot
from sklearn.metrics import accuracy_score, roc_curve
from sklearn.metrics import precision_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

# Model/Data parameters (module-level settings of this notebook cell)
num_classes = 10  # number of digit classes; prepareData() defines its own local copy
input_shape = (28,28,1)  # NOTE(review): not referenced by the code of this cell; CNNModel hard-codes the same shape
validation_ratio = 0.1  # NOTE(review): not referenced by the code of this cell; the split sizes are hard-coded in prepareData()

def prepareData():
    """Load MNIST, add a channel axis and split it into train/validation/test sets.

    The official train and test partitions are merged and re-split with a
    fixed random seed: 10% test, then 1/9 of the remaining 90% (i.e. 10% of
    the total) for validation, leaving 80% for training. The overall shape
    and the size of each split are printed.

    Returns:
        Tuple ``(x_test, x_train, x_valid, y_test, y_train, y_valid,
        y_test_classes)``. The ``x_*`` arrays are float32 images scaled to
        [0, 1] with a trailing axis of size 1, the ``y_*`` arrays are
        one-hot encoded over 10 classes, and ``y_test_classes`` holds the
        test labels as they were before one-hot encoding.
    """
    class_count = 10  # digits 0 to 9

    # Load the dataset and merge both official partitions
    (train_images, train_labels), (test_images, test_labels) = keras.datasets.mnist.load_data()
    images = np.concatenate((train_images, test_images))
    labels = np.concatenate((train_labels, test_labels))

    # Scale pixels to [0, 1] and append the channel axis expected by Conv2D
    images = np.expand_dims(images.astype("float32") / 255, -1)
    print("x shape:", images.shape)
    print(images.shape[0], "samples")

    # First cut: 10% of everything becomes the test set
    x_rest, x_test, y_rest, y_test = train_test_split(
        images, labels, test_size=0.1, random_state=42)
    # Second cut: 1/9 of the remaining 90% becomes the validation set
    x_train, x_valid, y_train, y_valid = train_test_split(
        x_rest, y_rest, test_size=1/9, random_state=42)

    # Keep the plain class labels of the test set for the sklearn metrics
    y_test_classes = y_test

    # One-hot encode the targets used by the network
    y_train = keras.utils.to_categorical(y_train, class_count)
    y_valid = keras.utils.to_categorical(y_valid, class_count)
    y_test = keras.utils.to_categorical(y_test, class_count)

    print(x_train.shape[0], "train samples")
    print(x_test.shape[0], "test samples")
    print(x_valid.shape[0], "validation samples")

    return x_test, x_train, x_valid, y_test, y_train, y_valid, y_test_classes

def CNNModel(layers, x_train, y_train, x_valid, y_valid, optimizer):
    """Build, compile and train a convolutional softmax classifier.

    The network always starts with two fixed blocks on 28x28x1 inputs:
    Conv2D(32, 3x3) + MaxPooling 2x2, then Conv2D(64, 3x3) + MaxPooling 2x2.

    Args:
        layers: Non-empty sequence of dicts. ``layers[0]["dropout"]`` is the
            dropout rate applied after flattening. Every entry of
            ``layers[1:-1]`` adds a Conv2D built from its "filters",
            "kernelSize" and "activation" keys, followed by a 2x2 max
            pooling.
            NOTE(review): because of the ``[1:-1]`` slice, neither the first
            nor the last entry adds a convolution -- confirm this is
            intended.
        x_train: Training images.
        y_train: Training targets for the 10-way categorical cross-entropy
            (presumably one-hot vectors -- verify against the caller).
        x_valid: Validation images.
        y_valid: Validation targets, same encoding as ``y_train``.
        optimizer: Whatever ``model.compile(optimizer=...)`` accepts.

    Returns:
        Tuple ``(model, history)``: the trained model and the value returned
        by ``model.fit``.
    """
    # Initializing model with the two fixed convolution/pooling blocks
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', input_shape=(28, 28, 1)))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform'))
    model.add(MaxPooling2D((2, 2)))

    # Adding the configurable convolutional layers (see the NOTE on `layers`)
    for layer in layers[1:-1]:
        model.add(Conv2D(layer["filters"], layer["kernelSize"], activation=layer["activation"], kernel_initializer='he_uniform'))
        model.add(MaxPooling2D((2, 2)))

    # Adding output layer
    model.add(Flatten())
    # Using dropout to avoid overfitting
    model.add(Dropout(layers[0]["dropout"]))
    model.add(Dense(10))
    model.add(Activation('softmax'))

    # Compiling model
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer=optimizer)
    #model.summary()

    # Stop early once the validation accuracy has not improved by at least
    # 0.001 for 8 consecutive epochs.
    # NOTE(review): restore_best_weights keeps its default, so the returned
    # model holds the weights of the last epoch run, not of the best one.
    es = EarlyStopping(monitor='val_accuracy',
                       patience=8,
                       min_delta=0.001,
                       mode='max')

    # model fit with maximum of 500 epochs; `callbacks` is documented as a
    # list of callbacks, so the single callback is wrapped in one
    history = model.fit(x_train, y_train,
                        batch_size=128, epochs=500,
                        verbose=2,
                        validation_data=(x_valid, y_valid),
                        callbacks=[es])

    return model, history

def predictionsAndMetrics(model, x_test, y_test_classes):
    """Evaluate a trained model on the test set and print/return five metrics.

    Args:
        model: Object exposing ``predict(x, verbose=...)`` that returns one
            score per class for every sample.
        x_test: Test samples passed to ``model.predict``.
        y_test_classes: True class labels of the test samples (class
            indices, not one-hot vectors).

    Returns:
        Tuple ``(accuracy, precision, recall, f1, auc)``. Precision, recall
        and F1 are macro-averaged; the ROC AUC is computed one-vs-rest.
    """
    # predict probabilities for test set (single inference pass)
    y_prediction_probs = model.predict(x_test, verbose=0)
    # derive crisp classes from the same probabilities instead of running
    # the model a second time
    y_prediction_classes = np.argmax(y_prediction_probs, axis=-1)

    # accuracy: (tp + tn) / (p + n)
    accuracy = accuracy_score(y_test_classes, y_prediction_classes)
    print('Accuracy: %f' % accuracy)
    # precision tp / (tp + fp)
    precision = precision_score(y_test_classes, y_prediction_classes, average='macro')
    print('Precision: %f' % precision)
    # recall: tp / (tp + fn)
    recall = recall_score(y_test_classes, y_prediction_classes, average='macro')
    print('Recall: %f' % recall)
    # f1: 2 tp / (2 tp + fp + fn)
    f1 = f1_score(y_test_classes, y_prediction_classes, average='macro')
    print('F1 score: %f' % f1)
    # ROC AUC
    auc = roc_auc_score(y_test_classes, y_prediction_probs, multi_class='ovr')
    print('ROC AUC: %f' % auc)
    return accuracy, precision, recall, f1, auc

def baseline(x_test, x_train, x_valid, y_test, y_train, y_valid, y_test_classes, layers, optimizer):
    """Train a single CNN with the given configuration and return its test metrics.

    Prints "Baseline", trains one model through ``CNNModel`` and evaluates
    it with ``predictionsAndMetrics``. ``y_test`` itself is not used here.

    Returns:
        Dict with the keys "accuracy", "precision", "recall", "f1" and
        "auc", each mapping to the single value measured for this model.
    """
    print("Baseline")
    # The former zero-filled arrays and the trailing np.append on `layers`
    # were never read, so they are gone: one model means one scalar per metric.
    model, history = CNNModel(layers, x_train, y_train, x_valid, y_valid, optimizer)
    accuracy, precision, recall, f1, auc = predictionsAndMetrics(model, x_test, y_test_classes)
    metrics = {"accuracy": accuracy, "precision": precision, "recall": recall, "f1": f1, "auc": auc}
    return metrics



def varyingConvLayers(x_test, x_train, x_valid, y_test, y_train, y_valid, y_test_classes, layers, optimizer):
    """Train five CNNs, appending one convolutional layer spec after each run.

    After every run a new spec (64 filters, 2x2 kernel, relu, dropout 0.2)
    is appended with ``np.append``, which returns a new array: the name
    ``layers`` is only rebound locally and the caller's array keeps its
    length. How many convolutions each run really contains is decided by
    ``CNNModel``, which does not build a layer from every entry.

    ``y_test`` itself is not used here.

    Returns:
        Dict with the keys "accuracy", "precision", "recall", "f1" and
        "auc", each mapping to an array of 5 values (one per run).
    """
    print("Varying Number of Convolutional Layers")
    run_count = 5
    metric_names = ("accuracy", "precision", "recall", "f1", "auc")
    metrics = {name: np.zeros(run_count) for name in metric_names}

    for run in range(run_count):
        model, history = CNNModel(layers, x_train, y_train, x_valid, y_valid, optimizer)
        acc, prec, rec, f1_value, auc_value = predictionsAndMetrics(model, x_test, y_test_classes)
        for name, value in zip(metric_names, (acc, prec, rec, f1_value, auc_value)):
            metrics[name][run] = value
        # Grow the configuration for the next run
        layers = np.append(layers, [{"filters": 64,"kernelSize": (2,2) ,"activation": 'relu', "dropout": 0.2}])

    return metrics

def varyingFiltersLayers(x_test, x_train, x_valid, y_test, y_train, y_valid, y_test_classes, layers, optimizer):
    """Train five CNNs, appending a spec with a growing filter count after each run.

    After run ``i`` (0-based) a new spec with ``8 * 2**i`` filters (8, 16,
    32, 64, 128), a 3x3 kernel, relu and dropout 0.2 is appended with
    ``np.append``. The name ``layers`` is only rebound locally, so the
    caller's array keeps its length. The filter counts of the layers that
    already exist are not modified. Which entries become convolutions is
    decided by ``CNNModel``.

    ``y_test`` itself is not used here.

    Returns:
        Dict with the keys "accuracy", "precision", "recall", "f1" and
        "auc", each mapping to an array of 5 values (one per run).
    """
    print("Varying Number of Filters per Layer")
    run_count = 5
    metric_names = ("accuracy", "precision", "recall", "f1", "auc")
    metrics = {name: np.zeros(run_count) for name in metric_names}

    for run in range(run_count):
        model, history = CNNModel(layers, x_train, y_train, x_valid, y_valid, optimizer)
        acc, prec, rec, f1_value, auc_value = predictionsAndMetrics(model, x_test, y_test_classes)
        for name, value in zip(metric_names, (acc, prec, rec, f1_value, auc_value)):
            metrics[name][run] = value
        # Spec for the next run: the filter count doubles with the run index
        layers = np.append(layers,[{"filters": 8*(2**run),"kernelSize": (3,3) ,"activation": 'relu', "dropout": 0.2}])

    return metrics



# --- Experiment driver for the CNN ---
# Load and split the data once.
x_test, x_train, x_valid, y_test, y_train, y_valid, y_test_classes = prepareData()

# Layer configuration handed to CNNModel; CNNModel reads the "dropout" of
# the first entry, so this array must not be empty.
layers = np.array([
                    {"filters": 64,"kernelSize": (3,3) ,"activation": 'relu', "dropout": 0.2},
                    ])

optimizer = keras.optimizers.Adam(learning_rate=0.01)


# Only the baseline run is active; the two sweeps below are disabled.
metrics_Baseline = baseline(x_test, x_train, x_valid, y_test, y_train, y_valid, y_test_classes, layers, optimizer)
#metrics_convolutional_layers = varyingConvLayers(x_test, x_train, x_valid, y_test, y_train, y_valid, y_test_classes, layers, optimizer)
#metrics_filters_per_layer = varyingFiltersLayers(x_test, x_train, x_valid, y_test, y_train, y_valid, y_test_classes, layers, optimizer)


# Dict of metric name -> value for the baseline model.
print(metrics_Baseline)
#print(metrics_convolutional_layers)
#print(metrics_filters_per_layer)

## Different Types and Sizes of Pooling Layers

In [None]:
# Libraries imports
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import math
from glob import glob

from matplotlib import pyplot
from sklearn.model_selection import KFold
from keras.datasets import mnist
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import GlobalMaxPooling2D
from keras.layers import Dense
from keras.layers import Flatten
from keras.optimizers import SGD

from sklearn.model_selection import train_test_split
from keras.layers.core import Dense, Dropout, Activation
from keras.models import Sequential
from keras.callbacks import EarlyStopping
from matplotlib import pyplot
from sklearn.metrics import accuracy_score, roc_curve
from sklearn.metrics import precision_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

# Model/Data parameters (module-level settings of this notebook cell)
num_classes = 10  # number of digit classes; prepareData() defines its own local copy
input_shape = (28,28,1)  # NOTE(review): not referenced by the code of this cell; CNNModel hard-codes the same shape
validation_ratio = 0.1  # NOTE(review): not referenced by the code of this cell; the split sizes are hard-coded in prepareData()

def prepareData():
    """Load MNIST, add a channel axis and split it into train/validation/test sets.

    The official partitions are merged and re-split with a fixed random
    seed into 80% training, 10% validation and 10% test data. The overall
    shape and the size of each split are printed.

    Returns:
        Tuple ``(x_test, x_train, x_valid, y_test, y_train, y_valid,
        y_test_classes)``. Images are float32 in [0, 1] with a trailing axis
        of size 1; ``y_test``, ``y_train`` and ``y_valid`` are one-hot
        encoded over 10 classes; ``y_test_classes`` holds the test labels
        as they were before one-hot encoding.
    """
    n_classes = 10  # 10 numbers, from 0 to 9

    # Fetch both official partitions and stack them into one pool
    official_train, official_test = keras.datasets.mnist.load_data()
    pool_x = np.concatenate((official_train[0], official_test[0]))
    pool_y = np.concatenate((official_train[1], official_test[1]))

    # Pixel values to [0, 1]
    pool_x = pool_x.astype("float32") / 255
    # Trailing channel axis for the convolutional layers
    pool_x = np.expand_dims(pool_x, -1)
    print("x shape:", pool_x.shape)
    print(pool_x.shape[0], "samples")

    # 10% test set, then 1/9 of the remainder (10% of the total) for validation
    x_train, x_test, y_train, y_test = train_test_split(
        pool_x, pool_y, test_size=0.1, random_state=42)
    x_train, x_valid, y_train, y_valid = train_test_split(
        x_train, y_train, test_size=1/9, random_state=42)

    # Class-index labels of the test set, kept for the sklearn metrics
    y_test_classes = y_test

    # Binary class matrices for training, validation and test targets
    y_train, y_valid, y_test = (
        keras.utils.to_categorical(y_train, n_classes),
        keras.utils.to_categorical(y_valid, n_classes),
        keras.utils.to_categorical(y_test, n_classes),
    )

    print(x_train.shape[0], "train samples")
    print(x_test.shape[0], "test samples")
    print(x_valid.shape[0], "validation samples")

    return x_test, x_train, x_valid, y_test, y_train, y_valid, y_test_classes

def CNNModel(layers, x_train, y_train, x_valid, y_valid, optimizer):
    """Build, compile and train the CNN used to compare pooling layers.

    The pooling layer is edited by hand between experiments. Variants tried:
        model.add(MaxPooling2D((2,2)))      --- Baseline
        model.add(MaxPooling2D((4,4)))
        model.add(AveragePooling2D((2,2)))
        model.add(AveragePooling2D((1,1)))
        model.add(MaxPooling2D(2,1))        (currently active: pool size 2, stride 1)

    Args:
        layers: Sequence of dicts; every entry of ``layers[1:-1]`` adds a
            Conv2D built from its "filters", "kernelSize" and "activation"
            keys, followed by a pooling layer. An empty sequence adds
            nothing.
        x_train: Training images (the first layer expects 28x28x1 inputs).
        y_train: Training targets for the 10-way categorical cross-entropy
            (presumably one-hot vectors -- verify against the caller).
        x_valid: Validation images.
        y_valid: Validation targets, same encoding as ``y_train``.
        optimizer: Whatever ``model.compile(optimizer=...)`` accepts.

    Returns:
        Tuple ``(model, history)``: the trained model and the value returned
        by ``model.fit``.
    """
    # Initializing model
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', input_shape=(28, 28, 1)))
    model.add(MaxPooling2D(2, 1))
    model.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform'))
    model.add(MaxPooling2D(2, 1))
    # Using dropout to avoid overfitting
    model.add(Dropout(0.2))

    # Adding the configurable convolutional layers
    for layer in layers[1:-1]:
        model.add(Conv2D(layer["filters"], layer["kernelSize"], activation=layer["activation"], kernel_initializer='he_uniform'))
        model.add(MaxPooling2D(2, 1))

    # NOTE(review): when no configurable layer is added, this second dropout
    # directly follows the first one -- confirm that stacking is intended.
    model.add(Dropout(0.2))
    # Adding output layer
    model.add(Flatten())
    model.add(Dense(10))
    model.add(Activation('softmax'))

    # Compiling model
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer=optimizer)
    model.summary()

    # Stop early once the validation accuracy has not improved by at least
    # 0.001 for 8 consecutive epochs.
    # NOTE(review): restore_best_weights keeps its default, so the returned
    # model holds the weights of the last epoch run, not of the best one.
    es = EarlyStopping(monitor='val_accuracy',
                       patience=8,
                       min_delta=0.001,
                       mode='max')

    # model fit with maximum of 500 epochs; `callbacks` is documented as a
    # list of callbacks, so the single callback is wrapped in one
    history = model.fit(x_train, y_train,
                        batch_size=128, epochs=500,
                        verbose=0,
                        validation_data=(x_valid, y_valid),
                        callbacks=[es])

    return model, history

def predictionsAndMetrics(model, x_test, y_test_classes):
    """Evaluate a trained model on the test set and print/return five metrics.

    Args:
        model: Object exposing ``predict(x, verbose=...)`` that returns one
            score per class for every sample.
        x_test: Test samples passed to ``model.predict``.
        y_test_classes: True class labels of the test samples (class
            indices, not one-hot vectors).

    Returns:
        Tuple ``(accuracy, precision, recall, f1, auc)``. Precision, recall
        and F1 are macro-averaged; the ROC AUC is computed one-vs-rest.
    """
    # predict probabilities for test set (single inference pass)
    y_prediction_probs = model.predict(x_test, verbose=0)
    # derive crisp classes from the same probabilities instead of running
    # the model a second time
    y_prediction_classes = np.argmax(y_prediction_probs, axis=-1)

    # accuracy: (tp + tn) / (p + n)
    accuracy = accuracy_score(y_test_classes, y_prediction_classes)
    print('Accuracy: %f' % accuracy)
    # precision tp / (tp + fp)
    precision = precision_score(y_test_classes, y_prediction_classes, average='macro')
    print('Precision: %f' % precision)
    # recall: tp / (tp + fn)
    recall = recall_score(y_test_classes, y_prediction_classes, average='macro')
    print('Recall: %f' % recall)
    # f1: 2 tp / (2 tp + fp + fn)
    f1 = f1_score(y_test_classes, y_prediction_classes, average='macro')
    print('F1 score: %f' % f1)
    # ROC AUC
    auc = roc_auc_score(y_test_classes, y_prediction_probs, multi_class='ovr')
    print('ROC AUC: %f' % auc)
    return accuracy, precision, recall, f1, auc

def baseline(x_test, x_train, x_valid, y_test, y_train, y_valid, y_test_classes, layers, optimizer):
    """Train a single CNN with the given configuration and return its test metrics.

    Prints "Baseline", trains one model through ``CNNModel`` and evaluates
    it with ``predictionsAndMetrics``. ``y_test`` itself is not used here.

    Returns:
        Dict with the keys "accuracy", "precision", "recall", "f1" and
        "auc", each mapping to the value measured for this model.
    """
    print("Baseline")
    trained_model, history = CNNModel(layers, x_train, y_train, x_valid, y_valid, optimizer)
    scores = predictionsAndMetrics(trained_model, x_test, y_test_classes)
    accuracy, precision, recall, f1, auc = scores
    return {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1": f1,
        "auc": auc,
    }



# --- Experiment driver for the pooling comparison ---
# Load and split the data once.
x_test, x_train, x_valid, y_test, y_train, y_valid, y_test_classes = prepareData()

# Empty configuration: CNNModel in this cell only reads layers[1:-1], so no
# convolution is added beyond the ones it hard-codes.
layers = np.array([

                    ])

optimizer = keras.optimizers.Adam(learning_rate=0.01)


metrics_Baseline = baseline(x_test, x_train, x_valid, y_test, y_train, y_valid, y_test_classes, layers, optimizer)



# Dict of metric name -> value for the trained model.
print(metrics_Baseline)


x shape: (70000, 28, 28, 1)
70000 samples
56000 train samples
7000 test samples
7000 validation samples
Baseline
Model: "sequential_79"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_137 (Conv2D)          (None, 26, 26, 32)        320       
_________________________________________________________________
max_pooling2d_121 (MaxPoolin (None, 25, 25, 32)        0         
_________________________________________________________________
conv2d_138 (Conv2D)          (None, 23, 23, 64)        18496     
_________________________________________________________________
max_pooling2d_122 (MaxPoolin (None, 22, 22, 64)        0         
_________________________________________________________________
dropout_75 (Dropout)         (None, 22, 22, 64)        0         
_________________________________________________________________
dropout_76 (Dropout)         (None, 22, 22, 64)        0         
______

## CNN - Different Numbers of filters per Layer

In [None]:
# Libraries imports
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import math
from glob import glob

from matplotlib import pyplot
from sklearn.model_selection import KFold
from keras.datasets import mnist
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import GlobalMaxPooling2D
from keras.layers import Dense
from keras.layers import Flatten
from keras.optimizers import SGD

from sklearn.model_selection import train_test_split
from keras.layers.core import Dense, Dropout, Activation
from keras.models import Sequential
from keras.callbacks import EarlyStopping
from matplotlib import pyplot
from sklearn.metrics import accuracy_score, roc_curve
from sklearn.metrics import precision_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

# Model/Data parameters (module-level settings of this notebook cell)
num_classes = 10  # number of digit classes; prepareData() defines its own local copy
input_shape = (28,28,1)  # NOTE(review): not referenced by the code of this cell; CNNModel hard-codes the same shape
validation_ratio = 0.1  # NOTE(review): not referenced by the code of this cell; the split sizes are hard-coded in prepareData()

def prepareData():
    """Load MNIST, add a channel axis and split it into train/validation/test sets.

    Both official partitions are pooled and divided again with a fixed
    random seed: 10% test, 10% validation (1/9 of what remains after the
    test cut) and 80% training. The overall shape and the size of each
    split are printed.

    Returns:
        Tuple ``(x_test, x_train, x_valid, y_test, y_train, y_valid,
        y_test_classes)``. Images are float32 in [0, 1] with a trailing axis
        of size 1; the three ``y_*`` target arrays are one-hot encoded over
        10 classes; ``y_test_classes`` holds the test labels as they were
        before one-hot encoding.
    """
    digit_classes = 10  # 10 numbers, from 0 to 9
    seed = 42

    # Load the dataset
    (x_a, y_a), (x_b, y_b) = keras.datasets.mnist.load_data()

    # Pool the partitions, rescale to [0, 1] and add the channel axis
    all_x = np.concatenate((x_a, x_b)).astype("float32") / 255
    all_y = np.concatenate((y_a, y_b))
    all_x = np.expand_dims(all_x, -1)
    print("x shape:", all_x.shape)
    print(all_x.shape[0], "samples")

    # Test set: 10% of the pool
    x_train, x_test, y_train, y_test = train_test_split(
        all_x, all_y, test_size=0.1, random_state=seed)
    # Validation set: 1/9 of the remaining samples
    x_train, x_valid, y_train, y_valid = train_test_split(
        x_train, y_train, test_size=1/9, random_state=seed)

    # Plain labels of the test set, needed by the sklearn metrics
    y_test_classes = y_test

    # Convert the class vectors to binary class matrices
    y_train = keras.utils.to_categorical(y_train, digit_classes)
    y_valid = keras.utils.to_categorical(y_valid, digit_classes)
    y_test = keras.utils.to_categorical(y_test, digit_classes)

    print(x_train.shape[0], "train samples")
    print(x_test.shape[0], "test samples")
    print(x_valid.shape[0], "validation samples")

    return x_test, x_train, x_valid, y_test, y_train, y_valid, y_test_classes

def CNNModel(layers, x_train, y_train, x_valid, y_valid, optimizer):
    """Build, compile and train the CNN used to compare filter counts.

    The filter counts of the two fixed convolutions are edited by hand
    between experiments. Combinations tried (first conv | second conv):
        Baseline     32          64
                     16          32
                     16          16
                      8          16   (currently active)

    Args:
        layers: Sequence of dicts; every entry of ``layers[1:-1]`` adds a
            Conv2D built from its "filters", "kernelSize" and "activation"
            keys, followed by a max pooling with pool size 2 and stride 1.
            An empty sequence adds nothing.
        x_train: Training images (the first layer expects 28x28x1 inputs).
        y_train: Training targets for the 10-way categorical cross-entropy
            (presumably one-hot vectors -- verify against the caller).
        x_valid: Validation images.
        y_valid: Validation targets, same encoding as ``y_train``.
        optimizer: Whatever ``model.compile(optimizer=...)`` accepts.

    Returns:
        Tuple ``(model, history)``: the trained model and the value returned
        by ``model.fit``.
    """
    # Initializing model
    model = Sequential()
    model.add(Conv2D(8, (3, 3), activation='relu', kernel_initializer='he_uniform', input_shape=(28, 28, 1)))
    model.add(MaxPooling2D(2, 1))
    model.add(Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_uniform'))
    model.add(MaxPooling2D(2, 1))
    # Using dropout to avoid overfitting
    model.add(Dropout(0.2))

    # Adding the configurable convolutional layers
    for layer in layers[1:-1]:
        model.add(Conv2D(layer["filters"], layer["kernelSize"], activation=layer["activation"], kernel_initializer='he_uniform'))
        model.add(MaxPooling2D(2, 1))

    # NOTE(review): when no configurable layer is added, this second dropout
    # directly follows the first one -- confirm that stacking is intended.
    model.add(Dropout(0.2))
    # Adding output layer
    model.add(Flatten())
    model.add(Dense(10))
    model.add(Activation('softmax'))

    # Compiling model
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer=optimizer)
    model.summary()

    # Stop early once the validation accuracy has not improved by at least
    # 0.001 for 8 consecutive epochs.
    # NOTE(review): restore_best_weights keeps its default, so the returned
    # model holds the weights of the last epoch run, not of the best one.
    es = EarlyStopping(monitor='val_accuracy',
                       patience=8,
                       min_delta=0.001,
                       mode='max')

    # model fit with maximum of 500 epochs; `callbacks` is documented as a
    # list of callbacks, so the single callback is wrapped in one
    history = model.fit(x_train, y_train,
                        batch_size=128, epochs=500,
                        verbose=2,
                        validation_data=(x_valid, y_valid),
                        callbacks=[es])

    return model, history

def predictionsAndMetrics(model, x_test, y_test_classes):
    """Evaluate a trained model on the test set and print/return five metrics.

    Args:
        model: Object exposing ``predict(x, verbose=...)`` that returns one
            score per class for every sample.
        x_test: Test samples passed to ``model.predict``.
        y_test_classes: True class labels of the test samples (class
            indices, not one-hot vectors).

    Returns:
        Tuple ``(accuracy, precision, recall, f1, auc)``. Precision, recall
        and F1 are macro-averaged; the ROC AUC is computed one-vs-rest.
    """
    # predict probabilities for test set (single inference pass)
    y_prediction_probs = model.predict(x_test, verbose=0)
    # derive crisp classes from the same probabilities instead of running
    # the model a second time
    y_prediction_classes = np.argmax(y_prediction_probs, axis=-1)

    # accuracy: (tp + tn) / (p + n)
    accuracy = accuracy_score(y_test_classes, y_prediction_classes)
    print('Accuracy: %f' % accuracy)
    # precision tp / (tp + fp)
    precision = precision_score(y_test_classes, y_prediction_classes, average='macro')
    print('Precision: %f' % precision)
    # recall: tp / (tp + fn)
    recall = recall_score(y_test_classes, y_prediction_classes, average='macro')
    print('Recall: %f' % recall)
    # f1: 2 tp / (2 tp + fp + fn)
    f1 = f1_score(y_test_classes, y_prediction_classes, average='macro')
    print('F1 score: %f' % f1)
    # ROC AUC
    auc = roc_auc_score(y_test_classes, y_prediction_probs, multi_class='ovr')
    print('ROC AUC: %f' % auc)
    return accuracy, precision, recall, f1, auc

def baseline(x_test, x_train, x_valid, y_test, y_train, y_valid, y_test_classes, layers, optimizer):
    """Run one training/evaluation cycle and return the resulting test metrics.

    Prints "Baseline", builds and trains a model with ``CNNModel``, then
    scores it on the test set with ``predictionsAndMetrics``. ``y_test``
    itself is not used here.

    Returns:
        Dict with the keys "accuracy", "precision", "recall", "f1" and
        "auc", each mapping to the value measured for this model.
    """
    print("Baseline")
    fitted, history = CNNModel(layers, x_train, y_train, x_valid, y_valid, optimizer)
    accuracy, precision, recall, f1, auc = predictionsAndMetrics(fitted, x_test, y_test_classes)
    return {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1": f1,
        "auc": auc,
    }



# --- Experiment driver for the filter-count comparison ---
# Load and split the data once.
x_test, x_train, x_valid, y_test, y_train, y_valid, y_test_classes = prepareData()

# Empty configuration: CNNModel in this cell only reads layers[1:-1], so no
# convolution is added beyond the ones it hard-codes.
layers = np.array([

                    ])

optimizer = keras.optimizers.Adam(learning_rate=0.01)


metrics_Baseline = baseline(x_test, x_train, x_valid, y_test, y_train, y_valid, y_test_classes, layers, optimizer)



# Dict of metric name -> value for the trained model.
print(metrics_Baseline)


x shape: (70000, 28, 28, 1)
70000 samples
56000 train samples
7000 test samples
7000 validation samples
Baseline
Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_10 (Conv2D)           (None, 26, 26, 8)         80        
_________________________________________________________________
max_pooling2d_10 (MaxPooling (None, 25, 25, 8)         0         
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 23, 23, 16)        1168      
_________________________________________________________________
max_pooling2d_11 (MaxPooling (None, 22, 22, 16)        0         
_________________________________________________________________
dropout_10 (Dropout)         (None, 22, 22, 16)        0         
_________________________________________________________________
dropout_11 (Dropout)         (None, 22, 22, 16)        0         
_______

## CNN - Different Fully-connected Layers


In [None]:
# Libraries imports
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import math
from glob import glob

from matplotlib import pyplot
from sklearn.model_selection import KFold
from keras.datasets import mnist
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dense
from keras.layers import Flatten
from keras.optimizers import SGD

from sklearn.model_selection import train_test_split
from keras.layers.core import Dense, Dropout, Activation
from keras.models import Sequential
from keras.callbacks import EarlyStopping
from matplotlib import pyplot
from sklearn.metrics import accuracy_score, roc_curve
from sklearn.metrics import precision_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

# Model / data parameters for this cell.
# NOTE(review): prepareData and CNNModel below hard-code their own class
# count, input shape and split sizes instead of reading these constants.
num_classes = 10           # one class per digit, 0 through 9
input_shape = (28, 28, 1)  # height, width, single channel
validation_ratio = 0.1     # presumably the validation share of the data - confirm

def prepareData():
    """Load MNIST and re-split it into training, validation and test sets.

    The official Keras train and test parts are pooled, scaled to [0, 1],
    given a trailing channel axis and then split twice with a fixed seed:
    10% is held out for testing, and 1/9 of the remainder (another 10% of the
    total) is held out for validation.

    Prints the pooled array shape, the total sample count and the size of
    each split.

    Returns:
        Tuple ``(x_test, x_train, x_valid, y_test, y_train, y_valid,
        y_test_classes)``. The three ``y_*`` arrays are one-hot encoded over
        10 classes; ``y_test_classes`` holds the test labels as plain class
        indices.
    """
    digit_count = 10  # digits 0 through 9

    # Download (or load from cache) the dataset
    (train_images, train_labels), (test_images, test_labels) = keras.datasets.mnist.load_data()

    # Pool the official train/test parts so the split can be redone below
    x = np.concatenate((train_images, test_images))
    y = np.concatenate((train_labels, test_labels))

    # Scale pixel values to [0, 1] and append a channel axis
    x = np.expand_dims(x.astype("float32") / 255, -1)
    print("x shape:", x.shape)
    print(x.shape[0], "samples")

    # First cut: 10% of everything becomes the test set
    x_rest, x_test, y_rest, y_test = train_test_split(
        x, y, test_size=0.1, random_state=42)
    # Second cut: 1/9 of the remaining 90% becomes the validation set
    x_train, x_valid, y_train, y_valid = train_test_split(
        x_rest, y_rest, test_size=1/9, random_state=42)

    # Keep the integer labels of the test set for the sklearn metrics
    y_test_classes = y_test

    # One-hot encode the targets of every split
    y_train, y_valid, y_test = (
        keras.utils.to_categorical(labels, digit_count)
        for labels in (y_train, y_valid, y_test)
    )

    for split, tag in ((x_train, "train samples"),
                       (x_test, "test samples"),
                       (x_valid, "validation samples")):
        print(split.shape[0], tag)

    return x_test, x_train, x_valid, y_test, y_train, y_valid, y_test_classes

def CNNModel(layers, x_train, y_train, x_valid, y_valid, optimizer):
    """Build, compile and train a small CNN for 28x28x1 images, 10 classes.

    Architecture: Conv2D(32, 2x2) -> MaxPooling2D(2x2) -> Conv2D(64, 2x2) ->
    MaxPooling2D(2x2) -> Flatten -> Dropout -> [Dense + Dropout]* ->
    Dense(10) -> softmax.

    Args:
        layers: indexable sequence of dicts. ``layers[0]["dropout"]`` is the
            rate of the dropout applied right after flattening. Every entry
            in ``layers[1:-1]`` adds ``Dense(units, activation)`` followed by
            ``Dropout(dropout)``, read from the keys "units", "activation"
            and "dropout".
        x_train: training images, shaped to match ``input_shape=(28,28,1)``.
        y_train: one-hot training targets (the loss is categorical
            cross-entropy over 10 outputs).
        x_valid: validation images.
        y_valid: one-hot validation targets.
        optimizer: Keras optimizer passed to ``model.compile``.

    Returns:
        Tuple ``(model, history)``: the trained model and the object returned
        by ``model.fit``.

    Raises:
        IndexError: if ``layers`` is empty (``layers[0]`` is always read).
    """
    # Initializing model
    model = Sequential()
    model.add(Conv2D(32,(2,2), activation = 'relu', kernel_initializer='he_uniform', input_shape=(28,28,1)))
    model.add(MaxPooling2D((2,2)))
    model.add(Conv2D(64, (2,2), activation = 'relu', kernel_initializer='he_uniform'))
    model.add(MaxPooling2D((2,2)))
    model.add(Flatten())
    # Using dropout to avoid overfitting
    model.add(Dropout(layers[0]["dropout"]))

    # Adding hidden layers
    # NOTE(review): the slice skips both the first AND the last entry, so the
    # last element of `layers` is never used and a 2-entry list adds no
    # hidden Dense layer at all. Confirm this is intended before changing it,
    # since it determines the architectures behind the recorded results.
    for layer in layers[1:-1]:
        model.add(Dense(units=layer["units"], activation = layer["activation"]))
        model.add(Dropout(layer["dropout"]))

    # Adding output layer
    model.add(Dense(10))
    model.add(Activation('softmax'))

    # Compiling model
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer=optimizer)
    model.summary()

    # Stop early once validation accuracy has failed to improve by at least
    # 0.001 for 8 consecutive epochs
    es = EarlyStopping(monitor='val_accuracy',
                        patience=8,
                        min_delta=0.001,
                        mode='max')

    # model fit with maximum of 500 epochs
    # `callbacks` is documented as a list of callbacks, so wrap the single
    # EarlyStopping instance instead of passing it bare.
    history = model.fit(x_train, y_train,
            batch_size=128, epochs=500,
            verbose=0,
            validation_data=(x_valid, y_valid),
            callbacks=[es])


    return model, history

def predictionsAndMetrics(model, x_test, y_test_classes):
    """Score a trained classifier on the test set and print each metric.

    Args:
        model: trained model whose ``predict`` returns one probability per
            class for every sample.
        x_test: test inputs.
        y_test_classes: true test labels as class indices (not one-hot).

    Returns:
        Tuple ``(accuracy, precision, recall, f1, auc)``. Precision, recall
        and F1 are macro-averaged over the classes; AUC is the one-vs-rest
        multi-class ROC AUC computed from the predicted probabilities.
    """
    # Run inference once; the hard class labels are derived from the same
    # probabilities instead of calling predict a second time.
    y_prediction_probs = model.predict(x_test, verbose=0)
    y_prediction_classes = np.argmax(y_prediction_probs, axis=-1)

    # accuracy: fraction of samples whose predicted class is correct
    accuracy = accuracy_score(y_test_classes, y_prediction_classes)
    print('Accuracy: %f' % accuracy)
    # precision: tp / (tp + fp), averaged over classes
    precision = precision_score(y_test_classes, y_prediction_classes, average='macro')
    print('Precision: %f' % precision)
    # recall: tp / (tp + fn), averaged over classes
    recall = recall_score(y_test_classes, y_prediction_classes, average='macro')
    print('Recall: %f' % recall)
    # f1: 2 tp / (2 tp + fp + fn), averaged over classes
    f1 = f1_score(y_test_classes, y_prediction_classes, average='macro')
    print('F1 score: %f' % f1)
    # ROC AUC, one-vs-rest, from the class probabilities
    auc = roc_auc_score(y_test_classes, y_prediction_probs, multi_class='ovr')
    print('ROC AUC: %f' % auc)
    return accuracy, precision, recall, f1, auc

def varyingFConnLayers(x_test, x_train, x_valid, y_test, y_train, y_valid, y_test_classes, layers, optimizer):
    """Train five models of growing depth and collect their test metrics.

    Each round trains a model with ``CNNModel`` on the current layer list,
    scores it with ``predictionsAndMetrics`` and then appends one more
    ``{"units": 32, "activation": 'relu', "dropout": 0.2}`` entry for the
    next round. The caller's ``layers`` array is not modified.

    NOTE(review): ``CNNModel`` in this cell only builds Dense layers from
    ``layers[1:-1]``, so round ``i`` trains ``len(layers) - 2 + i`` hidden
    Dense layers (0 to 4 for a 2-entry input), not 1 to 5.

    Args:
        x_test, x_train, x_valid: input arrays of the three splits.
        y_test: accepted for a uniform call signature; not read here.
        y_train, y_valid: targets forwarded to ``CNNModel``.
        y_test_classes: test labels forwarded to ``predictionsAndMetrics``.
        layers: initial layer specification (array of dicts).
        optimizer: optimizer forwarded to every ``CNNModel`` call.

    Returns:
        dict mapping "accuracy", "precision", "recall", "f1" and "auc" to a
        NumPy array of length 5, one value per round.
    """
    rounds = 5
    metric_names = ("accuracy", "precision", "recall", "f1", "auc")
    metrics = {name: np.zeros(rounds) for name in metric_names}

    current_layers = layers
    for run in range(rounds):
        model, _ = CNNModel(current_layers, x_train, y_train, x_valid, y_valid, optimizer)
        acc, prec, rec, f1_value, auc_value = predictionsAndMetrics(model, x_test, y_test_classes)
        for name, value in zip(metric_names, (acc, prec, rec, f1_value, auc_value)):
            metrics[name][run] = value
        # Grow the specification by one entry for the next round
        current_layers = np.append(
            current_layers, [{"units": 32, "activation": 'relu', "dropout": 0.2}])

    return metrics

# Build the train / validation / test splits for this experiment.
x_test, x_train, x_valid, y_test, y_train, y_valid, y_test_classes = prepareData()

# Adam with a fixed learning rate of 0.01.
optimizer = keras.optimizers.Adam(learning_rate=0.01)

# Starting layer specification handed to varyingFConnLayers.
# (An earlier assignment holding a "filters"/"kernelSize" entry was
# overwritten here before ever being read, so it has been dropped.)
# NOTE: CNNModel reads only "dropout" from the first entry and builds Dense
# layers from layers[1:-1], so the last entry is not used by the model.
# NOTE: this rebinds the name `layers`, which the import cell also uses for
# the `tensorflow.keras.layers` module.
layers = np.array([
                    {"units": 32, "activation": 'relu', "dropout": 0.2},
                    {"units": 32, "activation": 'relu', "dropout": 0.2},
                    ])


metrics_fullyConnected_layers = varyingFConnLayers(x_test, x_train, x_valid, y_test, y_train, y_valid, y_test_classes, layers, optimizer)

print(metrics_fullyConnected_layers)

x shape: (70000, 28, 28, 1)
70000 samples
56000 train samples
7000 test samples
7000 validation samples
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_2 (Conv2D)            (None, 27, 27, 32)        160       
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 13, 13, 32)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 12, 12, 64)        8256      
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 6, 6, 64)          0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 2304)              0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 2304)              0         
________________

  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.888143
Precision: 0.889485
Recall: 0.887476
F1 score: 0.887712
ROC AUC: 0.989106
{'accuracy': array([0.98228571, 0.96814286, 0.95771429, 0.11428571, 0.88814286]), 'precision': array([0.9821239 , 0.96793727, 0.95743178, 0.01142857, 0.88948489]), 'recall': array([0.98222242, 0.96812703, 0.95829644, 0.1       , 0.88747591]), 'f1': array([0.9821495 , 0.96791965, 0.95762021, 0.02051282, 0.88771156]), 'auc': array([0.99970345, 0.99909762, 0.99817873, 0.5       , 0.9891062 ])}


Notes:


#References

* [How to Develop a CNN for MNIST
Handwritten Digit Classification
](https://machinelearningmastery.com/how-to-develop-a-convolutional-neural-network-from-scratch-for-mnist-handwritten-digit-classification/)
* [Understand the Impact of Learning Rate on Neural Network Performance
](https://machinelearningmastery.com/understand-the-dynamics-of-learning-rate-on-deep-learning-neural-networks/)
* [How Do Convolutional Layers Work in Deep Learning Neural Networks?
](https://machinelearningmastery.com/convolutional-layers-for-deep-learning-neural-networks/)
* [A Gentle Introduction to k-fold Cross-Validation
](https://machinelearningmastery.com/k-fold-cross-validation/)
* [MNIST Handwritten Digit Recognition in Keras](https://nextjournal.com/gkoehler/digit-recognition-with-keras)
* [Keras for Beginners: Building Your First Neural Network
](https://victorzhou.com/blog/keras-neural-network-tutorial/)
* [MNIST - Deep Neural Network with Keras
](https://www.kaggle.com/prashant111/mnist-deep-neural-network-with-keras)
* [Introduction to Multilayer Neural Networks with TensorFlow’s Keras API
](https://towardsdatascience.com/introduction-to-multilayer-neural-networks-with-tensorflows-keras-api-abf4f813959)

