# In [0]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, Conv2D, MaxPooling2D
from tensorflow.keras.optimizers import SGD, Adam, RMSprop, Adadelta
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.nn import softmax, relu, relu6
import numpy as np

# Variables

# --- Fixed properties of the MNIST data set: DO NOT CHANGE any of these ---
TRAIN_SET_SIZE = 60000   # First dimension used when reshaping the training images
TEST_SET_SIZE = 10000    # First dimension used when reshaping the test images
NORMALIZATION = 255.0    # Divisor applied to the raw pixel values
IMAGE_LENGTH = 28        # Length of MNIST image side
DIGITS_NUMBER = 10       # Number of classes used for one-hot labels and the output layer
CHANNELS_NUMBER = 1      # Number 1 means input images are in grey scale

# --- Tunable network topology and training schedule ---
CONVOLUTIONAL_LAYERS = 2  # The model section stacks layers 1..5 based on this count (layer 1 is always added)
DENSE_LAYERS = 2          # Hidden dense layer k is added when this value is > k; the output layer is always added
EPOCHS = 10               # Number of times network will be exposed to training set
DROPOUT_CONVOLUTIONAL, DROPOUT_DENSE = 0.25, 0.5  # Rates given to the Dropout layers
BATCH_SIZE_TRAIN = 32     # batch_size passed to model.fit

# Despite the BATCH_SIZE_ prefix, the values below are not batch sizes: the
# layers section passes them as the first argument of Conv2D (number of
# filters) and of Dense (number of units).
BATCH_SIZE_CONV1 = 32
BATCH_SIZE_CONV2 = BATCH_SIZE_CONV3 = BATCH_SIZE_CONV4 = BATCH_SIZE_CONV5 = 64
BATCH_SIZE_DENSE1 = BATCH_SIZE_DENSE2 = BATCH_SIZE_DENSE3 = BATCH_SIZE_DENSE4 = 128

KERNEL_SIZE = 3  # Side of the square convolution kernel
POOL_SIZE = 2    # Side of the square max-pooling window

# NOTE(review): neither initializer below is referenced by the layer
# definitions visible in this file - confirm whether they were meant to be
# passed as kernel_initializer / bias_initializer.
KERNEL_IN = 'random_uniform'  # Initializer for the layer weights
BIAS_IN = 'ones'              # Initializer for the bias

ACTIVATION_OUTPUT_LAYER = softmax  # Activation of the final Dense layer
ACTIVATION_OTHER_LAYERS = relu     # Activation of every other Conv2D / Dense layer

# --- Compilation settings ---
MODEL_OPTIMIZER = Adadelta()  # Recommended to use default parameters
LOSS = 'categorical_crossentropy'
METRICS = ['accuracy']

# MNIST database: loading, normalization and reshaping

train_split, test_split = mnist.load_data()
x_train_original, y_train_original = train_split
x_test_original, y_test_original = test_split

# Scale the raw pixel values by dividing them by NORMALIZATION
x_train_normalized = x_train_original / NORMALIZATION
x_test_normalized = x_test_original / NORMALIZATION

# Conv2D expects an explicit channel axis, so every image is reshaped to
# (IMAGE_LENGTH, IMAGE_LENGTH, CHANNELS_NUMBER); the whole set becomes a 4-D
# array whose first axis indexes the samples.
image_shape = (IMAGE_LENGTH, IMAGE_LENGTH, CHANNELS_NUMBER)
x_train = x_train_normalized.reshape((TRAIN_SET_SIZE,) + image_shape)
x_test = x_test_normalized.reshape((TEST_SET_SIZE,) + image_shape)

# Convert class vectors to binary class matrices (one-hot rows),
# Ex: 1 -> [0,1,0,0,0,0,0,0,0,0]
y_train = to_categorical(y_train_original, DIGITS_NUMBER)
y_test = to_categorical(y_test_original, DIGITS_NUMBER)

# Layers
#
# Every candidate layer is instantiated here; the model section decides which
# of them are actually stacked. The BATCH_SIZE_CONV* / BATCH_SIZE_DENSE*
# constants supply the number of filters / units of each layer.

# Settings shared by all convolutional layers
conv_settings = dict(
    kernel_size=(KERNEL_SIZE, KERNEL_SIZE),
    activation=ACTIVATION_OTHER_LAYERS,
)

# Only the first convolutional layer declares the input shape of the network
ConvolutionalLayer1 = Conv2D(
    filters=BATCH_SIZE_CONV1,
    input_shape=(IMAGE_LENGTH, IMAGE_LENGTH, CHANNELS_NUMBER),
    **conv_settings
)
ConvolutionalLayer2 = Conv2D(filters=BATCH_SIZE_CONV2, **conv_settings)
ConvolutionalLayer3 = Conv2D(filters=BATCH_SIZE_CONV3, **conv_settings)
ConvolutionalLayer4 = Conv2D(filters=BATCH_SIZE_CONV4, **conv_settings)
ConvolutionalLayer5 = Conv2D(filters=BATCH_SIZE_CONV5, **conv_settings)

MaxPoolingLayer = MaxPooling2D(pool_size=(POOL_SIZE, POOL_SIZE))

# Hidden dense layers
DenseLayer1 = Dense(units=BATCH_SIZE_DENSE1, activation=ACTIVATION_OTHER_LAYERS)
DenseLayer2 = Dense(units=BATCH_SIZE_DENSE2, activation=ACTIVATION_OTHER_LAYERS)
DenseLayer3 = Dense(units=BATCH_SIZE_DENSE3, activation=ACTIVATION_OTHER_LAYERS)
DenseLayer4 = Dense(units=BATCH_SIZE_DENSE4, activation=ACTIVATION_OTHER_LAYERS)

# Output layer: one unit per digit class
DenseLayer5 = Dense(units=DIGITS_NUMBER, activation=ACTIVATION_OUTPUT_LAYER)

# Model
#
# Stack: convolutional layers -> max pooling -> dropout -> flatten ->
# hidden dense layers (each followed by dropout) -> output layer.

model = Sequential()

# The first convolutional layer is always added; layer k+1 is added only when
# CONVOLUTIONAL_LAYERS is greater than k.
model.add(ConvolutionalLayer1)
optional_conv_layers = (
    ConvolutionalLayer2,
    ConvolutionalLayer3,
    ConvolutionalLayer4,
    ConvolutionalLayer5,
)
for conv_threshold, conv_layer in enumerate(optional_conv_layers, start=1):
    if CONVOLUTIONAL_LAYERS > conv_threshold:
        model.add(conv_layer)

model.add(MaxPoolingLayer)
model.add(Dropout(DROPOUT_CONVOLUTIONAL))
model.add(Flatten())

# Hidden dense layer k is added (with its own dropout) only when DENSE_LAYERS
# is greater than k; the output layer below is added unconditionally.
hidden_dense_layers = (DenseLayer1, DenseLayer2, DenseLayer3, DenseLayer4)
for dense_threshold, dense_layer in enumerate(hidden_dense_layers, start=1):
    if DENSE_LAYERS > dense_threshold:
        model.add(dense_layer)
        model.add(Dropout(DROPOUT_DENSE))

model.add(DenseLayer5)

# Compilation

model.compile(optimizer=MODEL_OPTIMIZER, loss=LOSS, metrics=METRICS)

# Training

model.fit(x_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE_TRAIN)

# Evaluation

evaluationVector = model.evaluate(x_test, y_test)

# Index 1 of the evaluation result is read as the accuracy (the single entry
# of METRICS); the error rate is its complement, expressed as a percentage.
test_accuracy = evaluationVector[1]
error_rate_percent = (1 - test_accuracy) * 100
print("\nError rate over test set was: " + str(error_rate_percent) + "%")

# Label printing - Can be entirely commented for faster execution

# x_test already has the 4-D shape the network expects (it was reshaped when
# the data set was prepared), so it is passed to predict() as is.
predictedLabels = model.predict(x_test)

# Each row of predictedLabels holds one score per digit class; the predicted
# digit is the index of the largest score. The argmax is computed once for the
# whole test set instead of once per printed label.
predicted_digits = np.argmax(predictedLabels, axis=1)

prediction = ''.join(str(digit) for digit in predicted_digits)
print("\nThe network predicted the following labels for the test set:")
print(prediction)

actual = ''.join(str(label) for label in y_test_original)
print("\nActual test set labels are:")
print(actual)