In [2]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.optimizers import SGD, Adam, RMSprop
from tensorflow.keras.datasets import mnist
from tensorflow.nn import softmax, relu, relu6, sigmoid, tanh
import numpy as np

# Variables

TRAIN_SET_SIZE = 60000 #DO NOT CHANGE
TEST_SET_SIZE = 10000 #DO NOT CHANGE
NORMALIZATION = 255.0 #DO NOT CHANGE
IMAGE_LENGTH = 28 #Length of MNIST image side DO NOT CHANGE

INPUT = 10 #Size of input array, MINIMUM 10
HIDDEN_NEURONS1 = 2.5*(28*28) #Neurons in hidden layer 1
HIDDEN_NEURONS2 = 2.5*(28*28)
HIDDEN_NEURONS3 = 2.5*(28*28)
HIDDEN_NEURONS4 = 2.5*(28*28)
HIDDEN_NEURONS5 = 2.5*(28*28)

HIDDEN_LAYERS = 1
EPOCHS = 1 #Number of times network will be exposed to training set
LEARNING_RATE = 0.001 #Default is 0.001
DROPOUT = 0.2

KERNEL_IN='random_uniform' #Initializer for the layer weights
BIAS_IN='ones' #Initializer for the bias
ACTIVATION1=relu #Activation function for hidden layer 1, should be non-linear for hidden layers in multilayer perceptrons
ACTIVATION2=relu
ACTIVATION3=relu
ACTIVATION4=relu
ACTIVATION5=relu

# Uncomment the desired functions, only one of a kind should be uncommented at the same time
#MODEL_OPTIMIZER=SGD(lr=LEARNING_RATE) #Technique used to upgrade network weights
#MODEL_OPTIMIZER=Adam(lr=LEARNING_RATE)
MODEL_OPTIMIZER=RMSprop(lr=LEARNING_RATE)
#LOSS='mse' #Loss function
#LOSS='binary_crossentropy'
#LOSS='categorical_crossentropy'
LOSS='sparse_categorical_crossentropy'
METRICS=['accuracy']

#MNIST database and normalization

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / NORMALIZATION, x_test / NORMALIZATION

# Layers

HiddenLayer1 = Dense(HIDDEN_NEURONS1, kernel_initializer=KERNEL_IN, bias_initializer=BIAS_IN, activation=ACTIVATION1, input_dim=INPUT) #Layer will have <HIDDEN_NEURONS1> neurons and an input array with size <INPUT>
HiddenLayer2 = Dense(HIDDEN_NEURONS2, kernel_initializer=KERNEL_IN, bias_initializer=BIAS_IN, activation=ACTIVATION2) #Size of input array will be number of neurons of previous layer
HiddenLayer3 = Dense(HIDDEN_NEURONS3, kernel_initializer=KERNEL_IN, bias_initializer=BIAS_IN, activation=ACTIVATION3)
HiddenLayer4 = Dense(HIDDEN_NEURONS4, kernel_initializer=KERNEL_IN, bias_initializer=BIAS_IN, activation=ACTIVATION4)
HiddenLayer5 = Dense(HIDDEN_NEURONS5, kernel_initializer=KERNEL_IN, bias_initializer=BIAS_IN, activation=ACTIVATION5)
OutputLayer = Dense(10, kernel_initializer=KERNEL_IN, bias_initializer=BIAS_IN, activation=softmax) #Activation function should be softmax, number of neurons should be 10

# Model

model = Sequential()
model.add(Flatten()) #Flattens input matrix into 1-dimension vector
model.add(HiddenLayer1)
model.add(Dropout(DROPOUT))
if HIDDEN_LAYERS > 1:
  model.add(HiddenLayer2)
  model.add(Dropout(DROPOUT))
if HIDDEN_LAYERS > 2:
  model.add(HiddenLayer3)
  model.add(Dropout(DROPOUT))
if HIDDEN_LAYERS > 3:
  model.add(HiddenLayer4)
  model.add(Dropout(DROPOUT))
if HIDDEN_LAYERS > 4:
  model.add(HiddenLayer5)
  model.add(Dropout(DROPOUT))
model.add(OutputLayer)

# Compilation

model.compile(optimizer=MODEL_OPTIMIZER, loss=LOSS, metrics=METRICS)

# Training

model.fit(x_train, y_train, epochs=EPOCHS)

# Evaluation

#evaluationVector = model.evaluate(x_test, y_test)
evaluationVector = model.evaluate(x_train, y_train)

#print("\nError rate over test set was: ", (1-evaluationVector[1])*100, "%")
print("\nError rate over training set was: ", (1-evaluationVector[1])*100, "%")

# Label printing - Can be entirely commented for faster execution

predictedLabels = model.predict(x_train.reshape((-1,28*28)))
#predictedLabels = model.predict(x_test.reshape((-1,28*28))) #x_test is reshaped from 1-D to a 2-D array where all elements - 10000 - are arrays with 28*28 elements, that is, one MNIST digit
#Value -1 is used to ensure total size of array is kept constant - Original array has 784000 elements and one of its dimensions will have length 28*28, so other dimension must have length 10000
'''
prediction = ''
for i in range(TEST_SET_SIZE):
 prediction=prediction+str(np.argmax(predictedLabels, axis=1)[i]) #Each element in predictedLabels will be array with 10 floats, maximum value from those must be picked with np.argmax
print("\nThe network predicted the following labels for the test set:")
print (prediction)

actual = ''
for i in range(TEST_SET_SIZE):
  actual=actual+str(y_test[i])
print("\nActual test set labels are:")
print(actual)'''

Epoch 1/1

Error rate over training set was:  2.7583333333333293 %


'\nprediction = \'\'\nfor i in range(TEST_SET_SIZE):\n prediction=prediction+str(np.argmax(predictedLabels, axis=1)[i]) #Each element in predictedLabels will be array with 10 floats, maximum value from those must be picked with np.argmax\nprint("\nThe network predicted the following labels for the test set:")\nprint (prediction)\n\nactual = \'\'\nfor i in range(TEST_SET_SIZE):\n  actual=actual+str(y_test[i])\nprint("\nActual test set labels are:")\nprint(actual)'