In [78]:
import tensorflow as tf
from tensorflow import keras
import datetime
from tensorflow.keras import callbacks, layers


In [79]:
# Load MNIST; load_data() returns a (train, test) pair, each of which is an
# (inputs, labels) pair.
train_split, test_split = keras.datasets.mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

In [80]:
# Flatten every sample to a 28*28 = 784 element vector, cast to float32 and
# divide by 255.0 (which maps inputs in the 0-255 range into [0, 1]).
# NOTE: x_train / x_test are reassigned in place, so re-running this cell
# without re-running the load cell divides by 255 a second time.
x_train = x_train.reshape(-1, 28 * 28).astype("float32") / 255.0
x_test = x_test.reshape(-1, 28 * 28).astype("float32") / 255.0

In [81]:
# Hold out the last 10,000 training samples (and their labels) for validation.
# The right-hand sides are evaluated before assignment, so both slices are
# taken from the same, un-truncated arrays.
# NOTE: x_train / y_train are overwritten, so re-running this cell alone
# removes another 10,000 samples from the training set.
x_train, x_val = x_train[:-10000], x_train[-10000:]
y_train, y_val = y_train[:-10000], y_train[-10000:]

In [82]:
# TensorBoard log directory: one sub-directory per run, named after the
# wall-clock time at which this cell is executed (YYYYMMDD-HHMMSS).
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = "logs/mlp_mnist/" + run_stamp

In [83]:
# Stop training once the monitored validation loss has not improved for
# 5 consecutive epochs, and roll the model back to its best weights.
early_stopping_cb = callbacks.EarlyStopping(
    monitor="val_loss",
    patience=5,
    restore_best_weights=True,
)

# Write the model to disk only on epochs where the validation loss improves.
# NOTE(review): the ".h5" suffix presumably selects the legacy HDF5 format in
# recent Keras releases; consider the native ".keras" format - confirm against
# the installed version and any code that loads this file.
checkpoint_cb = callbacks.ModelCheckpoint(
    filepath="best_mlp_mnist.h5",
    monitor="val_loss",
    save_best_only=True,
)

# Stream training metrics to the per-run TensorBoard directory.
tensorboard_cb = callbacks.TensorBoard(log_dir=log_dir)

In [84]:
# Seed the random number generators before any weights are created so that a
# fresh "Restart & Run All" starts from the same initialisation, dropout masks
# and shuffling order. Per the Keras docs, set_random_seed seeds Python,
# NumPy and the backend framework; some GPU kernels may still be
# non-deterministic.
keras.utils.set_random_seed(42)

# MLP classifier: 784 flattened inputs -> three hidden stages -> 10-way softmax.
# Every hidden stage is Dense(relu) -> BatchNormalization -> Dropout(0.3);
# only the number of units differs (300, 200, 100), so the stages are built
# in a loop instead of being written out three times.
mlp_layers = [layers.Input(shape=(28 * 28,))]
for units in (300, 200, 100):
    mlp_layers.extend([
        layers.Dense(units, activation="relu"),
        layers.BatchNormalization(),
        layers.Dropout(0.3),
    ])

# Output layer: one softmax probability per class (10 classes).
mlp_layers.append(layers.Dense(10, activation="softmax"))

model = keras.models.Sequential(mlp_layers)

In [85]:
# Configure training: Adam optimizer, sparse categorical cross-entropy loss
# and accuracy as the reported metric.
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [86]:
# Train for at most 50 epochs in mini-batches of 64, validating on the
# held-out split after every epoch. The callbacks handle early stopping,
# best-model checkpointing and TensorBoard logging.
training_callbacks = [early_stopping_cb, checkpoint_cb, tensorboard_cb]
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=64,
    epochs=50,
    validation_data=(x_val, y_val),
    callbacks=training_callbacks,
)

Epoch 1/50
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.7998 - loss: 0.6593



[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 8ms/step - accuracy: 0.8794 - loss: 0.3970 - val_accuracy: 0.9609 - val_loss: 0.1311
Epoch 2/50
[1m778/782[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 7ms/step - accuracy: 0.9372 - loss: 0.2116



[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 8ms/step - accuracy: 0.9402 - loss: 0.2007 - val_accuracy: 0.9676 - val_loss: 0.1078
Epoch 3/50
[1m780/782[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 7ms/step - accuracy: 0.9508 - loss: 0.1633



[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 8ms/step - accuracy: 0.9519 - loss: 0.1595 - val_accuracy: 0.9704 - val_loss: 0.0936
Epoch 4/50
[1m778/782[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 7ms/step - accuracy: 0.9541 - loss: 0.1467



[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 8ms/step - accuracy: 0.9565 - loss: 0.1408 - val_accuracy: 0.9743 - val_loss: 0.0841
Epoch 5/50
[1m781/782[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 7ms/step - accuracy: 0.9628 - loss: 0.1217



[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 8ms/step - accuracy: 0.9632 - loss: 0.1203 - val_accuracy: 0.9765 - val_loss: 0.0773
Epoch 6/50
[1m778/782[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 7ms/step - accuracy: 0.9665 - loss: 0.1075



[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 8ms/step - accuracy: 0.9658 - loss: 0.1103 - val_accuracy: 0.9778 - val_loss: 0.0749
Epoch 7/50
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step - accuracy: 0.9688 - loss: 0.0991 - val_accuracy: 0.9779 - val_loss: 0.0773
Epoch 8/50
[1m776/782[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 7ms/step - accuracy: 0.9714 - loss: 0.0906



[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 8ms/step - accuracy: 0.9718 - loss: 0.0904 - val_accuracy: 0.9772 - val_loss: 0.0747
Epoch 9/50
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 8ms/step - accuracy: 0.9728 - loss: 0.0865 - val_accuracy: 0.9773 - val_loss: 0.0788
Epoch 10/50
[1m775/782[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 7ms/step - accuracy: 0.9757 - loss: 0.0779



[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step - accuracy: 0.9751 - loss: 0.0800 - val_accuracy: 0.9796 - val_loss: 0.0684
Epoch 11/50
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.9772 - loss: 0.0721



[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 8ms/step - accuracy: 0.9765 - loss: 0.0757 - val_accuracy: 0.9811 - val_loss: 0.0659
Epoch 12/50
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 8ms/step - accuracy: 0.9789 - loss: 0.0677 - val_accuracy: 0.9824 - val_loss: 0.0665
Epoch 13/50
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 8ms/step - accuracy: 0.9793 - loss: 0.0671 - val_accuracy: 0.9818 - val_loss: 0.0665
Epoch 14/50
[1m777/782[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 8ms/step - accuracy: 0.9816 - loss: 0.0591



[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 8ms/step - accuracy: 0.9808 - loss: 0.0606 - val_accuracy: 0.9827 - val_loss: 0.0638
Epoch 15/50
[1m779/782[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 8ms/step - accuracy: 0.9786 - loss: 0.0619



[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 9ms/step - accuracy: 0.9792 - loss: 0.0637 - val_accuracy: 0.9839 - val_loss: 0.0614
Epoch 16/50
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 9ms/step - accuracy: 0.9818 - loss: 0.0585 - val_accuracy: 0.9811 - val_loss: 0.0669
Epoch 17/50
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 8ms/step - accuracy: 0.9823 - loss: 0.0542 - val_accuracy: 0.9809 - val_loss: 0.0688
Epoch 18/50
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 9ms/step - accuracy: 0.9824 - loss: 0.0546 - val_accuracy: 0.9828 - val_loss: 0.0687
Epoch 19/50
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 8ms/step - accuracy: 0.9838 - loss: 0.0501 - val_accuracy: 0.9827 - val_loss: 0.0660
Epoch 20/50
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 8ms/step - accuracy: 0.9839 - loss: 0.0505 - val_accuracy: 0.9835 - val_loss: 0.0622


In [87]:
# Score the trained model on the test split; with the single "accuracy"
# metric compiled above, evaluate() yields the pair (loss, accuracy).
test_metrics = model.evaluate(x_test, y_test)
test_loss, test_acc = test_metrics

# Report accuracy as a percentage with two decimal places.
accuracy_pct = test_acc * 100
print("Test Accuracy: {:.2f}%".format(accuracy_pct))

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9813 - loss: 0.0614
Test Accuracy: 98.13%
