In [1]:
# Step 1: Import libraries
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Step 2: Fetch MNIST and rescale the image arrays
def _as_unit_floats(images):
    """Cast an image array to float32 and divide every value by 255.0."""
    return images.astype("float32") / 255.0


# load_data() hands back (train, test), each an (images, labels) pair.
train_split, test_split = keras.datasets.mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

x_train = _as_unit_floats(x_train)
x_test = _as_unit_floats(x_test)

# Step 3: Choose hyperparameters (you can change these values)
learning_rate = 0.0005   # try 0.001 or 0.0001
batch_size = 128         # try 32, 64, 256
epochs = 8               # try 5, 10, 15
seed = 42                # any fixed integer; change it to sample a different run

# Seed the Python, NumPy and TensorFlow random generators in one call so that
# weight initialisation, dropout masks and batch shuffling repeat from run to
# run. Without a fixed seed, the effect of changing a hyperparameter above is
# mixed with run-to-run noise. (Some GPU kernels can still introduce small
# nondeterministic differences.)
keras.utils.set_random_seed(seed)

# Step 4: Build model with dropout (to reduce overfitting)
# The input shape is declared with an explicit keras.Input entry instead of an
# `input_shape=` argument on Flatten; Keras 3 emits a UserWarning for the
# latter form in Sequential models.
model = keras.Sequential([
    keras.Input(shape=(28, 28)),
    layers.Flatten(),
    layers.Dense(256, activation="relu"),
    layers.Dropout(0.3),                # dropout rate is a hyperparameter
    layers.Dense(128, activation="relu"),
    layers.Dense(10, activation="softmax")   # 10 output units with softmax
])

# Step 5: Configure training with Adam at the chosen learning rate
optimizer = keras.optimizers.Adam(learning_rate=learning_rate)

# Gather the compile settings in one mapping, then hand them over together.
compile_options = {
    "optimizer": optimizer,
    "loss": "sparse_categorical_crossentropy",
    "metrics": ["accuracy"],
}
model.compile(**compile_options)

# Step 6: Fit the model with the batch size and epoch count chosen above
fit_options = dict(
    epochs=epochs,
    batch_size=batch_size,
    validation_split=0.1,   # fraction of the training data used for validation
    verbose=1,
)
history = model.fit(x_train, y_train, **fit_options)

# Step 7: Score the trained model on the test split and report the settings
evaluation = model.evaluate(x_test, y_test, verbose=0)
test_loss, test_acc = evaluation   # compiled loss first, then the accuracy metric

print("Test accuracy:", test_acc)

# Assemble the report pieces first; print() joins them with single spaces.
report_parts = (
    "Used hyperparameters -> lr:", learning_rate,
    ", batch_size:", batch_size,
    ", epochs:", epochs,
)
print(*report_parts)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


  super().__init__(**kwargs)


Epoch 1/8
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 12ms/step - accuracy: 0.7666 - loss: 0.7886 - val_accuracy: 0.9595 - val_loss: 0.1490
Epoch 2/8
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - accuracy: 0.9423 - loss: 0.1987 - val_accuracy: 0.9722 - val_loss: 0.1026
Epoch 3/8
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 11ms/step - accuracy: 0.9581 - loss: 0.1382 - val_accuracy: 0.9745 - val_loss: 0.0839
Epoch 4/8
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 12ms/step - accuracy: 0.9673 - loss: 0.1094 - val_accuracy: 0.9768 - val_loss: 0.0777
Epoch 5/8
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - accuracy: 0.9753 - loss: 0.0874 - val_accuracy: 0.9775 - val_loss: 0.0732
Epoch 6/8
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - accuracy: 0.9766 - loss: 0.0752 - val_accuracy: 0.9802 - val_loss: 0.0642
Epoch 7/8
[1m422/422[0m [32m