In [None]:
import tensorflow as tf
# Bare expression: evaluates to the installed TensorFlow version string so the
# notebook echoes it as this cell's result.
tf.__version__

In [None]:
mnist_data = tf.keras.datasets.mnist

# load_data() returns the dataset already split into a (train, test) pair of
# (images, labels) tuples.
(x_train, y_train), (x_test, y_test) = mnist_data.load_data()

# Normalize by 255 so pixel intensities are fractions rather than raw counts;
# dividing by a float literal also converts the arrays to double precision.
x_train = x_train / 255.
x_test = x_test / 255.

# Backward-compatible alias: other cells refer to the test images by the
# original, misspelled name `x_text`. Prefer `x_test` in new code.
x_text = x_test

In [None]:
# Fully connected baseline: flatten each training sample, pass it through one
# 128-unit ReLU hidden layer with a dropout rate of 0.2, and finish with 10
# output units that have no activation (raw logits).
model_dense = tf.keras.models.Sequential()
model_dense.add(tf.keras.layers.Flatten(input_shape=x_train.shape[1:]))
model_dense.add(tf.keras.layers.Dense(128, activation='relu'))
model_dense.add(tf.keras.layers.Dropout(0.2))
model_dense.add(tf.keras.layers.Dense(10))

# Two-stage convolutional classifier: (Conv2D 3x3 -> 2x2 max-pool) twice, with
# 64 and then 32 filters, followed by a 28-unit ReLU hidden layer, a dropout
# rate of 0.2, and 10 output units with no activation (raw logits).
# NOTE(review): the declared input shape has a trailing channel axis
# (28, 28, 1), but x_train is not reshaped in the visible cells — training
# presumably relies on Keras adding that axis itself; confirm, or reshape the
# data explicitly.
model_double_cnn = tf.keras.models.Sequential()
model_double_cnn.add(
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu', input_shape=(28, 28, 1))
)
model_double_cnn.add(tf.keras.layers.MaxPooling2D((2, 2)))
model_double_cnn.add(tf.keras.layers.Conv2D(32, (3, 3), activation='relu'))
model_double_cnn.add(tf.keras.layers.MaxPooling2D((2, 2)))
model_double_cnn.add(tf.keras.layers.Flatten())
model_double_cnn.add(tf.keras.layers.Dense(28, activation='relu'))
model_double_cnn.add(tf.keras.layers.Dropout(0.2))
model_double_cnn.add(tf.keras.layers.Dense(10))

# Single-stage convolutional classifier: one 64-filter 3x3 convolution and a
# 2x2 max-pool, followed by a 32-unit ReLU hidden layer, a dropout rate of
# 0.2, and 10 output units with no activation (raw logits).
# NOTE(review): the declared input shape has a trailing channel axis
# (28, 28, 1), but x_train is not reshaped in the visible cells — training
# presumably relies on Keras adding that axis itself; confirm, or reshape the
# data explicitly.
model_single_cnn = tf.keras.models.Sequential()
model_single_cnn.add(
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu', input_shape=(28, 28, 1))
)
model_single_cnn.add(tf.keras.layers.MaxPooling2D((2, 2)))
model_single_cnn.add(tf.keras.layers.Flatten())
model_single_cnn.add(tf.keras.layers.Dense(32, activation='relu'))
model_single_cnn.add(tf.keras.layers.Dropout(0.2))
model_single_cnn.add(tf.keras.layers.Dense(10))

In [None]:
# All three models end in Dense(10) with no activation, so their outputs are
# raw logits; from_logits=True tells the loss to expect that. The "sparse"
# variant takes integer class labels rather than one-hot vectors.
loss = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True,
)

In [None]:
# Compile every model with the same settings (Adam optimizer, the shared loss
# object, accuracy as the reported metric) so their results are comparable.
for _net in (model_dense, model_double_cnn, model_single_cnn):
    _net.compile(optimizer='adam', loss=loss, metrics=['accuracy'])

In [None]:
# Train the dense baseline for 5 epochs on the training split. The call is
# left as a bare expression so the notebook echoes fit()'s return value.
model_dense.fit(
    x=x_train,
    y=y_train,
    epochs=5,
)

In [None]:
# Train the two-stage CNN for 5 epochs on the training split. The call is
# left as a bare expression so the notebook echoes fit()'s return value.
model_double_cnn.fit(
    x=x_train,
    y=y_train,
    epochs=5,
)

In [None]:
# Train the single-stage CNN for 5 epochs on the training split. The call is
# left as a bare expression so the notebook echoes fit()'s return value.
model_single_cnn.fit(
    x=x_train,
    y=y_train,
    epochs=5,
)

In [None]:
# Report held-out accuracy for each model. evaluate() returns the loss
# followed by the compiled metrics, so index 1 is the accuracy.
for _label, _net in (
    ("Dense Model Accuracy : ", model_dense),
    ("Double CNN Model Accuracy : ", model_double_cnn),
    ("Single CNN Model Accuracy : ", model_single_cnn),
):
    _scores = _net.evaluate(x_text, y_test, verbose=0)
    print(_label, _scores[1])

As the accuracies above show, the CNN models predict the test digits more accurately than the Dense model, even though their fully connected hidden layers have far fewer neurons (28 or 32 units versus 128). Additionally, the Double CNN did not perform significantly better than the Single CNN, despite its greater complexity.