In [None]:
# dataset: 10000 MNIST samples (each a 28x28 "image" with a label)
# NOTE: the loading cell below currently trains on the first 1000 rows only.

# framework: tf.keras
# models: NN, CNN, and both combined
    # NN layers: n ReLU neurons | 10 softmax neurons
    # CNN layers: n 3x3 conv filters + ReLU | 2x2 pool | flatten | 10 softmax neurons
    # both: n 3x3 conv filters + ReLU | 2x2 pool | flatten | n ReLU neurons | 10 softmax neurons
    # training: 10 epochs, SparseCategoricalCrossentropy loss, Adam optimizer

# results: everything works; Keras makes this very easy. In particular
# (these appear to be the accuracies printed by model.fit; the fit calls pass no validation data):
    # 32 NN: 94.11%, 64 NN: 96.76%, 128 NN: 98.96%, 256 NN: 99.95% [2.3 MB]
    # 32 CNN: 96.57%, 64 CNN: 98.59%, 128 CNN: 99.78%, 256 CNN: 100% at epoch 9 [5 MB]
    # 32 both: 98.74%, 64 both: 100% at epoch 8
    # 256 both: 100% at epoch 7 (confirmed on 10 test samples) [126.8 MB]

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

In [None]:
# Read the whole CSV once (the header row is skipped). Column 0 is used as the
# label and the remaining columns as the flattened pixel values.
load_data = np.loadtxt("mnist.csv", delimiter=",", skiprows=1)

# Training split: the first 1000 rows.
data = load_data[:1000]
Y_train = data[:, 0]
X_train = data[:, 1:] / 255.0  # scale pixels to [0, 1]

# Held-out split: 12 rows taken from outside the training range.
# The labels and pixels must come from `test` (not `data`), and the pixels are
# scaled exactly like X_train so the models see inputs in the range they were
# trained on.
test = load_data[1001:1013]
Ytest = test[:, 0]
Xtest = test[:, 1:] / 255.0

In [None]:
# Fully connected baseline: 784 inputs -> 32 ReLU units -> 10-way softmax.
model1 = tf.keras.models.Sequential()
model1.add(tf.keras.layers.Dense(32, activation='relu', input_shape=(784,)))
model1.add(tf.keras.layers.Dense(10, activation='softmax'))

model1.compile(
    optimizer='adam',
    loss='SparseCategoricalCrossentropy',
    metrics=['accuracy'],
)

# Ten passes over the training set; Y_train holds one class label per row
# (no one-hot encoding), which is what the sparse loss works with.
model1.fit(X_train, Y_train, epochs=10)

In [None]:
# Convolutional model: the flat 784-vector is reshaped back to a 28x28x1 image,
# passed through one 3x3 conv layer (32 filters) and a 2x2 max pool, then
# flattened and classified by a 10-way softmax layer.
cnn_layers = [
    tf.keras.layers.Reshape((28, 28, 1), input_shape=(784,)),
    tf.keras.layers.Conv2D(32, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(10, activation='softmax'),
]
model2 = tf.keras.models.Sequential(cnn_layers)

model2.compile(
    optimizer='adam',
    loss='SparseCategoricalCrossentropy',
    metrics=['accuracy'],
)

# Same training setup as the dense model: 10 epochs on the training split.
model2.fit(X_train, Y_train, epochs=10)

In [None]:
# Combined model: the conv/pool front end followed by a 32-unit ReLU hidden
# layer before the 10-way softmax output.
model3 = tf.keras.models.Sequential([
    tf.keras.layers.Reshape((28, 28, 1), input_shape=(784,)),
    tf.keras.layers.Conv2D(32, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
    tf.keras.layers.Flatten(),
    # No input_shape here: this layer is fed by Flatten, not by the raw
    # 784-pixel input, so its input size is inferred from the previous layer.
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')])
model3.compile(optimizer='adam', loss='SparseCategoricalCrossentropy', metrics=['accuracy'])

# Train for 10 epochs on the training split.
model3.fit(X_train, Y_train, epochs=10)

In [None]:
# Write the dense and the convolutional model to disk as .keras files.
for trained_model, filename in ((model1, "nn.keras"), (model2, "cnn.keras")):
    trained_model.save(filename)
# The combined model is not saved; uncomment to export it as well:
# model3.save("nncnn.keras")

In [None]:
# Visual sanity check: show the evaluation digits with the label from Ytest next
# to the prediction of the dense model (model1) and the CNN (model2).
# NOTE: both models were fitted on pixels scaled to [0, 1] (see X_train), so
# Xtest has to be scaled the same way for these predictions to be meaningful.
GRID_ROWS, GRID_COLS = 2, 6

# Use a dedicated name instead of rebinding `test`, which the loading cell
# uses for the raw held-out rows.
eval_images = Xtest.reshape(-1, 784)  # one flattened 28x28 image per row

predictions1 = model1.predict(eval_images)
predictions2 = model2.predict(eval_images)
label1 = np.argmax(predictions1, axis=1)  # most probable class per image
label2 = np.argmax(predictions2, axis=1)

fig, axes = plt.subplots(GRID_ROWS, GRID_COLS, figsize=(12, 5))
for ax in axes.flat:
    ax.axis("off")  # hide ticks everywhere, including panels that stay empty

# zip stops at the shortest input, so having fewer images than panels leaves
# the remaining panels blank instead of raising IndexError.
for ax, image, truth, nn_guess, cnn_guess in zip(axes.flat, eval_images, Ytest, label1, label2):
    ax.imshow(image.reshape(28, 28), cmap="gray")
    ax.set_title(f"true: {int(truth)} | NN: {nn_guess} | CNN: {cnn_guess}", fontsize=9)

plt.show()