In [26]:
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, Input, MaxPooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import set_random_seed, to_categorical

In [27]:
# Seed Python's `random`, NumPy and TensorFlow in one call so that weight
# initialisation, batch shuffling and dropout masks repeat from run to run
# (some accelerator kernels may still be nondeterministic).
SEED = 42
set_random_seed(SEED)

# MNIST handwritten digits, already split into train and test sets:
# images come back as integer pixel arrays, labels as integer class ids.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# PREPROCESSING

In [28]:
# --- Labels ------------------------------------------------------------
# Integer class ids -> one-hot rows of width 10. The models below are
# compiled with 'categorical_crossentropy', which expects this encoding.
y_train_onehot, y_test_onehot = (
    to_categorical(labels, 10) for labels in (y_train, y_test)
)

n_train, n_test = x_train.shape[0], x_test.shape[0]

# --- Inputs for the dense network ---------------------------------------
# Collapse every 28x28 image into one row of 784 raw pixel values ...
x_train_flat = x_train.reshape((n_train, -1))  # (60000, 784)
x_test_flat = x_test.reshape((n_test, -1))     # (10000, 784)
# ... then cast to float32 and rescale pixels from [0, 255] to [0, 1].
x_train_normalized = x_train_flat.astype('float32') / 255.0
x_test_normalized = x_test_flat.astype('float32') / 255.0

# --- Inputs for the CNN -------------------------------------------------
# Keep the 2D layout, append a single channel axis, and apply the same
# float32 / [0, 1] scaling as above.
x_train_cnn = x_train.reshape((n_train, 28, 28, 1)).astype('float32') / 255.0
x_test_cnn = x_test.reshape((n_test, 28, 28, 1)).astype('float32') / 255.0

# Train dense-only NN

In [29]:
# Fully connected baseline: 784 -> 128 -> 64 -> 32 -> 10.
dense_model = Sequential([
    # Declare the 784-feature input with an explicit Input object; Keras 3
    # warns when `input_shape` is passed to the first layer instead.
    Input(shape=(784,)),
    Dense(128, activation='relu'),

    # Hidden layers, each followed by 20% dropout for regularisation
    Dense(64, activation='relu'),
    Dropout(0.2),

    Dense(32, activation='relu'),
    Dropout(0.2),

    # Output layer: one softmax probability per digit class
    Dense(10, activation='softmax')
])

In [30]:
# Training setup: Adam optimiser, accuracy as the reported metric, and
# categorical cross-entropy as the loss (pairs with one-hot encoded labels).
dense_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [31]:
print("\nTraining model...")
# Train on the flattened, normalised images; `validation_split=0.1` holds
# out 10% of the training data to report val_loss / val_accuracy per epoch.
history = dense_model.fit(
    x=x_train_normalized,
    y=y_train_onehot,
    epochs=10,
    batch_size=128,
    validation_split=0.1,
    verbose=1,
)


Training model...
Epoch 1/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 7ms/step - accuracy: 0.6933 - loss: 0.9521 - val_accuracy: 0.9582 - val_loss: 0.1456
Epoch 2/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 0.9375 - loss: 0.2214 - val_accuracy: 0.9672 - val_loss: 0.1091
Epoch 3/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - accuracy: 0.9554 - loss: 0.1597 - val_accuracy: 0.9710 - val_loss: 0.0972
Epoch 4/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - accuracy: 0.9648 - loss: 0.1244 - val_accuracy: 0.9765 - val_loss: 0.0807
Epoch 5/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 7ms/step - accuracy: 0.9730 - loss: 0.0983 - val_accuracy: 0.9753 - val_loss: 0.0867
Epoch 6/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.9774 - loss: 0.0830 - val_accuracy: 0.9763 - val_loss: 0.0843
Epoch 7/

In [22]:
print("\nEvaluating on test set...")
# Score the dense model on the held-out test split; with a single compiled
# metric, evaluate() yields the loss followed by the accuracy.
dense_scores = dense_model.evaluate(x=x_test_normalized, y=y_test_onehot, verbose=0)
test_loss, test_accuracy = dense_scores
print(f"Test accuracy: {test_accuracy:.4f}")


Evaluating on test set...
Test accuracy: 0.9781


# Train CNN

In [23]:
# Minimal CNN: one conv + pool stage feeding a small dense classifier.
cnn_model = Sequential([
    # Declare the 28x28 single-channel input with an explicit Input object;
    # passing `input_shape` to Conv2D makes Keras 3 emit a UserWarning.
    Input(shape=(28, 28, 1)),

    # Convolution: 32 feature maps from 3x3 filters
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),

    # Max pooling: shrink the spatial dimensions with a 2x2 window
    MaxPooling2D(pool_size=(2, 2)),

    # Flatten: turn the 2D feature maps into a 1D vector for the dense layers
    Flatten(),

    # Classification head with 20% dropout before the softmax output
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(10, activation='softmax')
], name="Simple_CNN")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [24]:
# Same optimiser, loss and metric as the dense baseline so the two models
# are compared under one training configuration.
cnn_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [25]:
# Train the CNN on the (28, 28, 1) images with the same batch size and
# validation split as the dense model. NOTE: this runs for 5 epochs while
# the dense baseline above runs for 10, so the two are not epoch-matched.
cnn_history = cnn_model.fit(
    x=x_train_cnn,
    y=y_train_onehot,
    epochs=5,
    batch_size=128,
    validation_split=0.1,
    verbose=1,
)


Epoch 1/5
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 67ms/step - accuracy: 0.8342 - loss: 0.5756 - val_accuracy: 0.9737 - val_loss: 0.0945
Epoch 2/5
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 68ms/step - accuracy: 0.9659 - loss: 0.1201 - val_accuracy: 0.9840 - val_loss: 0.0604
Epoch 3/5
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 71ms/step - accuracy: 0.9778 - loss: 0.0756 - val_accuracy: 0.9845 - val_loss: 0.0582
Epoch 4/5
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 61ms/step - accuracy: 0.9829 - loss: 0.0577 - val_accuracy: 0.9848 - val_loss: 0.0513
Epoch 5/5
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 58ms/step - accuracy: 0.9852 - loss: 0.0484 - val_accuracy: 0.9883 - val_loss: 0.0444


In [32]:
# Score the CNN on the held-out test split and report accuracy both as a
# fraction and as a percentage.
cnn_scores = cnn_model.evaluate(x=x_test_cnn, y=y_test_onehot, verbose=0)
cnn_loss, cnn_accuracy = cnn_scores
print(f"Simple CNN Test Accuracy: {cnn_accuracy:.4f} ({cnn_accuracy*100:.2f}%)")

Simple CNN Test Accuracy: 0.9848 (98.48%)
