# **Neural Network Classification with MNIST (Digits)**

## **Importing Libraries**

In [1]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.utils import to_categorical

## **Loading The Dataset**

In [2]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

## **Normalize Dataset**

In [3]:
# Normalize images (0–255 → 0–1)
x_train, x_test = x_train / 255.0, x_test / 255.0

## **Encoding Labels**

In [4]:
# One-hot encode labels
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

## **Build a Base Model**

In [5]:
def build_model(hidden_activation="relu", output_activation="softmax", optimizer="adam"):
    model = Sequential([
        Flatten(input_shape=(28, 28)),          # flatten 28x28 to 784
        Dense(128, activation=hidden_activation),
        Dense(10, activation=output_activation) # 10 classes
    ])

    model.compile(
        optimizer=optimizer,
        loss="categorical_crossentropy",        # works well for classification
        metrics=["accuracy"]
    )
    return model

## **Run Experiments**

### **1. Hidden activation: relu vs sigmoid**

In [6]:
model_relu = build_model(hidden_activation="relu")
history_relu = model_relu.fit(x_train, y_train, epochs=5, batch_size=128, validation_split=0.1, verbose=0)

model_sigmoid = build_model(hidden_activation="sigmoid")
history_sigmoid = model_sigmoid.fit(x_train, y_train, epochs=5, batch_size=128, validation_split=0.1, verbose=0)

  super().__init__(**kwargs)


### **2. Output activation: softmax vs something else**

In [7]:
model_sigmoid_out = build_model(output_activation="sigmoid")
model_sigmoid_out.fit(x_train, y_train, epochs=5, batch_size=128, validation_split=0.1)

Epoch 1/5
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 17ms/step - accuracy: 0.8942 - loss: 0.3834 - val_accuracy: 0.9520 - val_loss: 0.1788
Epoch 2/5
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 15ms/step - accuracy: 0.9477 - loss: 0.1826 - val_accuracy: 0.9640 - val_loss: 0.1288
Epoch 3/5
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 16ms/step - accuracy: 0.9616 - loss: 0.1333 - val_accuracy: 0.9692 - val_loss: 0.1068
Epoch 4/5
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 14ms/step - accuracy: 0.9701 - loss: 0.1027 - val_accuracy: 0.9720 - val_loss: 0.0944
Epoch 5/5
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 15ms/step - accuracy: 0.9758 - loss: 0.0840 - val_accuracy: 0.9728 - val_loss: 0.0934


<keras.src.callbacks.history.History at 0x170d1df47d0>

### **3. Double hidden layers**

In [8]:
def build_deeper_model(hidden_activation="relu", optimizer="adam"):
    model = Sequential([
        Flatten(input_shape=(28, 28)),
        Dense(256, activation=hidden_activation),
        Dense(128, activation=hidden_activation),
        Dense(10, activation="softmax")
    ])
    model.compile(optimizer=optimizer, loss="categorical_crossentropy", metrics=["accuracy"])
    return model

model_deep = build_deeper_model()
history_deep = model_deep.fit(x_train, y_train, epochs=5, batch_size=128, validation_split=0.1, verbose=0)

### **4. Compare optimizers**

In [9]:
optimizers = ["adam", "sgd", "rmsprop"]
results = {}

for opt in optimizers:
    model = build_model(optimizer=opt)
    history = model.fit(x_train, y_train, epochs=5, batch_size=128, validation_split=0.1, verbose=0)
    results[opt] = history.history["val_accuracy"]