In [1]:
from tensorflow.keras.datasets import mnist
import pandas as pd

In [2]:
(X_train, y_train), (X_test, y_test) = mnist.load_data()

print("MNIST train shape:", X_train.shape)
print("MNIST test shape:", X_test.shape)

MNIST train shape: (60000, 28, 28)
MNIST test shape: (10000, 28, 28)


In [3]:

X_train_flat = X_train.reshape(X_train.shape[0], -1)
X_test_flat = X_test.reshape(X_test.shape[0], -1)

df_train = pd.DataFrame(X_train_flat)
df_train['label'] = y_train

df_test = pd.DataFrame(X_test_flat)
df_test['label'] = y_test

print("Train DataFrame shape:", df_train.shape)
print("Test DataFrame shape:", df_test.shape)
df_train.head()

Train DataFrame shape: (60000, 785)
Test DataFrame shape: (10000, 785)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,775,776,777,778,779,780,781,782,783,label
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,5
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,4
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,9


In [4]:
df_train.isnull().sum()

0        0
1        0
2        0
3        0
4        0
        ..
780      0
781      0
782      0
783      0
label    0
Length: 785, dtype: int64

In [5]:
df_train.duplicated().sum()

np.int64(0)

## ReLU Hidden Layer + Softmax Output

#### Prepare Labels (One-hot)

In [6]:
from tensorflow.keras.utils import to_categorical
y_train_cat = to_categorical(y_train, num_classes=10)
y_test_cat = to_categorical(y_test, num_classes=10)

#### Model Definition

In [7]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
model_relu_softmax = Sequential([
    Dense(128, activation='relu', input_shape=(784,)),
    Dense(10, activation='softmax')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


#### Compile Model

In [8]:
model_relu_softmax.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

#### Train Model

In [9]:
history_relu_softmax = model_relu_softmax.fit(
    X_train_flat, y_train_cat,
    epochs=10, batch_size=32, validation_split=0.1, verbose=2
)

Epoch 1/10
1688/1688 - 3s - 2ms/step - accuracy: 0.8493 - loss: 2.7305 - val_accuracy: 0.9003 - val_loss: 0.4679
Epoch 2/10
1688/1688 - 2s - 1ms/step - accuracy: 0.9044 - loss: 0.4186 - val_accuracy: 0.9167 - val_loss: 0.3304
Epoch 3/10
1688/1688 - 2s - 1ms/step - accuracy: 0.9261 - loss: 0.2911 - val_accuracy: 0.9488 - val_loss: 0.2220
Epoch 4/10
1688/1688 - 2s - 1ms/step - accuracy: 0.9354 - loss: 0.2570 - val_accuracy: 0.9492 - val_loss: 0.2211
Epoch 5/10
1688/1688 - 2s - 1ms/step - accuracy: 0.9410 - loss: 0.2377 - val_accuracy: 0.9425 - val_loss: 0.2278
Epoch 6/10
1688/1688 - 2s - 1ms/step - accuracy: 0.9444 - loss: 0.2186 - val_accuracy: 0.9522 - val_loss: 0.2063
Epoch 7/10
1688/1688 - 2s - 1ms/step - accuracy: 0.9481 - loss: 0.2083 - val_accuracy: 0.9507 - val_loss: 0.2002
Epoch 8/10
1688/1688 - 2s - 1ms/step - accuracy: 0.9517 - loss: 0.1925 - val_accuracy: 0.9480 - val_loss: 0.2298
Epoch 9/10
1688/1688 - 2s - 1ms/step - accuracy: 0.9520 - loss: 0.1952 - val_accuracy: 0.9500 - 

#### Evaluate Model

In [10]:
score = model_relu_softmax.evaluate(X_test_flat, y_test_cat, verbose=0)
print(f'ReLU+Softmax Test Accuracy: {score[1]:.4f}')

ReLU+Softmax Test Accuracy: 0.9445


## ReLU Hidden Layer + Softmax Output with SGD Optimizer

#### Model Definition

In [11]:
from tensorflow.keras.optimizers import SGD
model_relu_softmax_sgd = Sequential([
    Dense(128, activation='relu', input_shape=(784,)),
    Dense(10, activation='softmax')
])

#### Compile Model

In [12]:
model_relu_softmax_sgd.compile(optimizer=SGD(), loss='categorical_crossentropy', metrics=['accuracy'])

#### Train Model

In [13]:
history_relu_softmax_sgd = model_relu_softmax_sgd.fit(
    X_train_flat, y_train_cat,
    epochs=10, batch_size=32, validation_split=0.1, verbose=2
)

Epoch 1/10
1688/1688 - 2s - 1ms/step - accuracy: 0.2503 - loss: 50.6571 - val_accuracy: 0.2660 - val_loss: 1.9049
Epoch 2/10
1688/1688 - 2s - 1ms/step - accuracy: 0.2674 - loss: 1.9272 - val_accuracy: 0.2432 - val_loss: 1.9417
Epoch 3/10
1688/1688 - 2s - 1ms/step - accuracy: 0.2524 - loss: 1.9352 - val_accuracy: 0.2155 - val_loss: 2.0013
Epoch 4/10
1688/1688 - 2s - 1ms/step - accuracy: 0.2491 - loss: 1.9381 - val_accuracy: 0.2452 - val_loss: 1.9176
Epoch 5/10
1688/1688 - 2s - 1ms/step - accuracy: 0.2389 - loss: 1.9707 - val_accuracy: 0.2527 - val_loss: 1.9751
Epoch 6/10
1688/1688 - 2s - 1ms/step - accuracy: 0.2434 - loss: 2.0249 - val_accuracy: 0.2570 - val_loss: 1.9650
Epoch 7/10
1688/1688 - 2s - 1000us/step - accuracy: 0.2494 - loss: 1.9727 - val_accuracy: 0.2187 - val_loss: 1.9869
Epoch 8/10
1688/1688 - 2s - 997us/step - accuracy: 0.2493 - loss: 1.9284 - val_accuracy: 0.2718 - val_loss: 1.8834
Epoch 9/10
1688/1688 - 2s - 1ms/step - accuracy: 0.2429 - loss: 1.9481 - val_accuracy: 0.2

#### Evaluate Model

In [14]:
score = model_relu_softmax_sgd.evaluate(X_test_flat, y_test_cat, verbose=0)
print(f'ReLU+Softmax (SGD) Test Accuracy: {score[1]:.4f}')

ReLU+Softmax (SGD) Test Accuracy: 0.2534


## ReLU Hidden Layer + Softmax Output with RMSprop Optimizer

#### Model Definition

In [15]:
from tensorflow.keras.optimizers import RMSprop
model_relu_softmax_rmsprop = Sequential([
    Dense(128, activation='relu', input_shape=(784,)),
    Dense(10, activation='softmax')
])

#### Compile Model

In [16]:
model_relu_softmax_rmsprop.compile(optimizer=RMSprop(), loss='categorical_crossentropy', metrics=['accuracy'])

#### Train Model

In [17]:
history_relu_softmax_rmsprop = model_relu_softmax_rmsprop.fit(
    X_train_flat, y_train_cat,
    epochs=10, batch_size=32, validation_split=0.1, verbose=2
)

Epoch 1/10
1688/1688 - 2s - 1ms/step - accuracy: 0.8713 - loss: 2.9943 - val_accuracy: 0.9310 - val_loss: 0.5385
Epoch 2/10
1688/1688 - 2s - 1ms/step - accuracy: 0.9246 - loss: 0.6131 - val_accuracy: 0.9368 - val_loss: 0.3976
Epoch 3/10
1688/1688 - 2s - 1ms/step - accuracy: 0.9436 - loss: 0.4574 - val_accuracy: 0.9480 - val_loss: 0.5132
Epoch 4/10
1688/1688 - 2s - 1ms/step - accuracy: 0.9527 - loss: 0.3817 - val_accuracy: 0.9505 - val_loss: 0.5775
Epoch 5/10
1688/1688 - 2s - 1ms/step - accuracy: 0.9590 - loss: 0.3477 - val_accuracy: 0.9590 - val_loss: 0.4562
Epoch 6/10
1688/1688 - 2s - 1ms/step - accuracy: 0.9621 - loss: 0.3174 - val_accuracy: 0.9602 - val_loss: 0.4339
Epoch 7/10
1688/1688 - 2s - 1ms/step - accuracy: 0.9656 - loss: 0.2888 - val_accuracy: 0.9655 - val_loss: 0.4648
Epoch 8/10
1688/1688 - 2s - 1ms/step - accuracy: 0.9684 - loss: 0.2682 - val_accuracy: 0.9633 - val_loss: 0.4533
Epoch 9/10
1688/1688 - 2s - 1ms/step - accuracy: 0.9714 - loss: 0.2444 - val_accuracy: 0.9630 - 

#### Evaluate Model

In [18]:
score = model_relu_softmax_rmsprop.evaluate(X_test_flat, y_test_cat, verbose=0)
print(f'ReLU+Softmax (RMSprop) Test Accuracy: {score[1]:.4f}')

ReLU+Softmax (RMSprop) Test Accuracy: 0.9559


## ReLU Hidden Layer + Sigmoid Output

#### Model Definition

In [19]:
model_relu_sigmoid = Sequential([
    Dense(128, activation='relu', input_shape=(784,)),
    Dense(10, activation='sigmoid')
])

#### Compile Model

In [20]:
model_relu_sigmoid.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

#### Train Model

In [21]:
history_relu_sigmoid = model_relu_sigmoid.fit(
    X_train_flat, y_train_cat,
    epochs=10, batch_size=32, validation_split=0.1, verbose=2
)

Epoch 1/10
1688/1688 - 3s - 2ms/step - accuracy: 0.8865 - loss: 0.4585 - val_accuracy: 0.9338 - val_loss: 0.0750
Epoch 2/10
1688/1688 - 2s - 1ms/step - accuracy: 0.9323 - loss: 0.0655 - val_accuracy: 0.9418 - val_loss: 0.0449
Epoch 3/10
1688/1688 - 2s - 1ms/step - accuracy: 0.9450 - loss: 0.0426 - val_accuracy: 0.9540 - val_loss: 0.0345
Epoch 4/10
1688/1688 - 2s - 1ms/step - accuracy: 0.9511 - loss: 0.0362 - val_accuracy: 0.9545 - val_loss: 0.0331
Epoch 5/10
1688/1688 - 3s - 2ms/step - accuracy: 0.9562 - loss: 0.0335 - val_accuracy: 0.9565 - val_loss: 0.0327
Epoch 6/10
1688/1688 - 4s - 3ms/step - accuracy: 0.9601 - loss: 0.0315 - val_accuracy: 0.9622 - val_loss: 0.0315
Epoch 7/10
1688/1688 - 5s - 3ms/step - accuracy: 0.9633 - loss: 0.0282 - val_accuracy: 0.9570 - val_loss: 0.0339
Epoch 8/10
1688/1688 - 4s - 3ms/step - accuracy: 0.9649 - loss: 0.0271 - val_accuracy: 0.9630 - val_loss: 0.0316
Epoch 9/10
1688/1688 - 5s - 3ms/step - accuracy: 0.9686 - loss: 0.0249 - val_accuracy: 0.9660 - 

#### Evaluate Model

In [22]:
score = model_relu_sigmoid.evaluate(X_test_flat, y_test_cat, verbose=0)
print(f'ReLU+Sigmoid Test Accuracy: {score[1]:.4f}')

ReLU+Sigmoid Test Accuracy: 0.9567


## ReLU Hidden Layer + Sigmoid Output with SGD Optimizer

#### Model Definition

In [23]:
model_relu_sigmoid_sgd = Sequential([
    Dense(128, activation='relu', input_shape=(784,)),
    Dense(10, activation='sigmoid')
])

#### Compile Model

In [24]:
model_relu_sigmoid_sgd.compile(optimizer=SGD(), loss='binary_crossentropy', metrics=['accuracy'])

#### Train Model

In [25]:
history_relu_sigmoid_sgd = model_relu_sigmoid_sgd.fit(
    X_train_flat, y_train_cat,
    epochs=10, batch_size=32, validation_split=0.1, verbose=2
)

Epoch 1/10
1688/1688 - 5s - 3ms/step - accuracy: 0.7667 - loss: 0.3815 - val_accuracy: 0.7867 - val_loss: 0.1009
Epoch 2/10
1688/1688 - 4s - 2ms/step - accuracy: 0.8650 - loss: 0.0902 - val_accuracy: 0.9048 - val_loss: 0.0725
Epoch 3/10
1688/1688 - 4s - 2ms/step - accuracy: 0.8970 - loss: 0.0731 - val_accuracy: 0.9243 - val_loss: 0.0589
Epoch 4/10
1688/1688 - 4s - 2ms/step - accuracy: 0.9127 - loss: 0.0615 - val_accuracy: 0.9250 - val_loss: 0.0542
Epoch 5/10
1688/1688 - 4s - 2ms/step - accuracy: 0.9236 - loss: 0.0533 - val_accuracy: 0.9232 - val_loss: 0.0615
Epoch 6/10
1688/1688 - 4s - 2ms/step - accuracy: 0.9293 - loss: 0.0494 - val_accuracy: 0.9413 - val_loss: 0.0429
Epoch 7/10
1688/1688 - 4s - 2ms/step - accuracy: 0.9328 - loss: 0.0469 - val_accuracy: 0.9438 - val_loss: 0.0406
Epoch 8/10
1688/1688 - 4s - 2ms/step - accuracy: 0.9358 - loss: 0.0452 - val_accuracy: 0.9452 - val_loss: 0.0403
Epoch 9/10
1688/1688 - 4s - 2ms/step - accuracy: 0.9380 - loss: 0.0433 - val_accuracy: 0.9420 - 

#### Evaluate Model

In [26]:
score = model_relu_sigmoid_sgd.evaluate(X_test_flat, y_test_cat, verbose=0)
print(f'ReLU+Sigmoid (SGD) Test Accuracy: {score[1]:.4f}')

ReLU+Sigmoid (SGD) Test Accuracy: 0.9369


## ReLU Hidden Layer + Sigmoid Output with RMSprop Optimizer

#### Model Definition

In [27]:
model_relu_sigmoid_rmsprop = Sequential([
    Dense(128, activation='relu', input_shape=(784,)),
    Dense(10, activation='sigmoid')
])
model_relu_sigmoid_rmsprop.summary()

#### Compile Model

In [28]:
model_relu_sigmoid_rmsprop.compile(optimizer=RMSprop(), loss='binary_crossentropy', metrics=['accuracy'])

#### Train Model

In [29]:
history_relu_sigmoid_rmsprop = model_relu_sigmoid_rmsprop.fit(
    X_train_flat, y_train_cat,
    epochs=10, batch_size=32, validation_split=0.1, verbose=2
)

Epoch 1/10
1688/1688 - 5s - 3ms/step - accuracy: 0.8881 - loss: 0.4251 - val_accuracy: 0.9418 - val_loss: 0.0748
Epoch 2/10
1688/1688 - 4s - 3ms/step - accuracy: 0.9329 - loss: 0.0750 - val_accuracy: 0.9485 - val_loss: 0.0535
Epoch 3/10
1688/1688 - 4s - 3ms/step - accuracy: 0.9434 - loss: 0.0608 - val_accuracy: 0.9472 - val_loss: 0.0608
Epoch 4/10
1688/1688 - 5s - 3ms/step - accuracy: 0.9496 - loss: 0.0563 - val_accuracy: 0.9560 - val_loss: 0.0487
Epoch 5/10
1688/1688 - 5s - 3ms/step - accuracy: 0.9504 - loss: 0.0544 - val_accuracy: 0.9558 - val_loss: 0.0535
Epoch 6/10
1688/1688 - 4s - 3ms/step - accuracy: 0.9552 - loss: 0.0523 - val_accuracy: 0.9537 - val_loss: 0.0607
Epoch 7/10
1688/1688 - 5s - 3ms/step - accuracy: 0.9556 - loss: 0.0512 - val_accuracy: 0.9532 - val_loss: 0.0578
Epoch 8/10
1688/1688 - 4s - 3ms/step - accuracy: 0.9562 - loss: 0.0487 - val_accuracy: 0.9508 - val_loss: 0.0542
Epoch 9/10
1688/1688 - 5s - 3ms/step - accuracy: 0.9566 - loss: 0.0466 - val_accuracy: 0.9578 - 

#### Evaluate Model

In [30]:
score = model_relu_sigmoid_rmsprop.evaluate(X_test_flat, y_test_cat, verbose=0)
print(f'ReLU+Sigmoid (RMSprop) Test Accuracy: {score[1]:.4f}')

ReLU+Sigmoid (RMSprop) Test Accuracy: 0.9500


## Sigmoid Hidden Layer + Softmax Output

#### Model Definition

In [31]:
model_sigmoid_softmax = Sequential([
    Dense(128, activation='sigmoid', input_shape=(784,)),
    Dense(10, activation='softmax')
])

#### Compile Model

In [32]:
model_sigmoid_softmax.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

#### Train Model

In [33]:
history_sigmoid_softmax = model_sigmoid_softmax.fit(
    X_train_flat, y_train_cat,
    epochs=10, batch_size=32, validation_split=0.1, verbose=2
)

Epoch 1/10
1688/1688 - 6s - 4ms/step - accuracy: 0.8502 - loss: 0.5548 - val_accuracy: 0.9147 - val_loss: 0.3226
Epoch 2/10
1688/1688 - 5s - 3ms/step - accuracy: 0.8969 - loss: 0.3553 - val_accuracy: 0.9123 - val_loss: 0.2979
Epoch 3/10
1688/1688 - 5s - 3ms/step - accuracy: 0.9016 - loss: 0.3361 - val_accuracy: 0.9292 - val_loss: 0.2560
Epoch 4/10
1688/1688 - 5s - 3ms/step - accuracy: 0.9057 - loss: 0.3117 - val_accuracy: 0.9295 - val_loss: 0.2465
Epoch 5/10
1688/1688 - 5s - 3ms/step - accuracy: 0.9113 - loss: 0.2969 - val_accuracy: 0.9293 - val_loss: 0.2344
Epoch 6/10
1688/1688 - 5s - 3ms/step - accuracy: 0.9123 - loss: 0.2927 - val_accuracy: 0.9327 - val_loss: 0.2251
Epoch 7/10
1688/1688 - 5s - 3ms/step - accuracy: 0.9166 - loss: 0.2786 - val_accuracy: 0.9332 - val_loss: 0.2300
Epoch 8/10
1688/1688 - 5s - 3ms/step - accuracy: 0.9191 - loss: 0.2687 - val_accuracy: 0.9332 - val_loss: 0.2170
Epoch 9/10
1688/1688 - 5s - 3ms/step - accuracy: 0.9217 - loss: 0.2573 - val_accuracy: 0.9355 - 

#### Evaluate Model

In [34]:
score = model_sigmoid_softmax.evaluate(X_test_flat, y_test_cat, verbose=0)
print(f'Sigmoid+Softmax Test Accuracy: {score[1]:.4f}')

Sigmoid+Softmax Test Accuracy: 0.9238


## Sigmoid Hidden Layer + Softmax Output with SGD Optimizer

#### Model Definition

In [35]:
model_sigmoid_softmax_sgd = Sequential([
    Dense(128, activation='sigmoid', input_shape=(784,)),
    Dense(10, activation='softmax')
])

#### Compile Model

In [36]:
model_sigmoid_softmax_sgd.compile(optimizer=SGD(), loss='categorical_crossentropy', metrics=['accuracy'])

#### Train Model

In [37]:
history_sigmoid_softmax_sgd = model_sigmoid_softmax_sgd.fit(
    X_train_flat, y_train_cat,
    epochs=10, batch_size=32, validation_split=0.1, verbose=2
)

Epoch 1/10
1688/1688 - 5s - 3ms/step - accuracy: 0.8271 - loss: 0.6741 - val_accuracy: 0.9113 - val_loss: 0.3777
Epoch 2/10
1688/1688 - 4s - 2ms/step - accuracy: 0.8964 - loss: 0.3856 - val_accuracy: 0.9248 - val_loss: 0.2940
Epoch 3/10
1688/1688 - 4s - 2ms/step - accuracy: 0.9091 - loss: 0.3295 - val_accuracy: 0.9297 - val_loss: 0.2575
Epoch 4/10
1688/1688 - 4s - 2ms/step - accuracy: 0.9162 - loss: 0.2986 - val_accuracy: 0.9357 - val_loss: 0.2423
Epoch 5/10
1688/1688 - 4s - 3ms/step - accuracy: 0.9221 - loss: 0.2796 - val_accuracy: 0.9368 - val_loss: 0.2342
Epoch 6/10
1688/1688 - 4s - 2ms/step - accuracy: 0.9256 - loss: 0.2591 - val_accuracy: 0.9415 - val_loss: 0.2081
Epoch 7/10
1688/1688 - 4s - 2ms/step - accuracy: 0.9286 - loss: 0.2515 - val_accuracy: 0.9452 - val_loss: 0.2089
Epoch 8/10
1688/1688 - 4s - 2ms/step - accuracy: 0.9317 - loss: 0.2395 - val_accuracy: 0.9475 - val_loss: 0.1979
Epoch 9/10
1688/1688 - 4s - 2ms/step - accuracy: 0.9327 - loss: 0.2345 - val_accuracy: 0.9455 - 

#### Evaluate Model

In [38]:
score = model_sigmoid_softmax_sgd.evaluate(X_test_flat, y_test_cat, verbose=0)
print(f'Sigmoid+Softmax (SGD) Test Accuracy: {score[1]:.4f}')

Sigmoid+Softmax (SGD) Test Accuracy: 0.9386


## Sigmoid Hidden Layer + Softmax Output with RMSprop Optimizer

#### Model Definition

In [39]:
model_sigmoid_softmax_rmsprop = Sequential([
    Dense(128, activation='sigmoid', input_shape=(784,)),
    Dense(10, activation='softmax')
])
model_sigmoid_softmax_rmsprop.summary()

#### Compile Model

In [40]:
model_sigmoid_softmax_rmsprop.compile(optimizer=RMSprop(), loss='categorical_crossentropy', metrics=['accuracy'])

#### Train Model

In [41]:
history_sigmoid_softmax_rmsprop = model_sigmoid_softmax_rmsprop.fit(
    X_train_flat, y_train_cat,
    epochs=10, batch_size=32, validation_split=0.1, verbose=2
)

Epoch 1/10
1688/1688 - 6s - 3ms/step - accuracy: 0.8652 - loss: 0.4658 - val_accuracy: 0.9243 - val_loss: 0.2461
Epoch 2/10
1688/1688 - 4s - 3ms/step - accuracy: 0.9154 - loss: 0.2771 - val_accuracy: 0.9397 - val_loss: 0.2003
Epoch 3/10
1688/1688 - 4s - 3ms/step - accuracy: 0.9256 - loss: 0.2455 - val_accuracy: 0.9403 - val_loss: 0.1865
Epoch 4/10
1688/1688 - 4s - 3ms/step - accuracy: 0.9326 - loss: 0.2249 - val_accuracy: 0.9413 - val_loss: 0.1851
Epoch 5/10
1688/1688 - 4s - 2ms/step - accuracy: 0.9346 - loss: 0.2160 - val_accuracy: 0.9500 - val_loss: 0.1680
Epoch 6/10
1688/1688 - 4s - 2ms/step - accuracy: 0.9386 - loss: 0.2065 - val_accuracy: 0.9540 - val_loss: 0.1629
Epoch 7/10
1688/1688 - 4s - 2ms/step - accuracy: 0.9394 - loss: 0.1983 - val_accuracy: 0.9485 - val_loss: 0.1643
Epoch 8/10
1688/1688 - 4s - 2ms/step - accuracy: 0.9423 - loss: 0.1918 - val_accuracy: 0.9523 - val_loss: 0.1576
Epoch 9/10
1688/1688 - 4s - 2ms/step - accuracy: 0.9428 - loss: 0.1910 - val_accuracy: 0.9537 - 

#### Evaluate Model

In [42]:
score = model_sigmoid_softmax_rmsprop.evaluate(X_test_flat, y_test_cat, verbose=0)
print(f'Sigmoid+Softmax (RMSprop) Test Accuracy: {score[1]:.4f}')

Sigmoid+Softmax (RMSprop) Test Accuracy: 0.9460


## Sigmoid Hidden Layer + Sigmoid Output

#### Model Definition

In [43]:
model_sigmoid_sigmoid = Sequential([
    Dense(128, activation='sigmoid', input_shape=(784,)),
    Dense(10, activation='sigmoid')
])

#### Compile Model

In [44]:
model_sigmoid_sigmoid.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

#### Train Model

In [45]:
history_sigmoid_sigmoid = model_sigmoid_sigmoid.fit(
    X_train_flat, y_train_cat,
    epochs=10, batch_size=32, validation_split=0.1, verbose=2
)

Epoch 1/10
1688/1688 - 5s - 3ms/step - accuracy: 0.8305 - loss: 0.1188 - val_accuracy: 0.9085 - val_loss: 0.0688
Epoch 2/10
1688/1688 - 4s - 2ms/step - accuracy: 0.8951 - loss: 0.0724 - val_accuracy: 0.9202 - val_loss: 0.0581
Epoch 3/10
1688/1688 - 4s - 3ms/step - accuracy: 0.9070 - loss: 0.0637 - val_accuracy: 0.9257 - val_loss: 0.0542
Epoch 4/10
1688/1688 - 4s - 3ms/step - accuracy: 0.9124 - loss: 0.0584 - val_accuracy: 0.9257 - val_loss: 0.0514
Epoch 5/10
1688/1688 - 4s - 3ms/step - accuracy: 0.9194 - loss: 0.0547 - val_accuracy: 0.9362 - val_loss: 0.0468
Epoch 6/10
1688/1688 - 4s - 2ms/step - accuracy: 0.9190 - loss: 0.0544 - val_accuracy: 0.9243 - val_loss: 0.0509
Epoch 7/10
1688/1688 - 5s - 3ms/step - accuracy: 0.9223 - loss: 0.0518 - val_accuracy: 0.9383 - val_loss: 0.0431
Epoch 8/10
1688/1688 - 4s - 2ms/step - accuracy: 0.9256 - loss: 0.0488 - val_accuracy: 0.9385 - val_loss: 0.0417
Epoch 9/10
1688/1688 - 4s - 2ms/step - accuracy: 0.9280 - loss: 0.0481 - val_accuracy: 0.9458 - 

#### Evaluate Model

In [46]:
score = model_sigmoid_sigmoid.evaluate(X_test_flat, y_test_cat, verbose=0)
print(f'Sigmoid+Sigmoid Test Accuracy: {score[1]:.4f}')

Sigmoid+Sigmoid Test Accuracy: 0.9331


## Sigmoid Hidden Layer + Sigmoid Output with SGD Optimizer

#### Model Definition

In [47]:
model_sigmoid_sigmoid_sgd = Sequential([
    Dense(128, activation='sigmoid', input_shape=(784,)),
    Dense(10, activation='sigmoid')
])

#### Compile Model

In [48]:
model_sigmoid_sigmoid_sgd.compile(optimizer=SGD(), loss='binary_crossentropy', metrics=['accuracy'])

#### Train Model

In [49]:
history_sigmoid_sigmoid_sgd = model_sigmoid_sigmoid_sgd.fit(
    X_train_flat, y_train_cat,
    epochs=10, batch_size=32, validation_split=0.1, verbose=2
)

Epoch 1/10
1688/1688 - 4s - 2ms/step - accuracy: 0.6060 - loss: 0.2424 - val_accuracy: 0.8175 - val_loss: 0.1738
Epoch 2/10
1688/1688 - 4s - 2ms/step - accuracy: 0.8154 - loss: 0.1590 - val_accuracy: 0.8745 - val_loss: 0.1329
Epoch 3/10
1688/1688 - 3s - 2ms/step - accuracy: 0.8531 - loss: 0.1302 - val_accuracy: 0.8933 - val_loss: 0.1115
Epoch 4/10
1688/1688 - 3s - 2ms/step - accuracy: 0.8714 - loss: 0.1130 - val_accuracy: 0.9023 - val_loss: 0.0982
Epoch 5/10
1688/1688 - 3s - 2ms/step - accuracy: 0.8824 - loss: 0.1018 - val_accuracy: 0.9080 - val_loss: 0.0890
Epoch 6/10
1688/1688 - 3s - 2ms/step - accuracy: 0.8903 - loss: 0.0933 - val_accuracy: 0.9127 - val_loss: 0.0817
Epoch 7/10
1688/1688 - 3s - 2ms/step - accuracy: 0.8945 - loss: 0.0869 - val_accuracy: 0.9168 - val_loss: 0.0761
Epoch 8/10
1688/1688 - 3s - 2ms/step - accuracy: 0.9003 - loss: 0.0817 - val_accuracy: 0.9185 - val_loss: 0.0719
Epoch 9/10
1688/1688 - 3s - 2ms/step - accuracy: 0.9044 - loss: 0.0775 - val_accuracy: 0.9188 - 

#### Evaluate Model

In [50]:
score = model_sigmoid_sigmoid_sgd.evaluate(X_test_flat, y_test_cat, verbose=0)
print(f'Sigmoid+Sigmoid (SGD) Test Accuracy: {score[1]:.4f}')

Sigmoid+Sigmoid (SGD) Test Accuracy: 0.9115


## Sigmoid Hidden Layer + Sigmoid Output with RMSprop Optimizer

#### Model Definition

In [51]:
model_sigmoid_sigmoid_rmsprop = Sequential([
    Dense(128, activation='sigmoid', input_shape=(784,)),
    Dense(10, activation='sigmoid')
])

#### Compile Model

In [52]:
model_sigmoid_sigmoid_rmsprop.compile(optimizer=RMSprop(), loss='binary_crossentropy', metrics=['accuracy'])

#### Train Model

In [53]:
history_sigmoid_sigmoid_rmsprop = model_sigmoid_sigmoid_rmsprop.fit(
    X_train_flat, y_train_cat,
    epochs=10, batch_size=32, validation_split=0.1, verbose=2
)

Epoch 1/10
1688/1688 - 5s - 3ms/step - accuracy: 0.8476 - loss: 0.1005 - val_accuracy: 0.9247 - val_loss: 0.0530
Epoch 2/10
1688/1688 - 4s - 2ms/step - accuracy: 0.9070 - loss: 0.0594 - val_accuracy: 0.9310 - val_loss: 0.0464
Epoch 3/10
1688/1688 - 4s - 2ms/step - accuracy: 0.9186 - loss: 0.0519 - val_accuracy: 0.9362 - val_loss: 0.0409
Epoch 4/10
1688/1688 - 3s - 2ms/step - accuracy: 0.9234 - loss: 0.0486 - val_accuracy: 0.9445 - val_loss: 0.0385
Epoch 5/10
1688/1688 - 4s - 2ms/step - accuracy: 0.9276 - loss: 0.0458 - val_accuracy: 0.9425 - val_loss: 0.0370
Epoch 6/10
1688/1688 - 4s - 2ms/step - accuracy: 0.9328 - loss: 0.0435 - val_accuracy: 0.9483 - val_loss: 0.0343
Epoch 7/10
1688/1688 - 4s - 2ms/step - accuracy: 0.9352 - loss: 0.0427 - val_accuracy: 0.9512 - val_loss: 0.0344
Epoch 8/10
1688/1688 - 4s - 2ms/step - accuracy: 0.9358 - loss: 0.0418 - val_accuracy: 0.9512 - val_loss: 0.0330
Epoch 9/10
1688/1688 - 4s - 2ms/step - accuracy: 0.9379 - loss: 0.0404 - val_accuracy: 0.9520 - 

#### Evaluate Model

In [54]:
score = model_sigmoid_sigmoid_rmsprop.evaluate(X_test_flat, y_test_cat, verbose=0)
print(f'Sigmoid+Sigmoid (RMSprop) Test Accuracy: {score[1]:.4f}')

Sigmoid+Sigmoid (RMSprop) Test Accuracy: 0.9393


## Accuracy Comparison

In [55]:
# Collect test accuracies from all model evaluations

def get_last_accuracy(cell):
    # Helper to extract accuracy from Keras evaluate output
    if isinstance(cell, tuple) or isinstance(cell, list):
        return cell[1]
    return cell

# Evaluate Adam models
test_acc_relu_softmax_adam = get_last_accuracy(model_relu_softmax.evaluate(X_test_flat, y_test_cat, verbose=0))
test_acc_relu_sigmoid_adam = get_last_accuracy(model_relu_sigmoid.evaluate(X_test_flat, y_test_cat, verbose=0))
test_acc_sigmoid_softmax_adam = get_last_accuracy(model_sigmoid_softmax.evaluate(X_test_flat, y_test_cat, verbose=0))
test_acc_sigmoid_sigmoid_adam = get_last_accuracy(model_sigmoid_sigmoid.evaluate(X_test_flat, y_test_cat, verbose=0))

# Evaluate SGD models
test_acc_relu_softmax_sgd = get_last_accuracy(model_relu_softmax_sgd.evaluate(X_test_flat, y_test_cat, verbose=0))
test_acc_relu_sigmoid_sgd = get_last_accuracy(model_relu_sigmoid_sgd.evaluate(X_test_flat, y_test_cat, verbose=0))
test_acc_sigmoid_softmax_sgd = get_last_accuracy(model_sigmoid_softmax_sgd.evaluate(X_test_flat, y_test_cat, verbose=0))
test_acc_sigmoid_sigmoid_sgd = get_last_accuracy(model_sigmoid_sigmoid_sgd.evaluate(X_test_flat, y_test_cat, verbose=0))

# Evaluate RMSprop models
test_acc_relu_softmax_rmsprop = get_last_accuracy(model_relu_softmax_rmsprop.evaluate(X_test_flat, y_test_cat, verbose=0))
test_acc_relu_sigmoid_rmsprop = get_last_accuracy(model_relu_sigmoid_rmsprop.evaluate(X_test_flat, y_test_cat, verbose=0))
test_acc_sigmoid_softmax_rmsprop = get_last_accuracy(model_sigmoid_softmax_rmsprop.evaluate(X_test_flat, y_test_cat, verbose=0))
test_acc_sigmoid_sigmoid_rmsprop = get_last_accuracy(model_sigmoid_sigmoid_rmsprop.evaluate(X_test_flat, y_test_cat, verbose=0))

print('Adam Models:')
print('ReLU+Softmax Test Accuracy:', test_acc_relu_softmax_adam)
print('ReLU+Sigmoid Test Accuracy:', test_acc_relu_sigmoid_adam)
print('Sigmoid+Softmax Test Accuracy:', test_acc_sigmoid_softmax_adam)
print('Sigmoid+Sigmoid Test Accuracy:', test_acc_sigmoid_sigmoid_adam)

print('\nSGD Models:')
print('ReLU+Softmax Test Accuracy:', test_acc_relu_softmax_sgd)
print('ReLU+Sigmoid Test Accuracy:', test_acc_relu_sigmoid_sgd)
print('Sigmoid+Softmax Test Accuracy:', test_acc_sigmoid_softmax_sgd)
print('Sigmoid+Sigmoid Test Accuracy:', test_acc_sigmoid_sigmoid_sgd)

print('\nRMSprop Models:')
print('ReLU+Softmax Test Accuracy:', test_acc_relu_softmax_rmsprop)
print('ReLU+Sigmoid Test Accuracy:', test_acc_relu_sigmoid_rmsprop)
print('Sigmoid+Softmax Test Accuracy:', test_acc_sigmoid_softmax_rmsprop)
print('Sigmoid+Sigmoid Test Accuracy:', test_acc_sigmoid_sigmoid_rmsprop)

Adam Models:
ReLU+Softmax Test Accuracy: 0.9445000290870667
ReLU+Sigmoid Test Accuracy: 0.9567000269889832
Sigmoid+Softmax Test Accuracy: 0.923799991607666
Sigmoid+Sigmoid Test Accuracy: 0.9330999851226807

SGD Models:
ReLU+Softmax Test Accuracy: 0.2533999979496002
ReLU+Sigmoid Test Accuracy: 0.9369000196456909
Sigmoid+Softmax Test Accuracy: 0.9386000037193298
Sigmoid+Sigmoid Test Accuracy: 0.9114999771118164

RMSprop Models:
ReLU+Softmax Test Accuracy: 0.9559000134468079
ReLU+Sigmoid Test Accuracy: 0.949999988079071
Sigmoid+Softmax Test Accuracy: 0.9459999799728394
Sigmoid+Sigmoid Test Accuracy: 0.939300000667572
