In [25]:
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.utils import to_categorical
import time

In [13]:
# Seed the Python, NumPy and backend RNGs once, before any model is built, so that
# weight initialisation and batch shuffling repeat under Restart & Run All.
keras.utils.set_random_seed(42)

# Load the MNIST train/test split.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Normalize pixel values to [0, 1].
x_train = x_train.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0

# One-hot encode the labels into 10 classes (required by categorical cross-entropy).
y_train = to_categorical(y_train, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)

In [14]:
# Baseline MLP: 28x28 image -> 784 -> 128 -> 64 -> 10 class probabilities.
# The input shape is declared with an explicit keras.Input instead of an
# `input_shape` argument on the first layer, as the Sequential guide recommends.
model = Sequential([
    keras.Input(shape=(28, 28)),
    Flatten(),                        # 28x28 image -> 784-vector
    Dense(128, activation="relu"),    # hidden layer 1: 128*784 + 128 params
    Dense(64, activation="relu"),     # hidden layer 2: 64*128 + 64 params
    Dense(10, activation="softmax"),  # output layer: 10*64 + 10 params
])

In [15]:
# Show the baseline architecture. Per the per-layer counts noted at the model
# definition, the total should come to 100,480 + 8,256 + 650 = 109,386 parameters.
model.summary()

In [17]:
# Baseline training setup: Adam optimizer, categorical cross-entropy on the
# one-hot labels, and accuracy tracked as the reported metric.
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [59]:
# NOTE(review): the recorded output of this cell starts epoch 1 at ~0.99 training
# accuracy, which suggests `model` had already been fitted when it was timed
# (fit() resumes from the current weights). Rebuild and recompile `model` before
# re-running, so the baseline gets 5 epochs from scratch like the other experiments.
#
# perf_counter() is monotonic, so the elapsed time is immune to system clock changes.
start_time = time.perf_counter()
model.fit(x_train, y_train, epochs=5, batch_size=32, validation_split=0.1)
# Despite the name, this is the training duration in seconds (used in the summary).
test_acc_time = time.perf_counter() - start_time

Epoch 1/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 8ms/step - accuracy: 0.9888 - loss: 0.0347 - val_accuracy: 0.9760 - val_loss: 0.0933
Epoch 2/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 7ms/step - accuracy: 0.9892 - loss: 0.0313 - val_accuracy: 0.9737 - val_loss: 0.1022
Epoch 3/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 7ms/step - accuracy: 0.9918 - loss: 0.0253 - val_accuracy: 0.9778 - val_loss: 0.0918
Epoch 4/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 7ms/step - accuracy: 0.9928 - loss: 0.0221 - val_accuracy: 0.9780 - val_loss: 0.1027
Epoch 5/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 7ms/step - accuracy: 0.9934 - loss: 0.0192 - val_accuracy: 0.9792 - val_loss: 0.1017


In [60]:
# Score the baseline on the held-out test set; the result is unpacked as
# (loss, accuracy). Progress output is left on for this cell.
test_loss, test_acc = model.evaluate(
    x=x_test,
    y=y_test,
)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9767 - loss: 0.0924



# EXPERIMENT 2: Sigmoid vs ReLU in Hidden Layers


In [21]:
# Experiment 2 model: same layer sizes as the baseline, but both hidden layers
# use sigmoid instead of ReLU. Input shape is declared via keras.Input rather
# than `input_shape` on the first layer, as the Sequential guide recommends.
print("\n--- Testing Sigmoid Hidden Layers ---")
model_sigmoid = Sequential([
    keras.Input(shape=(28, 28)),
    Flatten(),
    Dense(128, activation="sigmoid"),  # changed from relu
    Dense(64, activation="sigmoid"),   # changed from relu
    Dense(10, activation="softmax"),
])


--- Testing Sigmoid Hidden Layers ---


In [22]:
# Show the sigmoid-hidden-layer architecture (layer sizes match the baseline,
# only the hidden activations differ).
model_sigmoid.summary()

In [23]:
# Same optimizer, loss and metric as the baseline so only the hidden
# activation differs between the two runs.
model_sigmoid.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [26]:
# Train the sigmoid-hidden-layer model and record how long training takes.
# perf_counter() is monotonic, so the elapsed time is immune to system clock changes.
start_time = time.perf_counter()
model_sigmoid.fit(x_train, y_train, epochs=5, batch_size=32, validation_split=0.1, verbose=1)
sigmoid_time = time.perf_counter() - start_time  # training duration in seconds

Epoch 1/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 10ms/step - accuracy: 0.8759 - loss: 0.4898 - val_accuracy: 0.9485 - val_loss: 0.1862
Epoch 2/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 9ms/step - accuracy: 0.9447 - loss: 0.1901 - val_accuracy: 0.9627 - val_loss: 0.1336
Epoch 3/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 9ms/step - accuracy: 0.9615 - loss: 0.1315 - val_accuracy: 0.9725 - val_loss: 0.1001
Epoch 4/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 9ms/step - accuracy: 0.9714 - loss: 0.0977 - val_accuracy: 0.9747 - val_loss: 0.0864
Epoch 5/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 9ms/step - accuracy: 0.9779 - loss: 0.0745 - val_accuracy: 0.9782 - val_loss: 0.0782


In [27]:
# Score the sigmoid-hidden-layer model on the test set (silently), then report
# its accuracy together with the training time measured earlier.
test_loss, sigmoid_acc = model_sigmoid.evaluate(
    x=x_test,
    y=y_test,
    verbose=0,
)
print(f"Sigmoid - Test Accuracy: {sigmoid_acc:.4f}, Time: {sigmoid_time:.1f}s")

Sigmoid - Test Accuracy: 0.9721, Time: 88.8s


# EXPERIMENT 3: Different Output Activations

In [28]:
# Experiment 3a model: baseline hidden layers, but the output layer uses sigmoid
# instead of softmax, so the 10 outputs are independent values that need not sum
# to 1. Input shape is declared via keras.Input rather than `input_shape` on the
# first layer, as the Sequential guide recommends.
print("\n--- Testing Sigmoid Output ---")
model_sigmoid_out = Sequential([
    keras.Input(shape=(28, 28)),
    Flatten(),
    Dense(128, activation="relu"),
    Dense(64, activation="relu"),
    Dense(10, activation="sigmoid"),  # changed from softmax
])


--- Testing Sigmoid Output ---


In [29]:
# Training setup is kept identical to the baseline; only the output activation
# of this model differs.
model_sigmoid_out.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [30]:
# Train the sigmoid-output model. The returned History is kept in a variable so
# the per-epoch metrics stay available and the cell does not end by echoing the
# object's repr. This experiment is not timed (shown as "N/A" in the summary).
sigmoid_out_history = model_sigmoid_out.fit(
    x_train,
    y_train,
    epochs=5,
    batch_size=32,
    validation_split=0.1,
    verbose=1,
)

Epoch 1/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 7ms/step - accuracy: 0.9255 - loss: 0.2561 - val_accuracy: 0.9685 - val_loss: 0.1104
Epoch 2/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 7ms/step - accuracy: 0.9674 - loss: 0.1064 - val_accuracy: 0.9715 - val_loss: 0.0982
Epoch 3/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 7ms/step - accuracy: 0.9775 - loss: 0.0729 - val_accuracy: 0.9707 - val_loss: 0.0968
Epoch 4/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 7ms/step - accuracy: 0.9824 - loss: 0.0558 - val_accuracy: 0.9712 - val_loss: 0.0955
Epoch 5/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 7ms/step - accuracy: 0.9857 - loss: 0.0440 - val_accuracy: 0.9748 - val_loss: 0.0931


<keras.src.callbacks.history.History at 0x1ef54d07890>

In [31]:
# Score the sigmoid-output model on the test set without progress output;
# the result is unpacked as (loss, accuracy).
test_loss, sigmoid_out_acc = model_sigmoid_out.evaluate(
    x=x_test,
    y=y_test,
    verbose=0,
)

In [32]:
# Report the sigmoid-output model's test accuracy to four decimal places.
print(f"Sigmoid Output - Test Accuracy: {sigmoid_out_acc:.4f}")

Sigmoid Output - Test Accuracy: 0.9745


In [33]:
# Experiment 3b model: baseline hidden layers with a ReLU output layer, kept as a
# negative example. The recorded run of this configuration reports `loss: nan`
# and ~10% accuracy; presumably this is because unbounded (and possibly all-zero)
# ReLU outputs are not a probability distribution for categorical cross-entropy.
# Input shape is declared via keras.Input rather than `input_shape` on the first
# layer, as the Sequential guide recommends.
print("\n--- Testing ReLU Output ---")
model_relu_out = Sequential([
    keras.Input(shape=(28, 28)),
    Flatten(),
    Dense(128, activation="relu"),
    Dense(64, activation="relu"),
    Dense(10, activation="relu"),  # changed from softmax
])


--- Testing ReLU Output ---


In [34]:
# Same optimizer, loss and metric as the baseline; only the output activation
# of this model differs.
model_relu_out.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [35]:
# Train the ReLU-output model. The returned History is kept in a variable so the
# per-epoch metrics stay available and the cell does not end by echoing the
# object's repr. This experiment is not timed (shown as "N/A" in the summary).
relu_out_history = model_relu_out.fit(
    x_train,
    y_train,
    epochs=5,
    batch_size=32,
    validation_split=0.1,
    verbose=1,
)

Epoch 1/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 7ms/step - accuracy: 0.1184 - loss: nan - val_accuracy: 0.0978 - val_loss: nan
Epoch 2/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 7ms/step - accuracy: 0.0988 - loss: nan - val_accuracy: 0.0978 - val_loss: nan
Epoch 3/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step - accuracy: 0.0988 - loss: nan - val_accuracy: 0.0978 - val_loss: nan
Epoch 4/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 7ms/step - accuracy: 0.0988 - loss: nan - val_accuracy: 0.0978 - val_loss: nan
Epoch 5/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step - accuracy: 0.0988 - loss: nan - val_accuracy: 0.0978 - val_loss: nan


<keras.src.callbacks.history.History at 0x1ef544f0e90>

In [36]:
# Score the ReLU-output model on the test set without progress output;
# the result is unpacked as (loss, accuracy).
test_loss, relu_out_acc = model_relu_out.evaluate(
    x=x_test,
    y=y_test,
    verbose=0,
)

In [37]:
# Report the ReLU-output model's test accuracy to four decimal places.
print(f"ReLU Output - Test Accuracy: {relu_out_acc:.4f}")

ReLU Output - Test Accuracy: 0.0980


# EXPERIMENT 4: Double the Hidden Layers

In [38]:
# Experiment 4 model: four ReLU hidden layers (128, 128, 64, 64), i.e. double the
# baseline's two. Input shape is declared via keras.Input rather than
# `input_shape` on the first layer, as the Sequential guide recommends.
print("\n--- Testing 4 Hidden Layers ---")
model_deep = Sequential([
    keras.Input(shape=(28, 28)),
    Flatten(),
    Dense(128, activation="relu"),
    Dense(128, activation="relu"),  # extra layer
    Dense(64, activation="relu"),
    Dense(64, activation="relu"),   # extra layer
    Dense(10, activation="softmax"),
])


--- Testing 4 Hidden Layers ---


In [39]:
# Show the 4-hidden-layer architecture. From the layer sizes in its definition the
# total should come to 100,480 + 16,512 + 8,256 + 4,160 + 650 = 130,058 parameters.
model_deep.summary()

In [40]:
# Same optimizer, loss and metric as the baseline so only the depth differs.
model_deep.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [41]:
# Train the 4-hidden-layer model and record how long training takes.
# perf_counter() is monotonic, so the elapsed time is immune to system clock changes.
start_time = time.perf_counter()
model_deep.fit(x_train, y_train, epochs=5, batch_size=32, validation_split=0.1, verbose=1)
deep_time = time.perf_counter() - start_time  # training duration in seconds

Epoch 1/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 7ms/step - accuracy: 0.9176 - loss: 0.2690 - val_accuracy: 0.9662 - val_loss: 0.1040
Epoch 2/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 7ms/step - accuracy: 0.9656 - loss: 0.1125 - val_accuracy: 0.9735 - val_loss: 0.0884
Epoch 3/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 7ms/step - accuracy: 0.9753 - loss: 0.0804 - val_accuracy: 0.9720 - val_loss: 0.0911
Epoch 4/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 7ms/step - accuracy: 0.9800 - loss: 0.0647 - val_accuracy: 0.9740 - val_loss: 0.0825
Epoch 5/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 7ms/step - accuracy: 0.9836 - loss: 0.0522 - val_accuracy: 0.9758 - val_loss: 0.0772


In [42]:
# Score the 4-hidden-layer model on the test set without progress output;
# the result is unpacked as (loss, accuracy).
test_loss, deep_acc = model_deep.evaluate(
    x=x_test,
    y=y_test,
    verbose=0,
)

In [43]:
# Report the deep model's test accuracy alongside its measured training time.
print(f"Deep Model - Test Accuracy: {deep_acc:.4f}, Time: {deep_time:.1f}s")

Deep Model - Test Accuracy: 0.9751, Time: 73.2s


In [44]:
# EXPERIMENT 5: Different Optimizers

In [45]:
# Experiment 5a model: a fresh copy of the baseline architecture, to be trained
# with SGD. Input shape is declared via keras.Input rather than `input_shape` on
# the first layer, as the Sequential guide recommends.
print("\n--- Testing SGD Optimizer ---")
model_sgd = Sequential([
    keras.Input(shape=(28, 28)),
    Flatten(),
    Dense(128, activation="relu"),
    Dense(64, activation="relu"),
    Dense(10, activation="softmax"),
])


--- Testing SGD Optimizer ---


In [47]:
# Only the optimizer changes relative to the baseline: the string "sgd" selects
# SGD with its default settings.
model_sgd.compile(
    optimizer="sgd",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [48]:
# Train the SGD model and record how long training takes.
# perf_counter() is monotonic, so the elapsed time is immune to system clock changes.
start_time = time.perf_counter()
model_sgd.fit(x_train, y_train, epochs=5, batch_size=32, validation_split=0.1, verbose=1)
sgd_time = time.perf_counter() - start_time  # training duration in seconds

Epoch 1/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step - accuracy: 0.8214 - loss: 0.6864 - val_accuracy: 0.9188 - val_loss: 0.2917
Epoch 2/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - accuracy: 0.9109 - loss: 0.3098 - val_accuracy: 0.9370 - val_loss: 0.2276
Epoch 3/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - accuracy: 0.9267 - loss: 0.2549 - val_accuracy: 0.9475 - val_loss: 0.1919
Epoch 4/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - accuracy: 0.9362 - loss: 0.2191 - val_accuracy: 0.9520 - val_loss: 0.1718
Epoch 5/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - accuracy: 0.9440 - loss: 0.1938 - val_accuracy: 0.9572 - val_loss: 0.1568


In [49]:
# Score the SGD-trained model on the test set (silently), then report its
# accuracy together with the training time measured earlier.
test_loss, sgd_acc = model_sgd.evaluate(
    x=x_test,
    y=y_test,
    verbose=0,
)
print(f"SGD - Test Accuracy: {sgd_acc:.4f}, Time: {sgd_time:.1f}s")

SGD - Test Accuracy: 0.9443, Time: 50.1s


In [51]:
# Experiment 5b model: a fresh copy of the baseline architecture, to be trained
# with RMSprop. Input shape is declared via keras.Input rather than `input_shape`
# on the first layer, as the Sequential guide recommends.
print("\n--- Testing RMSprop Optimizer ---")
model_rmsprop = Sequential([
    keras.Input(shape=(28, 28)),
    Flatten(),
    Dense(128, activation="relu"),
    Dense(64, activation="relu"),
    Dense(10, activation="softmax"),
])


--- Testing RMSprop Optimizer ---


In [52]:
# Only the optimizer changes relative to the baseline: the string "rmsprop"
# selects RMSprop with its default settings.
model_rmsprop.compile(
    optimizer="rmsprop",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [53]:
# Train the RMSprop model and record how long training takes.
# perf_counter() is monotonic, so the elapsed time is immune to system clock changes.
start_time = time.perf_counter()
model_rmsprop.fit(x_train, y_train, epochs=5, batch_size=32, validation_split=0.1, verbose=1)
rmsprop_time = time.perf_counter() - start_time  # training duration in seconds

Epoch 1/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 7ms/step - accuracy: 0.9257 - loss: 0.2538 - val_accuracy: 0.9645 - val_loss: 0.1132
Epoch 2/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step - accuracy: 0.9667 - loss: 0.1126 - val_accuracy: 0.9772 - val_loss: 0.0810
Epoch 3/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step - accuracy: 0.9759 - loss: 0.0790 - val_accuracy: 0.9798 - val_loss: 0.0796
Epoch 4/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - accuracy: 0.9812 - loss: 0.0625 - val_accuracy: 0.9797 - val_loss: 0.0833
Epoch 5/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - accuracy: 0.9852 - loss: 0.0515 - val_accuracy: 0.9825 - val_loss: 0.0737


In [54]:
# Score the RMSprop-trained model on the test set (silently), then report its
# accuracy together with the training time measured earlier.
test_loss, rmsprop_acc = model_rmsprop.evaluate(
    x=x_test,
    y=y_test,
    verbose=0,
)
print(f"RMSprop - Test Accuracy: {rmsprop_acc:.4f}, Time: {rmsprop_time:.1f}s")

RMSprop - Test Accuracy: 0.9777, Time: 65.6s


# RESULTS SUMMARY

In [67]:
# One (label, test accuracy, training time) row per experiment, in the order
# they were run. The two output-activation experiments were not timed, so their
# time slot holds a string placeholder instead of a number of seconds.
NOT_TIMED = "N/A"

results = [
    ("Basic (ReLU + Softmax + Adam)", test_acc, test_acc_time),
    ("Sigmoid Hidden Layers", sigmoid_acc, sigmoid_time),
    ("Sigmoid Output", sigmoid_out_acc, NOT_TIMED),
    ("ReLU Output", relu_out_acc, NOT_TIMED),
    ("4 Hidden Layers", deep_acc, deep_time),
    ("SGD Optimizer", sgd_acc, sgd_time),
    ("RMSprop Optimizer", rmsprop_acc, rmsprop_time),
]

In [68]:
# Print the results as an aligned text table.
# The name column is sized to the longest configuration label, because a fixed
# width of 25 is too narrow for the 29-character baseline label and pushes that
# row's remaining columns out of line.
name_width = max([len("Configuration")] + [len(name) for name, _, _ in results])
header = f"{'Configuration':<{name_width}} {'Accuracy':<10} {'Time (s)':<10}"
print(header)
print("-" * len(header))
for name, acc, time_val in results:
    # Untimed experiments carry a string placeholder rather than a float;
    # print it as-is instead of applying a numeric format to it.
    time_str = time_val if isinstance(time_val, str) else f"{time_val:.1f}"
    print(f"{name:<{name_width}} {acc:<10.4f} {time_str:<10}")

Configuration             Accuracy   Time (s)  
--------------------------------------------------
Basic (ReLU + Softmax + Adam) 0.9767     71.8      
Sigmoid Hidden Layers     0.9721     88.8      
Sigmoid Output            0.9745     N/A       
ReLU Output               0.0980     N/A       
4 Hidden Layers           0.9751     73.2      
SGD Optimizer             0.9443     50.1      
RMSprop Optimizer         0.9777     65.6      


In [65]:
# Derive each headline verdict first, then print the findings.
# NOTE(review): every comparison is based on one training run per configuration
# with no seed set in the cells shown, so small accuracy gaps may be
# run-to-run noise rather than real effects.
relu_verdict = "wins" if test_acc > sigmoid_acc else "loses"
depth_verdict = "improve" if deep_acc > test_acc else "hurt"

# Tuples compare by their first element, so min() picks the shortest training time.
optimizer_times = [
    (test_acc_time, "Adam"),
    (sgd_time, "SGD"),
    (rmsprop_time, "RMSprop"),
]
fastest_optimizer = min(optimizer_times)[1]

print("\nKEY FINDINGS:")
print(f"1. ReLU vs Sigmoid: ReLU {relu_verdict} ({test_acc:.4f} vs {sigmoid_acc:.4f})")
print(f"2. Softmax vs others: Softmax gets {test_acc:.4f}, Sigmoid gets {sigmoid_out_acc:.4f}")
print(f"3. Network depth: 4 layers {depth_verdict} performance")
print(f"4. Fastest optimizer: {fastest_optimizer}")


KEY FINDINGS:
1. ReLU vs Sigmoid: ReLU wins (0.9767 vs 0.9721)
2. Softmax vs others: Softmax gets 0.9767, Sigmoid gets 0.9745
3. Network depth: 4 layers hurt performance
4. Fastest optimizer: SGD
