In [1]:
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

train_images = train_images.reshape((60000, 28, 28, 1)).astype('float32') / 255
test_images = test_images.reshape((10000, 28, 28, 1)).astype('float32') / 255

train_labels = to_categorical(train_labels)
test_labels = to_categorical(test_labels)

In [2]:

from tensorflow.keras import models, layers

def create_model(initializer):
    model = models.Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer=initializer, input_shape=(28, 28, 1)))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu', kernel_initializer=initializer))
    model.add(layers.Flatten())
    model.add(layers.Dense(64, activation='relu', kernel_initializer=initializer))
    # Change this line to have 10 units for the 10 classes of MNIST
    model.add(layers.Dense(10, activation='softmax', kernel_initializer=initializer))
    return model


initializers = ['he_normal', 'glorot_uniform', 'random_normal']
models = {init: create_model(init) for init in initializers}

for model in models.values():
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

  super().__init__(


In [3]:
from tensorflow.keras.callbacks import EarlyStopping

early_stopping = EarlyStopping(monitor='val_loss', patience=3)

history_dict = {}
for name, model in models.items():
    print(f"Training model with {name} initialization")
    history = model.fit(train_images, train_labels, epochs=20, validation_split=0.1, callbacks=[early_stopping])
    history_dict[name] = history

Training model with he_normal initialization
Epoch 1/20
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 19ms/step - accuracy: 0.9217 - loss: 0.2540 - val_accuracy: 0.9845 - val_loss: 0.0487
Epoch 2/20
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 19ms/step - accuracy: 0.9879 - loss: 0.0395 - val_accuracy: 0.9887 - val_loss: 0.0424
Epoch 3/20
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 24ms/step - accuracy: 0.9926 - loss: 0.0238 - val_accuracy: 0.9885 - val_loss: 0.0477
Epoch 4/20
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 22ms/step - accuracy: 0.9944 - loss: 0.0166 - val_accuracy: 0.9883 - val_loss: 0.0470
Epoch 5/20
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 19ms/step - accuracy: 0.9953 - loss: 0.0151 - val_accuracy: 0.9898 - val_loss: 0.0428
Training model with glorot_uniform initialization
Epoch 1/20
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s

In [4]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

for name, model in models.items():
    predictions = model.predict(test_images)
    predicted_classes = np.argmax(predictions, axis=1)
    true_classes = np.argmax(test_labels, axis=1)
    
    cm = confusion_matrix(true_classes, predicted_classes)
    plt.figure(figsize=(6,5))
    sns.heatmap(cm, annot=True, fmt="d")
    plt.title(f'Confusion Matrix for {name}')
    plt.show()

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step


NameError: name 'np' is not defined

In [None]:
import matplotlib.pyplot as plt

for name, history in history_dict.items():
    acc = history.history['accuracy']
    val_acc = history.history['val_accuracy']
    loss = history.history['loss']
    val_loss = history.history['val_loss']
    
    epochs = range(1, len(acc) + 1)
    
    plt.figure(figsize=(12, 6))
    
    plt.subplot(1, 2, 1)  
    plt.plot(epochs, acc, 'bo-', label='Training Accuracy')
    plt.plot(epochs, val_acc, 'r^-', label='Validation Accuracy')
    plt.title(f'Training and Validation Accuracy for {name}')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    
    plt.subplot(1, 2, 2)  
    plt.plot(epochs, loss, 'bo-', label='Training Loss')
    plt.plot(epochs, val_loss, 'r^-', label='Validation Loss')
    plt.title(f'Training and Validation Loss for {name}')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    
    plt.tight_layout() 
    plt.show()

In [None]:
test_accuracy = []
for name, model in models.items():
    _, accuracy = model.evaluate(test_images, test_labels)
    test_accuracy.append(accuracy)

plt.bar(initializers, test_accuracy)
plt.xlabel('Initializer')
plt.ylabel('Test Accuracy')
plt.title('Test Accuracy for Different Initializers')
plt.show()

# Table for Hyperparameters

| Hyperparameter | Values |
|-----------------------|-----------|
| Activation Function | Relu, SoftMAx|
| Weight Initializer | he_normal, glorot_uniform, random_normal |
| Number of hidden layers | 2 |
| Number of neurons in hidden layers | 32+64+64 = 160 |
| Loss Function | categorical_crossentropy |
| Optimizer | adam |
| Number of Epochs | 20 |
| Learning Rate  | 0.001  |
| Batch Size     | 32     |
| Evaluation Metrics | Accuracy |
