<a href="https://colab.research.google.com/github/manikantagangam/deeplearning-tutorials/blob/main/Exploring_different_Neural_Network_design.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Exploring different neural network design choices for digit classification on the MNIST dataset using the Keras library

In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import mnist
import time

In [2]:
# Load the MNIST train/test splits, flatten every 28x28 image into a
# 784-element vector and rescale the pixel values by 1/255 as float32.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
# -1 lets NumPy infer the number of samples instead of hard-coding
# 60000 / 10000, so the cell keeps working if the split sizes ever differ.
train_images = train_images.reshape((-1, 28 * 28)).astype('float32') / 255
test_images = test_images.reshape((-1, 28 * 28)).astype('float32') / 255

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


# 1. Number of Nodes


In [3]:
def create_model(num_nodes):
    """Build and compile a classifier with a single hidden layer.

    Args:
        num_nodes: Number of units in the ReLU hidden layer.

    Returns:
        A compiled ``Sequential`` model: 784 inputs -> ``num_nodes`` ReLU
        units -> 10-way softmax, trained with Adam on sparse categorical
        cross-entropy and reporting accuracy.
    """
    network = models.Sequential([
        layers.Dense(num_nodes, activation='relu', input_shape=(28 * 28,)),
        layers.Dense(10, activation='softmax'),
    ])
    network.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

# Sweep the hidden-layer width and report accuracy, training time and size.
# NOTE(review): 2056 looks like a typo for 2048; it is kept because the
# recorded output was produced with 2056 (1634530 parameters) — confirm.
num_nodes_list = [4, 32, 64, 128, 512, 2056]
for num_nodes in num_nodes_list:
    model = create_model(num_nodes)
    # perf_counter() is a monotonic, high-resolution clock, so the measured
    # interval cannot be distorted by system clock adjustments (unlike time.time()).
    start_time = time.perf_counter()
    model.fit(train_images, train_labels, epochs=10, batch_size=128, validation_split=0.2, verbose=0)
    end_time = time.perf_counter()
    training_time = end_time - start_time
    # evaluate() returns [loss, accuracy]; only the accuracy is reported.
    _, train_accuracy = model.evaluate(train_images, train_labels, verbose=0)
    _, test_accuracy = model.evaluate(test_images, test_labels, verbose=0)
    print(f"Number of Nodes: {num_nodes}, Train Accuracy: {train_accuracy:.4f}, Test Accuracy: {test_accuracy:.4f}, Training Time: {training_time:.2f} seconds, Number of Parameters: {model.count_params()}")


Number of Nodes: 4, Train Accuracy: 0.8463, Test Accuracy: 0.8466, Training Time: 13.25 seconds, Number of Parameters: 3190
Number of Nodes: 32, Train Accuracy: 0.9677, Test Accuracy: 0.9618, Training Time: 21.18 seconds, Number of Parameters: 25450
Number of Nodes: 64, Train Accuracy: 0.9832, Test Accuracy: 0.9721, Training Time: 21.35 seconds, Number of Parameters: 50890
Number of Nodes: 128, Train Accuracy: 0.9904, Test Accuracy: 0.9754, Training Time: 11.19 seconds, Number of Parameters: 101770
Number of Nodes: 512, Train Accuracy: 0.9941, Test Accuracy: 0.9787, Training Time: 11.53 seconds, Number of Parameters: 407050
Number of Nodes: 2056, Train Accuracy: 0.9953, Test Accuracy: 0.9808, Training Time: 21.18 seconds, Number of Parameters: 1634530


# 2. Number of Layers

In [4]:
def create_model(num_layers):
    """Build and compile a classifier with a stack of 64-unit hidden layers.

    Args:
        num_layers: Requested number of ReLU hidden layers. The first hidden
            layer is always created, followed by ``num_layers - 1`` more, so
            values below 1 still yield one hidden layer.

    Returns:
        A compiled ``Sequential`` model ending in a 10-way softmax layer.
    """
    # Layers are instantiated in the same order in which they are stacked.
    stack = [layers.Dense(64, activation='relu', input_shape=(28 * 28,))]
    stack.extend(layers.Dense(64, activation='relu') for _ in range(num_layers - 1))
    stack.append(layers.Dense(10, activation='softmax'))

    network = models.Sequential(stack)
    network.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

# Sweep the network depth and report accuracy, training time and size.
num_layers_list = [4, 6, 8, 16]
for num_layers in num_layers_list:
    model = create_model(num_layers)
    # perf_counter() is a monotonic, high-resolution clock, so the measured
    # interval cannot be distorted by system clock adjustments (unlike time.time()).
    start_time = time.perf_counter()
    model.fit(train_images, train_labels, epochs=10, batch_size=128, validation_split=0.2, verbose=0)
    end_time = time.perf_counter()
    training_time = end_time - start_time
    # evaluate() returns [loss, accuracy]; only the accuracy is reported.
    _, train_accuracy = model.evaluate(train_images, train_labels, verbose=0)
    _, test_accuracy = model.evaluate(test_images, test_labels, verbose=0)
    print(f"Number of Layers: {num_layers}, Train Accuracy: {train_accuracy:.4f}, Test Accuracy: {test_accuracy:.4f}, Training Time: {training_time:.2f} seconds, Number of Parameters: {model.count_params()}")


Number of Layers: 4, Train Accuracy: 0.9770, Test Accuracy: 0.9627, Training Time: 14.63 seconds, Number of Parameters: 63370
Number of Layers: 6, Train Accuracy: 0.9829, Test Accuracy: 0.9672, Training Time: 16.59 seconds, Number of Parameters: 71690
Number of Layers: 8, Train Accuracy: 0.9838, Test Accuracy: 0.9685, Training Time: 17.33 seconds, Number of Parameters: 80010
Number of Layers: 16, Train Accuracy: 0.9777, Test Accuracy: 0.9668, Training Time: 25.99 seconds, Number of Parameters: 113290


# 3. Layer-node combinations

In [5]:
def create_model(nodes_per_layer):
    """Build and compile a classifier from a list of hidden-layer widths.

    Args:
        nodes_per_layer: Iterable of unit counts; one ReLU ``Dense`` layer is
            added per entry, in order.

    Returns:
        A compiled ``Sequential`` model taking 784-dimensional input and
        ending in a 10-way softmax layer.
    """
    model = models.Sequential()
    # Declare the input shape explicitly so the model is built right away;
    # without it the weights do not exist until the first fit() call and
    # count_params() / summary() cannot be used before training.
    model.add(layers.Input(shape=(28 * 28,)))
    for nodes in nodes_per_layer:
        model.add(layers.Dense(nodes, activation='relu'))
    model.add(layers.Dense(10, activation='softmax'))
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Hidden-layer layouts to compare: a tapering stack and a constant-width stack.
architectures = [
    [256, 128, 64, 32],
    [128, 128, 128, 128],
]

for nodes_per_layer in architectures:
    model = create_model(nodes_per_layer)
    # perf_counter() is a monotonic, high-resolution clock, so the measured
    # interval cannot be distorted by system clock adjustments (unlike time.time()).
    start_time = time.perf_counter()
    model.fit(train_images, train_labels, epochs=10, batch_size=128, validation_split=0.2, verbose=0)
    end_time = time.perf_counter()
    training_time = end_time - start_time
    # evaluate() returns [loss, accuracy]; only the accuracy is reported.
    _, train_accuracy = model.evaluate(train_images, train_labels, verbose=0)
    _, test_accuracy = model.evaluate(test_images, test_labels, verbose=0)
    print(f"Architecture: {nodes_per_layer}, Train Accuracy: {train_accuracy:.4f}, Test Accuracy: {test_accuracy:.4f}, Training Time: {training_time:.2f} seconds, Number of Parameters: {model.count_params()}")


Architecture: [256, 128, 64, 32], Train Accuracy: 0.9938, Test Accuracy: 0.9801, Training Time: 21.88 seconds, Number of Parameters: 244522
Architecture: [128, 128, 128, 128], Train Accuracy: 0.9901, Test Accuracy: 0.9757, Training Time: 14.42 seconds, Number of Parameters: 151306


# 4. Input Size

In [6]:
def create_model():
    """Build and compile a classifier with four 256-unit ReLU hidden layers.

    Returns:
        A compiled ``Sequential`` model: 784 inputs -> 4 x 256 ReLU units ->
        10-way softmax, trained with Adam on sparse categorical
        cross-entropy and reporting accuracy.
    """
    # First hidden layer carries the input shape; three more follow it.
    stack = [layers.Dense(256, activation='relu', input_shape=(28 * 28,))]
    stack.extend(layers.Dense(256, activation='relu') for _ in range(3))
    stack.append(layers.Dense(10, activation='softmax'))

    network = models.Sequential(stack)
    network.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

# Train the fixed 4 x 256 network on the full training set and report metrics.
model = create_model()
# perf_counter() is a monotonic, high-resolution clock, so the measured
# interval cannot be distorted by system clock adjustments (unlike time.time()).
start_time = time.perf_counter()
model.fit(train_images, train_labels, epochs=10, batch_size=128, validation_split=0.2, verbose=0)
end_time = time.perf_counter()
training_time = end_time - start_time

# evaluate() returns [loss, accuracy]; only the accuracy is reported.
_, train_accuracy = model.evaluate(train_images, train_labels, verbose=0)
_, test_accuracy = model.evaluate(test_images, test_labels, verbose=0)

print(f"Train Accuracy: {train_accuracy:.4f}, Test Accuracy: {test_accuracy:.4f}, Training Time: {training_time:.2f} seconds, Number of Parameters: {model.count_params()}")

Train Accuracy: 0.9894, Test Accuracy: 0.9751, Training Time: 21.45 seconds, Number of Parameters: 400906


# 5. Dataset Split

In [7]:
def create_model():
    """Build and compile a classifier with four 256-unit ReLU hidden layers.

    Same architecture as the ``create_model`` of the "4. Input Size" cell;
    it is redefined here so this cell does not depend on that one.

    Returns:
        A compiled ``Sequential`` model: 784 inputs -> 4 x 256 ReLU units ->
        10-way softmax, trained with Adam on sparse categorical
        cross-entropy and reporting accuracy.
    """
    network = models.Sequential()
    network.add(layers.Dense(256, activation='relu', input_shape=(28 * 28,)))
    remaining_hidden = 3
    while remaining_hidden > 0:
        network.add(layers.Dense(256, activation='relu'))
        remaining_hidden -= 1
    network.add(layers.Dense(10, activation='softmax'))
    network.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

# Train on a small slice of the data to see the effect of the dataset size.
# Number of leading samples used from both the training and the test set;
# named once here instead of repeating the literal in every slice.
subset_size = 1000
subset_train_images, subset_train_labels = train_images[:subset_size], train_labels[:subset_size]
subset_test_images, subset_test_labels = test_images[:subset_size], test_labels[:subset_size]

model = create_model()
# perf_counter() is a monotonic, high-resolution clock, so the measured
# interval cannot be distorted by system clock adjustments (unlike time.time()).
start_time = time.perf_counter()
# validation_split=0.2 holds out part of the subset, so fewer than
# subset_size samples are actually used for the weight updates.
model.fit(subset_train_images, subset_train_labels, epochs=10, batch_size=128, validation_split=0.2, verbose=0)
end_time = time.perf_counter()
training_time = end_time - start_time

# evaluate() returns [loss, accuracy]; only the accuracy is reported.
_, train_accuracy = model.evaluate(subset_train_images, subset_train_labels, verbose=0)
_, test_accuracy = model.evaluate(subset_test_images, subset_test_labels, verbose=0)

print(f"Train Accuracy: {train_accuracy:.4f}, Test Accuracy: {test_accuracy:.4f}, Training Time: {training_time:.2f} seconds, Number of Parameters: {model.count_params()}")


Train Accuracy: 0.9680, Test Accuracy: 0.8530, Training Time: 1.97 seconds, Number of Parameters: 400906


# 6. Activation function

In [8]:
def create_model(activation):
    """Build and compile a classifier whose hidden layers share one activation.

    Args:
        activation: Activation passed to each of the four 64-unit hidden
            ``Dense`` layers.

    Returns:
        A compiled ``Sequential`` model: 784 inputs -> 4 x 64 hidden units ->
        10-way softmax, trained with Adam on sparse categorical
        cross-entropy and reporting accuracy.
    """
    # First hidden layer carries the input shape; three more follow it.
    stack = [layers.Dense(64, activation=activation, input_shape=(28 * 28,))]
    stack.extend(layers.Dense(64, activation=activation) for _ in range(3))
    stack.append(layers.Dense(10, activation='softmax'))

    network = models.Sequential(stack)
    network.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

def train_and_evaluate(activation, epochs):
    """Train a fresh model with the given activation and print its metrics.

    Reads the module-level ``train_images``/``train_labels`` and
    ``test_images``/``test_labels`` arrays and prints one summary line with
    train accuracy, test accuracy and training time.

    Args:
        activation: Activation forwarded to ``create_model``.
        epochs: Number of epochs passed to ``model.fit``.
    """
    model = create_model(activation)
    # perf_counter() is a monotonic, high-resolution clock, so the measured
    # interval cannot be distorted by system clock adjustments (unlike time.time()).
    start_time = time.perf_counter()
    model.fit(train_images, train_labels, epochs=epochs, batch_size=128, validation_split=0.2, verbose=0)
    end_time = time.perf_counter()
    training_time = end_time - start_time
    # evaluate() returns [loss, accuracy]; only the accuracy is reported.
    _, train_accuracy = model.evaluate(train_images, train_labels, verbose=0)
    _, test_accuracy = model.evaluate(test_images, test_labels, verbose=0)
    print(f"Activation: {activation}, Epochs: {epochs}, Train Accuracy: {train_accuracy:.4f}, Test Accuracy: {test_accuracy:.4f}, Training Time: {training_time:.2f} seconds")

# Compare the activations at 10 epochs first, then again at 30 epochs.
activations = ['sigmoid', 'tanh', 'relu']
# Each entry pairs an optional banner (printed before the sweep) with the
# number of epochs used for that sweep.
sweeps = [
    (None, 10),
    ("\nRunning models for 30 epochs:", 30),
]
for banner, sweep_epochs in sweeps:
    if banner is not None:
        print(banner)
    for activation in activations:
        train_and_evaluate(activation, epochs=sweep_epochs)


Activation: sigmoid, Epochs: 10, Train Accuracy: 0.9725, Test Accuracy: 0.9564, Training Time: 21.45 seconds
Activation: tanh, Epochs: 10, Train Accuracy: 0.9886, Test Accuracy: 0.9712, Training Time: 14.70 seconds
Activation: relu, Epochs: 10, Train Accuracy: 0.9879, Test Accuracy: 0.9749, Training Time: 21.49 seconds

Running models for 30 epochs:
Activation: sigmoid, Epochs: 30, Train Accuracy: 0.9900, Test Accuracy: 0.9647, Training Time: 38.87 seconds
Activation: tanh, Epochs: 30, Train Accuracy: 0.9868, Test Accuracy: 0.9652, Training Time: 41.94 seconds
Activation: relu, Epochs: 30, Train Accuracy: 0.9887, Test Accuracy: 0.9728, Training Time: 38.91 seconds


# 7. Activation function combinations

In [9]:
def create_model(activation1, activation2, activation3):
    """Build and compile a classifier with a different activation per hidden layer.

    Args:
        activation1: Activation of the first 32-unit hidden layer.
        activation2: Activation of the second 32-unit hidden layer.
        activation3: Activation of the third 32-unit hidden layer.

    Returns:
        A compiled ``Sequential`` model: 784 inputs -> 3 x 32 hidden units ->
        10-way softmax, trained with Adam on sparse categorical
        cross-entropy and reporting accuracy.
    """
    network = models.Sequential([
        layers.Dense(32, activation=activation1, input_shape=(28 * 28,)),
        layers.Dense(32, activation=activation2),
        layers.Dense(32, activation=activation3),
        layers.Dense(10, activation='softmax'),
    ])
    network.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

def train_and_evaluate(activation1, activation2, activation3):
    """Train a fresh model with the given activation triple and print its metrics.

    Reads the module-level ``train_images``/``train_labels`` and
    ``test_images``/``test_labels`` arrays, trains for 10 epochs and prints
    one summary line with train accuracy, test accuracy and training time.

    Args:
        activation1: Activation forwarded to ``create_model`` for hidden layer 1.
        activation2: Activation forwarded to ``create_model`` for hidden layer 2.
        activation3: Activation forwarded to ``create_model`` for hidden layer 3.
    """
    model = create_model(activation1, activation2, activation3)
    # perf_counter() is a monotonic, high-resolution clock, so the measured
    # interval cannot be distorted by system clock adjustments (unlike time.time()).
    start_time = time.perf_counter()
    model.fit(train_images, train_labels, epochs=10, batch_size=128, validation_split=0.2, verbose=0)
    end_time = time.perf_counter()
    training_time = end_time - start_time
    # evaluate() returns [loss, accuracy]; only the accuracy is reported.
    _, train_accuracy = model.evaluate(train_images, train_labels, verbose=0)
    _, test_accuracy = model.evaluate(test_images, test_labels, verbose=0)
    print(f"Activation Functions: {activation1}, {activation2}, {activation3}, Train Accuracy: {train_accuracy:.4f}, Test Accuracy: {test_accuracy:.4f}, Training Time: {training_time:.2f} seconds")

# Activation triples to compare: three uniform stacks and one mixed stack.
activation_combinations = [
    ('sigmoid', 'sigmoid', 'sigmoid'),
    ('relu', 'relu', 'relu'),
    ('tanh', 'tanh', 'tanh'),
    ('sigmoid', 'relu', 'tanh'),
]

# Unpack each triple in the loop header rather than with * at the call site.
for first_act, second_act, third_act in activation_combinations:
    train_and_evaluate(first_act, second_act, third_act)


Activation Functions: sigmoid, sigmoid, sigmoid, Train Accuracy: 0.9616, Test Accuracy: 0.9525, Training Time: 13.10 seconds
Activation Functions: relu, relu, relu, Train Accuracy: 0.9770, Test Accuracy: 0.9648, Training Time: 13.01 seconds
Activation Functions: tanh, tanh, tanh, Train Accuracy: 0.9807, Test Accuracy: 0.9658, Training Time: 21.36 seconds
Activation Functions: sigmoid, relu, tanh, Train Accuracy: 0.9711, Test Accuracy: 0.9611, Training Time: 21.39 seconds
