In [None]:
!pip install keras-tuner -q

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/129.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━[0m [32m122.9/129.1 kB[0m [31m3.7 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# https://github.com/osamaghaliah/mnist_id_based_model.git

import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

# Fetch the MNIST train/test splits through the Keras dataset loader.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Cast the images of both splits to 32-bit floats and divide by 255.0.
x_train, x_test = (
    images.astype("float32") / 255.0 for images in (x_train, x_test)
)

# One-hot encode the labels of both splits over 10 classes.
y_train, y_test = (
    to_categorical(labels, num_classes=10) for labels in (y_train, y_test)
)

In [None]:
'''
This code snippet is dedicated to work with MNIST dataset as follows:

  1. Setting each FC layer's neuron count to a pair of digits extracted from a fixed ID number => 322509357.
  2. Using Dense layers for the FC layers and Flatten for the input layer => to allow operation on 1D vectors.
  3. Batch Normalization is used to normalize each layer's activations, and Dropout is used as regularization to prevent the model from overfitting.
  4. The ReLU activation function is used in every FC layer and Softmax in the output layer.

Metric used: Accuracy.
Test loss: 0.0994
Test accuracy: 0.9707
'''

# Extracting ID by pairs of digits - starting from the very right.

def extract_pairs_as_neurons(id, num_pairs=4):
    """Split an ID into two-digit numbers, reading from its right-hand end.

    Args:
        id: The ID, normally a string of digits. Any other value (e.g. an
            int) is converted with ``str`` first. The name shadows the
            built-in ``id`` and is kept only so existing keyword callers
            keep working.
        num_pairs: Number of two-digit pairs to extract. Defaults to 4,
            which matches the original fixed behaviour.

    Returns:
        A list of ``num_pairs`` ints. Element 0 is the rightmost pair,
        element 1 the pair to its left, and so on. Characters to the left
        of the last extracted pair are ignored.

    Raises:
        ValueError: If ``num_pairs`` is negative, if the ID has fewer than
            ``2 * num_pairs`` characters, or if a pair is not numeric.
    """
    digits = str(id)
    needed = 2 * num_pairs
    if num_pairs < 0 or len(digits) < needed:
        raise ValueError(
            f"ID {digits!r} must contain at least {needed} digits "
            f"to extract {num_pairs} pairs"
        )

    # Keep only the rightmost `needed` characters, then walk them from right
    # to left two characters at a time.
    tail = digits[len(digits) - needed:]
    return [int(tail[end - 2:end]) for end in range(needed, 0, -2)]

# Neuron counts for the hidden layers, derived from the fixed ID (rightmost pair first).
neurons = extract_pairs_as_neurons("322509357")

# Report which neuron count is assigned to which layer (layers are numbered from 1).
for layer_number, layer_units in enumerate(neurons, start=1):
    print(f"Neurons dedicated for Layer {layer_number} ==> {layer_units}\n")

'''
Setting up the model layers according to the fixed pairs of digits extracted above.
  ==> 1st FC Layer: fixed 57 neurons
  ==> 2nd FC Layer: fixed 93 neurons
  ==> 3rd FC Layer: fixed 50 neurons
  ==> 4th FC Layer: fixed 22 neurons
'''
# Dropout rate for each hidden layer, in layer order (tuner results
# dropout_0 .. dropout_3).
dropout_rates = (0.2, 0.2, 0.1, 0.1)
assert len(neurons) >= len(dropout_rates), "need one ID-derived neuron count per hidden layer"

model = models.Sequential()

# Declare the input with an explicit Input layer instead of passing
# `input_shape=` to Flatten: Keras 3 warns about `input_shape` on layers inside
# a Sequential model. Flatten then turns each 28x28 image into a 1D vector.
model.add(layers.Input(shape=(28, 28)))
model.add(layers.Flatten())

# Fully connected stack: Dense -> BatchNormalization -> ReLU -> Dropout.
# Units follow the ID logic (57, 93, 50, 22 for the fixed ID); dropout rates
# come from the tuner.
for units, dropout_rate in zip(neurons, dropout_rates):
    model.add(layers.Dense(units))
    model.add(layers.BatchNormalization())
    model.add(layers.Activation("relu"))
    model.add(layers.Dropout(dropout_rate))

# Output layer: one softmax probability per digit class.
model.add(layers.Dense(10, activation="softmax"))

# Compile the model with the tuned learning rate = 0.001
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

# Print model summary
model.summary()

# Fit for 10 epochs in mini-batches of 128, holding out 20% of the training
# data for validation.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=128,
    epochs=10,
    validation_split=0.2,
)

# Score the trained model on the test split and report both metrics.
loss, accuracy = model.evaluate(x_test, y_test)
print(f"Test loss: {loss:.4f}")
print(f"Test accuracy: {accuracy:.4f}")

Neurons dedicated for Layer 1 ==> 57

Neurons dedicated for Layer 2 ==> 93

Neurons dedicated for Layer 3 ==> 50

Neurons dedicated for Layer 4 ==> 22



Epoch 1/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 9ms/step - accuracy: 0.6100 - loss: 1.2936 - val_accuracy: 0.9306 - val_loss: 0.2532
Epoch 2/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 8ms/step - accuracy: 0.8916 - loss: 0.3967 - val_accuracy: 0.9517 - val_loss: 0.1724
Epoch 3/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 0.9152 - loss: 0.3091 - val_accuracy: 0.9584 - val_loss: 0.1452
Epoch 4/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 7ms/step - accuracy: 0.9247 - loss: 0.2644 - val_accuracy: 0.9603 - val_loss: 0.1370
Epoch 5/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 7ms/step - accuracy: 0.9366 - loss: 0.2302 - val_accuracy: 0.9638 - val_loss: 0.1257
Epoch 6/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 10ms/step - accuracy: 0.9414 - loss: 0.2090 - val_accuracy: 0.9663 - val_loss: 0.1182
Epoch 7/10
[1m375/375[0m 

In [None]:
'''
This code snippet represents the Hyperparameters Tuning phase using Hyperband and focusing on:

  ==> Balancing the ID-based neuron logic with optimal dropout rates and learning rates.

Near Optimal Hyperparameters Found:
  1. Layer 1: (units_0 = 68, dropout_0 = 0.2)
  2. Layer 2: (units_1 = 78, dropout_1 = 0.2)
  3. Layer 3: (units_2 = 59, dropout_2 = 0.1)
  4. Layer 4: (units_3 = 18, dropout_3 = 0.1)
  5. Model compiles with: (learning_rate = 0.001)

Best model test loss: 0.0919
Best model test accuracy: 0.9726
'''

import keras_tuner as kt
from tensorflow.keras.optimizers import Adam

# Model-building helper function for the tuner.
def build_model(hp):
    """Build and compile one candidate MNIST classifier for the tuner.

    The network is Flatten followed by four hidden stacks of
    Dense(relu) -> BatchNormalization -> Dropout and a 10-way softmax output.
    The unit count and dropout rate of every hidden stack, and the Adam
    learning rate, are drawn from `hp`.

    Args:
        hp: Hyperparameter container supplied by the tuner; its `Int`,
            `Float` and `Choice` methods are used to declare the search space.

    Returns:
        The compiled Sequential model (categorical cross-entropy loss,
        accuracy metric).

    NOTE(review): the fixed ID-based model in this notebook applies
    BatchNormalization *before* the ReLU activation, whereas this search
    model applies it after. The tuned values are therefore found on a
    slightly different architecture; confirm this is intended.
    """
    # (min_units, max_units, step) for each hidden layer, centred on the
    # ID-derived sizes.
    unit_ranges = [
        (40, 80, 4),   # Around 57
        (70, 120, 4),  # Around 93
        (35, 65, 2),   # Around 50
        (15, 30, 1),   # Around 22
    ]

    model = models.Sequential()
    # Explicit Input layer instead of `input_shape=` on Flatten, which Keras 3
    # warns about inside Sequential models.
    model.add(layers.Input(shape=(28, 28)))
    model.add(layers.Flatten())

    # Hyperparameters are registered as units_i then dropout_i for each layer,
    # the same names and order as before, so earlier tuner results stay valid.
    for layer_idx, (min_units, max_units, step) in enumerate(unit_ranges):
        model.add(layers.Dense(
            units=hp.Int(f'units_{layer_idx}', min_value=min_units, max_value=max_units, step=step),
            activation='relu'
        ))
        model.add(layers.BatchNormalization())
        model.add(layers.Dropout(
            hp.Float(f'dropout_{layer_idx}', min_value=0.1, max_value=0.4, step=0.1)
        ))

    # Output layer - Softmax-based.
    model.add(layers.Dense(10, activation="softmax"))

    # Optimizer - Adam, with the learning rate chosen from three fixed candidates.
    model.compile(
        optimizer=tf.keras.optimizers.Adam(
            learning_rate=hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])
        ),
        loss="categorical_crossentropy",
        metrics=["accuracy"]
    )

    return model

# Hyperband tuner over build_model's search space: trials are ranked by
# val_accuracy, with max_epochs = 10 and factor = 3. Tuner files go under
# the "my_dir" directory with project name "mnist_tuning".
tuner = kt.Hyperband(
    build_model,
    objective="val_accuracy",
    factor=3,
    max_epochs=10,
    project_name="mnist_tuning",
    directory="my_dir",
)

# Run the Hyperband search, validating each trial on 20% of the training data.
# NOTE(review): the recorded results list a per-trial "tuner/epochs" value, so
# Hyperband appears to schedule epochs itself - confirm whether `epochs=10`
# has any effect here.
tuner.search(x_train, y_train, validation_split=0.2, epochs=10)

# Take the top-ranked hyperparameter set from the finished search.
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# List every stored hyperparameter as "name: value".
print("Near Optimal Hyperparameters Found:")
for param in best_hps.values:
    print(f"{param}: {best_hps.values[param]}")

# Rebuild a fresh model from the winning hyperparameters and train it for
# 10 epochs, again holding out 20% of the training data for validation.
best_model = tuner.hypermodel.build(best_hps)
history = best_model.fit(
    x=x_train,
    y=y_train,
    validation_split=0.2,
    epochs=10,
)

# Score the retrained model on the test split.
loss, accuracy = best_model.evaluate(x_test, y_test)
print(f"Best model test loss: {loss:.4f}")
print(f"Best model test accuracy: {accuracy:.4f}")

Trial 30 Complete [00h 01m 35s]
val_accuracy: 0.9674999713897705

Best val_accuracy So Far: 0.9713333249092102
Total elapsed time: 00h 20m 23s
Best hyperparameters:
units_0: 68
dropout_0: 0.2
units_1: 78
dropout_1: 0.2
units_2: 59
dropout_2: 0.1
units_3: 18
dropout_3: 0.1
learning_rate: 0.001
tuner/epochs: 10
tuner/initial_epoch: 4
tuner/bracket: 1
tuner/round: 1
tuner/trial_id: 0020
Epoch 1/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 6ms/step - accuracy: 0.6997 - loss: 0.9821 - val_accuracy: 0.9443 - val_loss: 0.1857
Epoch 2/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.9011 - loss: 0.3396 - val_accuracy: 0.9572 - val_loss: 0.1476
Epoch 3/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 5ms/step - accuracy: 0.9228 - loss: 0.2694 - val_accuracy: 0.9625 - val_loss: 0.1320
Epoch 4/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 5ms/step - accuracy: 0.9300 - loss: 0.24