In [1]:
import sys

# Raise the interpreter's maximum recursion depth for this kernel session.
# NOTE(review): the Python docs warn that a limit set too high can crash the
# interpreter instead of raising RecursionError; confirm this value is needed.
RECURSION_LIMIT = 100_000
sys.setrecursionlimit(RECURSION_LIMIT)

In [2]:
!pip install keras-tuner


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [3]:
import os
import warnings

# Set the TensorFlow C++ log level before anything that may load TensorFlow is
# imported; setting it after the import is too late for messages emitted while
# the library loads. '2' hides INFO and WARNING messages.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# suppress all Python warnings (installed first so it also covers import-time warnings)
warnings.filterwarnings('ignore')

import keras_tuner as kt
from tensorflow import keras
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

2025-06-22 17:16:27.875126: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
from sklearn.model_selection import train_test_split

# Load the MNIST training images and labels. The second element returned by
# load_data() is discarded; the test set used in this notebook is carved out
# of the training data below instead.
(X_full, y_full), _ = keras.datasets.mnist.load_data()

# Hold out 20% of the loaded data as the test set (80/20).
X_trainval, X_test, y_trainval, y_test = train_test_split(
    X_full, y_full, test_size=0.2, random_state=42
)

# Hold out 20% of the remaining 80% for validation, which gives a
# 64/16/20 train/validation/test split overall.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_trainval, y_trainval, test_size=0.2, random_state=42
)

In [5]:
# Divide every split by 255 so all three share the same scaling.
# Note: re-running this cell without re-running the split cell scales the data again.
X_train = X_train / 255.0
X_valid = X_valid / 255.0
X_test = X_test / 255.0

# Report the shape of each split as a quick sanity check.
print(f"Training data shape: {X_train.shape}")
print(f"Validation data shape: {X_valid.shape}")
print(f"Test data shape: {X_test.shape}")

Training data shape: (38400, 28, 28)
Validation data shape: (9600, 28, 28)
Test data shape: (12000, 28, 28)


In [6]:
def build_model(hp: kt.HyperParameters) -> Sequential:
    """Build and compile a single-hidden-layer classifier for one tuner trial.

    Two hyperparameters are registered on ``hp``:
      * ``units`` - width of the hidden Dense layer, 32 to 512 in steps of 32.
      * ``learning_rate`` - Adam learning rate, 1e-4 to 1e-2, sampled on a log scale.

    Args:
        hp: Hyperparameter container supplied by the tuner.

    Returns:
        A compiled ``Sequential`` model that takes 28x28 inputs and ends in a
        10-unit softmax layer, using sparse categorical cross-entropy loss
        and reporting accuracy.
    """
    # Register 'units' before 'learning_rate' so the search space keeps its order.
    hidden_units = hp.Int('units', min_value=32, max_value=512, step=32)
    learning_rate = hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='log')

    model = Sequential([
        # Declare the input shape with an explicit Input object instead of
        # passing input_shape= to the first layer.
        keras.Input(shape=(28, 28)),
        Flatten(),
        Dense(units=hidden_units, activation='relu'),
        Dense(units=10, activation='softmax'),
    ])

    # Labels are used as integer class ids, hence the sparse loss.
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

In [7]:
# Configure a random-search tuner over the search space defined in build_model.
# Up to 10 trials are run, each executed twice, and trials are compared on
# validation accuracy. State is kept under tuner/intro_to_kt; when that project
# already exists on disk it is reloaded rather than started fresh.
tuner = kt.RandomSearch(
    hypermodel=build_model,
    objective='val_accuracy',
    max_trials=10,
    executions_per_trial=2,
    project_name='intro_to_kt',
    directory='tuner',
)

# Show the hyperparameters the tuner will sample from.
tuner.search_space_summary()

Reloading Tuner from tuner/intro_to_kt/tuner0.json
Search space summary
Default search space size: 2
units (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': 'linear'}
learning_rate (Float)
{'default': 0.0001, 'conditions': [], 'min_value': 0.0001, 'max_value': 0.01, 'step': None, 'sampling': 'log'}


### Running the hyperparameter search

In [8]:
# Run the search: each trial fits on the training split for 5 epochs and is
# scored on the validation split.
tuner.search(
    X_train,
    y_train,
    epochs=5,
    validation_data=(X_valid, y_valid),
)

# Print the trials ordered by objective score.
tuner.results_summary()

Results summary
Results in tuner/intro_to_kt
Showing 10 best trials
Objective(name="val_accuracy", direction="max")

Trial 08 summary
Hyperparameters:
units: 480
learning_rate: 0.0006620419808746878
Score: 0.9761458337306976

Trial 04 summary
Hyperparameters:
units: 448
learning_rate: 0.0009271244766046284
Score: 0.9743229150772095

Trial 02 summary
Hyperparameters:
units: 512
learning_rate: 0.0002485239761234895
Score: 0.9705208241939545

Trial 05 summary
Hyperparameters:
units: 512
learning_rate: 0.0002421696251742364
Score: 0.9699479043483734

Trial 06 summary
Hyperparameters:
units: 96
learning_rate: 0.00330588674286921
Score: 0.9688020646572113

Trial 07 summary
Hyperparameters:
units: 192
learning_rate: 0.006351062743654629
Score: 0.965416669845581

Trial 03 summary
Hyperparameters:
units: 416
learning_rate: 0.00012015957297929576
Score: 0.9597916603088379

Trial 09 summary
Hyperparameters:
units: 160
learning_rate: 0.00010957237595130019
Score: 0.9474479258060455

Trial 01 summa

### Analyzing and using the best hyperparameters

In [9]:
# Ask the tuner for its single best hyperparameter set and take it out of the list.
top_hp_sets = tuner.get_best_hyperparameters(num_trials=1)
best_hps: kt.HyperParameters = top_hp_sets[0]

In [10]:
# Report the winning values for the two tuned hyperparameters.
best_hps_report = f"""
The optimal number of units in the first dense layer is: {best_hps['units']}
The optimal learning rate is {best_hps['learning_rate']}
"""
print(best_hps_report)


The optimal number of units in the first dense layer is: 480
The optimal learning rate is 0.0006620419808746878



In [11]:
# Build a new model from the best hyperparameters and train it for 10 epochs,
# monitoring the validation split.
model_ = tuner.hypermodel.build(best_hps)
model_.fit(X_train, y_train, epochs=10, validation_data=(X_valid, y_valid))

# Evaluate on the test split, which is not passed to the search or to fit().
# These are test metrics, so they are named accordingly rather than val_*.
test_loss, test_acc = model_.evaluate(X_test, y_test)
# Previous names for these values, kept so any later cells that still
# reference them continue to work.
val_loss, val_acc = test_loss, test_acc
print('Test loss:', test_loss)
print('Test accuracy:', test_acc)

Epoch 1/10
[1m1200/1200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - accuracy: 0.8635 - loss: 0.4679 - val_accuracy: 0.9538 - val_loss: 0.1527
Epoch 2/10
[1m1200/1200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - accuracy: 0.9647 - loss: 0.1233 - val_accuracy: 0.9659 - val_loss: 0.1096
Epoch 3/10
[1m1200/1200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - accuracy: 0.9779 - loss: 0.0758 - val_accuracy: 0.9735 - val_loss: 0.0875
Epoch 4/10
[1m1200/1200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - accuracy: 0.9860 - loss: 0.0502 - val_accuracy: 0.9761 - val_loss: 0.0778
Epoch 5/10
[1m1200/1200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - accuracy: 0.9895 - loss: 0.0369 - val_accuracy: 0.9762 - val_loss: 0.0789
Epoch 6/10
[1m1200/1200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - accuracy: 0.9932 - loss: 0.0271 - val_accuracy: 0.9739 - val_loss: 0.0846
Epoch 7/10
[1m1