In [3]:
import keras
import keras_tuner as kt
import numpy as np
from sklearn.model_selection import train_test_split
from skimage.transform import resize
import shutil
import os

# Clean up previous runs
if os.path.exists("kt_hyperband"):
    shutil.rmtree("kt_hyperband")
if os.path.exists("kt_random"):
    shutil.rmtree("kt_random")

In [4]:
# Load and prepare data (same as Exercise 1)
sym_dim = 8
(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()

X_train_full = X_train_full.reshape(-1, 28 * 28).astype("float32") / 255.0
X_test = X_test.reshape(-1, 28 * 28).astype("float32") / 255.0

X_train_full = resize(X_train_full.reshape(-1, 28, 28), (len(X_train_full), sym_dim, sym_dim)).reshape(-1, sym_dim*sym_dim).astype("float32")
X_test = resize(X_test.reshape(-1, 28, 28), (len(X_test), sym_dim, sym_dim)).reshape(-1, sym_dim*sym_dim).astype("float32")

X_train, X_val, y_train, y_val = train_test_split(
    X_train_full, y_train_full, test_size=0.2, random_state=42, stratify=y_train_full
)

print(f"X_train: {X_train.shape}, X_val: {X_val.shape}, X_test: {X_test.shape}")

X_train: (48000, 64), X_val: (12000, 64), X_test: (10000, 64)


In [5]:
# Define model builder for Keras Tuner
# Same search space as Exercise 1

def build_model(hp):
    # Number of layers (1-4)
    num_layers = hp.Int("num_layers", min_value=1, max_value=4)
    
    # Dropout rate (0.0 - 0.4)
    dropout_rate = hp.Float("dropout_rate", min_value=0.0, max_value=0.4)
    
    # Learning rate (1e-4 to 1e-2, log scale)
    learning_rate = hp.Float("learning_rate", min_value=1e-4, max_value=1e-2, sampling="log")
    
    # Optimizer choice
    optimizer_name = hp.Choice("optimizer", values=["adam", "sgd", "rmsprop"])
    
    # Batch size is handled in tuner.search(), not here
    
    # Build model
    inputs = keras.layers.Input(shape=(sym_dim * sym_dim,))
    x = inputs
    
    for i in range(num_layers):
        # Units per layer (64, 128, 256, 512)
        units = hp.Choice(f"units_{i}", values=[64, 128, 256, 512])
        x = keras.layers.Dense(units)(x)
        x = keras.layers.Activation("relu")(x)
        if dropout_rate > 0:
            x = keras.layers.Dropout(dropout_rate)(x)
    
    outputs = keras.layers.Dense(10, activation="softmax")(x)
    model = keras.Model(inputs=inputs, outputs=outputs)
    
    # Select optimizer
    if optimizer_name == "adam":
        opt = keras.optimizers.Adam(learning_rate=learning_rate)
    elif optimizer_name == "sgd":
        opt = keras.optimizers.SGD(learning_rate=learning_rate)
    else:
        opt = keras.optimizers.RMSprop(learning_rate=learning_rate)
    
    model.compile(
        optimizer=opt,
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"]
    )
    
    return model

In [6]:
# Hyperband tuner
hyperband_tuner = kt.Hyperband(
    build_model,
    objective="val_accuracy",
    max_epochs=20,
    factor=3,  # Reduction factor for successive halving
    hyperband_iterations=1,
    directory="kt_hyperband",
    project_name="fashion_mnist",
    overwrite=True
)

print("Hyperband search space:")
hyperband_tuner.search_space_summary()

I0000 00:00:1767199385.296251  300784 gpu_device.cc:2020] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 2131 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3050 Ti Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6


Hyperband search space:
Search space summary
Default search space size: 5
num_layers (Int)
{'default': None, 'conditions': [], 'min_value': 1, 'max_value': 4, 'step': 1, 'sampling': 'linear'}
dropout_rate (Float)
{'default': 0.0, 'conditions': [], 'min_value': 0.0, 'max_value': 0.4, 'step': None, 'sampling': 'linear'}
learning_rate (Float)
{'default': 0.0001, 'conditions': [], 'min_value': 0.0001, 'max_value': 0.01, 'step': None, 'sampling': 'log'}
optimizer (Choice)
{'default': 'adam', 'conditions': [], 'values': ['adam', 'sgd', 'rmsprop'], 'ordered': False}
units_0 (Choice)
{'default': 64, 'conditions': [], 'values': [64, 128, 256, 512], 'ordered': True}


In [7]:
# Run Hyperband search
import time

early_stop = keras.callbacks.EarlyStopping(monitor="val_loss", patience=3, restore_best_weights=True)

start_time = time.time()
hyperband_tuner.search(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=20,
    batch_size=64,  # Fixed batch size for fair comparison
    callbacks=[early_stop],
    verbose=1
)
hyperband_time = time.time() - start_time

print(f"\nHyperband completed in {hyperband_time:.1f} seconds")

Trial 30 Complete [00h 00m 19s]
val_accuracy: 0.7068333625793457

Best val_accuracy So Far: 0.8730833530426025
Total elapsed time: 00h 05m 34s

Hyperband completed in 333.8 seconds


In [8]:
# Get best hyperparameters from Hyperband
best_hps = hyperband_tuner.get_best_hyperparameters(num_trials=1)[0]

print("Best hyperparameters (Hyperband):")
print(f"  num_layers: {best_hps.get('num_layers')}")
print(f"  dropout_rate: {best_hps.get('dropout_rate'):.3f}")
print(f"  learning_rate: {best_hps.get('learning_rate'):.6f}")
print(f"  optimizer: {best_hps.get('optimizer')}")
for i in range(best_hps.get('num_layers')):
    print(f"  units_{i}: {best_hps.get(f'units_{i}')}")

Best hyperparameters (Hyperband):
  num_layers: 3
  dropout_rate: 0.142
  learning_rate: 0.000358
  optimizer: rmsprop
  units_0: 64
  units_1: 256
  units_2: 512


In [9]:
# Random search for comparison (same budget)
random_tuner = kt.RandomSearch(
    build_model,
    objective="val_accuracy",
    max_trials=30,  # Similar number of trials as Hyperband
    directory="kt_random",
    project_name="fashion_mnist",
    overwrite=True
)

start_time = time.time()
random_tuner.search(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=20,
    batch_size=64,
    callbacks=[early_stop],
    verbose=1
)
random_time = time.time() - start_time

print(f"\nRandom search completed in {random_time:.1f} seconds")

Trial 30 Complete [00h 00m 22s]
val_accuracy: 0.7699999809265137

Best val_accuracy So Far: 0.8845833539962769
Total elapsed time: 00h 10m 24s

Random search completed in 624.0 seconds


In [10]:
# Compare results
best_hps_random = random_tuner.get_best_hyperparameters(num_trials=1)[0]

print("Best hyperparameters (Random Search):")
print(f"  num_layers: {best_hps_random.get('num_layers')}")
print(f"  dropout_rate: {best_hps_random.get('dropout_rate'):.3f}")
print(f"  learning_rate: {best_hps_random.get('learning_rate'):.6f}")
print(f"  optimizer: {best_hps_random.get('optimizer')}")

Best hyperparameters (Random Search):
  num_layers: 3
  dropout_rate: 0.254
  learning_rate: 0.000408
  optimizer: adam


In [11]:
# Evaluate best models on test set
best_model_hb = hyperband_tuner.get_best_models(num_models=1)[0]
best_model_rs = random_tuner.get_best_models(num_models=1)[0]

_, acc_hb = best_model_hb.evaluate(X_test, y_test, verbose=0)
_, acc_rs = best_model_rs.evaluate(X_test, y_test, verbose=0)

print("\n" + "="*50)
print("COMPARISON SUMMARY")
print("="*50)
print(f"Hyperband:     test acc = {acc_hb:.4f}, time = {hyperband_time:.1f}s")
print(f"Random Search: test acc = {acc_rs:.4f}, time = {random_time:.1f}s")
print("="*50)

  saveable.load_own_variables(weights_store.get(inner_path))




COMPARISON SUMMARY
Hyperband:     test acc = 0.8588, time = 333.8s
Random Search: test acc = 0.8765, time = 624.0s
