In [2]:
# Neural networks offer great flexibility and power, but this comes at the cost of many tunable hyperparameters.
# Choosing the right values for these hyperparameters can make a significant difference in model performance.
# This script explores techniques for hyperparameter tuning using the California Housing dataset.
# We'll prepare the data and build a framework for training, validating, and evaluating neural networks,
# with the goal of finding an optimal configuration for this specific regression task.

# Let's start by importing the California Housing dataset from Scikit-Learn
from sklearn.datasets import fetch_california_housing  # Load the California housing dataset
from sklearn.model_selection import train_test_split    # Tool to split data into train/validation/test sets
from sklearn.preprocessing import StandardScaler        # Tool to scale features to standard normal distribution

# Load the dataset (features and target)
housing = fetch_california_housing()

# Split data into training and test sets
X_train_full, X_test, y_train_full, y_test = train_test_split(housing.data, housing.target)

# Further split training data into training and validation sets
X_train, X_valid, y_train, y_valid = train_test_split(X_train_full, y_train_full)

# Standardize the features: zero mean and unit variance
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)      # Fit on training data and transform it
X_valid = scaler.transform(X_valid)          # Use the same transformation on validation data
X_test = scaler.transform(X_test)            # Use the same transformation on test data

In [5]:
# Import Keras from TensorFlow to build and train deep learning models
from tensorflow import keras

# Define a function that builds a customizable neural network model
def build_model(n_hidden=1, n_neurons=30, learning_rate=3e-3, input_shape=(8,), **kwargs):
    # Create a Sequential model (a linear stack of layers)
    model = keras.models.Sequential()
    
    # Add an input layer with the specified shape
    model.add(keras.layers.InputLayer(input_shape=input_shape))
    
    # Add the specified number of hidden layers, each with ReLU activation
    for layer in range(n_hidden): # for _ in range(n_hidden):
        model.add(keras.layers.Dense(n_neurons, activation="relu"))
    
    # Add an output layer with a single neuron (for regression)
    model.add(keras.layers.Dense(1))
    
    # Use stochastic gradient descent (SGD) optimizer with the specified learning rate
    optimizer = keras.optimizers.SGD(learning_rate=learning_rate)
    
    # Compile the model with mean squared error loss (appropriate for regression tasks)
    # Add metrics=["mae"] to pass on a metric otherwise KerasRegressor throws the following error:
    # ValueError: Could not interpret metric identifier: loss
    model.compile(loss="mean_squared_error", optimizer=optimizer, metrics=["mae"])
    
    # Return the compiled model
    return model  

In [6]:
from scikeras.wrappers import KerasRegressor

# Wrap the Keras model into a Scikit-Learn compatible regressor
# This allows you to use Scikit-Learn tools like cross-validation and grid search
keras_reg = KerasRegressor(
    model=build_model,
    model__n_hidden=2,
    model__n_neurons=30,
    model__learning_rate=0.01,
    model__input_shape=(X_train.shape[1],)  # shape should match input data
    #metrics=None  # Prevents Scikeras from misinterpreting the loss
)

# Train the model on the training data
# - Run for up to 100 epochs
# - Use the validation set to monitor overfitting
# - Apply early stopping to halt training if validation loss doesn't improve for 10 consecutive epochs
keras_reg.fit(
    X_train, y_train,
    epochs=100,
    validation_data=(X_valid, y_valid),
    callbacks=[keras.callbacks.EarlyStopping(patience=10)]
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [7]:
# Evaluate the model's performance on the test set
mse_test = keras_reg.score(X_test, y_test)
print(f"Mean Squared Error on test set: {mse_test:.2}")

# Select the first 3 samples from the test set to simulate new/unseen input data
X_new = X_test[:3]

# Use the trained model to predict the target values for these new samples
y_pred = keras_reg.predict(X_new)
print(f"Predictions for new samples: {y_pred}")

Mean Squared Error on test set: 0.78
Predictions for new samples: [2.4776602 1.3015292 1.0385396]


In [8]:
import numpy as np
# For sampling learning rates from a logarithmic distribution
from scipy.stats import reciprocal
# Tool to search over hyperparameter space using randomized sampling
from sklearn.model_selection import RandomizedSearchCV

# Define distributions of hyperparameters to sample from
param_distribs = {
    "n_hidden": [0, 1, 2, 3],                     # Number of hidden layers to try
    "n_neurons": np.arange(1, 100),               # Range of neurons per hidden layer (1 to 99)
    "learning_rate": reciprocal(3e-4, 3e-2)       # Log-uniform distribution between 0.0003 and 0.03
}

# Perform randomized hyperparameter search using 3-fold cross-validation
rnd_search_cv = RandomizedSearchCV(
    estimator=keras_reg,                # The KerasRegressor wrapper
    param_distributions=param_distribs,  # Distributions to sample from
    n_iter=10,                # Try 10 random combinations
    cv=3                      # Use 3-fold cross-validation
)

# Train the models on the training data using each sampled hyperparameter set
rnd_search_cv.fit(
    X_train, y_train,
    epochs=100,                                             # Train up to 100 epochs
    validation_data=(X_valid, y_valid),                     # Use validation data to monitor overfitting
    callbacks=[keras.callbacks.EarlyStopping(patience=10)]  # Stop early if validation loss doesn't improve for 10 epochs
)


ValueError: Invalid parameter learning_rate for estimator KerasRegressor.
This issue can likely be resolved by setting this parameter in the KerasRegressor constructor:
`KerasRegressor(learning_rate=0.02822382759081754)`
Check the list of available parameters with `estimator.get_params().keys()`

In [None]:
# Get the best combination of hyperparameters found during the randomized search
best_params = rnd_search_cv.best_params_

# Print the best parameters
print("Best hyperparameters found:", best_params)

# Get the best cross-validation score achieved during the randomized search
best_score = rnd_search_cv.best_score_

# Print the best score (negative mean squared error by default in Scikit-Learn for regressors)
print("Best cross-validation score (negative MSE):", best_score)

# Retrieve the actual best Keras model from the randomized search
model = rnd_search_cv.best_estimator_.model