# Objective 01 - describe the major hyperparameters to tune

Neural networks have more hyperparameters to tune than the other models we've worked with so far. Some of the most important ones are:

- batch size
- learning rate and number of training epochs
- activation function
- number of neurons in the hidden layer(s)
- optimization algorithms






In [1]:
# Example modified from:
# https://chrisalbon.com/deep_learning/keras/tuning_neural_network_hyperparameters/

# imports to create the classification
from sklearn.datasets import make_classification

# Width of the synthetic feature matrix (also read by make_network below)
num_features = 50

# Synthetic binary-classification problem: two evenly weighted classes,
# with 2 informative columns among the num_features generated.
dataset_options = dict(
    n_samples=10000,
    n_features=num_features,
    n_informative=2,
    n_redundant=0,
    n_classes=2,
    weights=[.5, .5],
    random_state=42,
)
features, target = make_classification(**dataset_options)

# Sanity-check the array dimensions before training
print('Features array shape: ', features.shape)
print('Target array shape: ', target.shape)


Features array shape:  (10000, 50)
Target array shape:  (10000,)


In [2]:
# Import keras models and layers
from tensorflow.keras import models
from tensorflow.keras import layers

# function to return a compiled network
def make_network(optimizer='adam'):
  """Build and compile a small fully connected binary classifier.

  The network has two hidden Dense layers of 8 relu units each, followed by
  a single sigmoid output unit. The expected input width is read from the
  module-level ``num_features``.

  Args:
    optimizer: optimizer passed straight through to ``network.compile``.

  Returns:
    The compiled Sequential model.
  """
  hidden_units = 8

  network = models.Sequential([
      # First hidden layer; input_shape declares the width of each sample
      layers.Dense(units=hidden_units, activation='relu', input_shape=(num_features,)),
      # Second hidden layer
      layers.Dense(units=hidden_units, activation='relu'),
      # Output layer: one sigmoid unit for the two-class target
      layers.Dense(units=1, activation='sigmoid'),
  ])

  # Binary cross-entropy loss, with accuracy tracked as the metric
  network.compile(
      optimizer=optimizer,
      loss='binary_crossentropy',
      metrics=['accuracy'],
  )
  return network



In [4]:
# Scikit-learn wrappers for keras
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
# Wrap the model factory in the scikit-learn classifier API so it can be
# handed to GridSearchCV as its estimator.
# NOTE(review): tensorflow.keras.wrappers.scikit_learn may not exist in newer
# TensorFlow releases - confirm against the installed version.
neural_network = KerasClassifier(
    build_fn=make_network,
    verbose=0,
)

In [5]:
from sklearn.model_selection import GridSearchCV

# Candidate values for each hyperparameter in the search
epochs = [10, 25]
batches = [4, 8, 32]
optimizers = ['rmsprop', 'adam']

# Parameter grid handed to GridSearchCV
hyperparameters = {
    'optimizer': optimizers,
    'epochs': epochs,
    'batch_size': batches,
}

# Create and fit the grid search, using 5-fold cross-validation
grid = GridSearchCV(estimator=neural_network, param_grid=hyperparameters, cv=5)
grid_result = grid.fit(features, target)

In [8]:
# take a look at the best parameters
grid_result.best_params_

{'batch_size': 4, 'epochs': 25, 'optimizer': 'adam'}

# Objective 02 - implement an experiment tracking framework


In [6]:
from sklearn.model_selection import train_test_split

# imports to create the classification and train test sets
from sklearn.datasets import make_classification

# Feature count for the synthetic dataset
num_features = 50

# Binary-classification data: 10,000 samples, 3 informative features
features, target = make_classification(
    n_samples=10000,
    n_features=num_features,
    n_informative=3,
    n_redundant=0,
    n_classes=2,
    random_state=42,
)

# Hold out 25% of the samples for testing (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.25, random_state=42
)

In [7]:
%load_ext tensorboard

In [9]:
# imports 
import tensorflow as tf
from tensorboard.plugins.hparams import api as hp

# Hyperparameters to explore and the values each may take
HP_NUM_UNITS = hp.HParam('num_units', hp.Discrete([8, 16]))
HP_DROPOUT = hp.HParam('dropout', hp.RealInterval(min_value=0.1, max_value=0.2))
HP_OPTIMIZER = hp.HParam('optimizer', hp.Discrete(['adam', 'sgd']))

# Name under which each trial's accuracy is logged
METRIC_ACCURACY = 'accuracy'

# Record the experiment configuration (hyperparameters and metric) in the
# top-level log directory
with tf.summary.create_file_writer('logs/hparam_tuning').as_default():
    hp.hparams_config(
        metrics=[hp.Metric(METRIC_ACCURACY, display_name='Accuracy')],
        hparams=[HP_NUM_UNITS, HP_DROPOUT, HP_OPTIMIZER],
    )

# Adapt TensorFlow runs to log hyperparameters and metrics


In [16]:
# Build, train, and evaluate a model for one set of hyperparameter
# values, returning its accuracy on the test split

def train_test_model(hparams):
    """Train a one-hidden-layer binary classifier and return its test accuracy.

    Args:
        hparams: mapping keyed by HP_NUM_UNITS, HP_DROPOUT and HP_OPTIMIZER
            that supplies the hidden-layer width, the dropout rate and the
            optimizer for this trial.

    Returns:
        The accuracy reported by ``model.evaluate`` on (X_test, y_test).
    """
    model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(hparams[HP_NUM_UNITS], activation=tf.nn.relu),
        tf.keras.layers.Dropout(hparams[HP_DROPOUT]),
        # The target has two classes, so a single sigmoid unit is the right
        # output head (previously a 10-way softmax left over from a
        # 10-class example).
        tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)
    ])
    model.compile(
        optimizer=hparams[HP_OPTIMIZER],
        # Binary loss to pair with the single sigmoid output
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    # Run with 1 epoch to speed things up for demo purposes
    model.fit(X_train, y_train, epochs=1)
    _, accuracy = model.evaluate(X_test, y_test)

    return accuracy

# Log an hparams summary with the hyperparameters and final accuracy:


In [17]:
def run(run_dir, hparams):
    """Execute one trial and log its hyperparameters and accuracy.

    Args:
        run_dir: directory the summary writer for this trial writes to.
        hparams: hyperparameter values for the trial; logged via
            ``hp.hparams`` and passed on to ``train_test_model``.
    """
    trial_writer = tf.summary.create_file_writer(run_dir)
    with trial_writer.as_default():
        # Record the values used in this trial
        hp.hparams(hparams)
        # Train/evaluate, then log the resulting accuracy at step 1
        tf.summary.scalar(METRIC_ACCURACY, train_test_model(hparams), step=1)


# Start runs and log them all under one parent directory


In [18]:
# Every combination to try: each unit count x the two endpoints of the
# dropout interval x each optimizer, in that nesting order.
trial_grid = [
    {HP_NUM_UNITS: num_units, HP_DROPOUT: dropout_rate, HP_OPTIMIZER: optimizer}
    for num_units in HP_NUM_UNITS.domain.values
    for dropout_rate in (HP_DROPOUT.domain.min_value, HP_DROPOUT.domain.max_value)
    for optimizer in HP_OPTIMIZER.domain.values
]

# Run the trials one after another, each logging to its own numbered
# subdirectory of logs/hparam_tuning/
session_num = 0
for hparams in trial_grid:
    run_name = "run-%d" % session_num
    print('--- Starting trial: %s' % run_name)
    print({h.name: hparams[h] for h in hparams})
    run('logs/hparam_tuning/' + run_name, hparams)
    session_num += 1

--- Starting trial: run-0
{'num_units': 8, 'dropout': 0.1, 'optimizer': 'adam'}
--- Starting trial: run-1
{'num_units': 8, 'dropout': 0.1, 'optimizer': 'sgd'}
--- Starting trial: run-2
{'num_units': 8, 'dropout': 0.2, 'optimizer': 'adam'}
--- Starting trial: run-3
{'num_units': 8, 'dropout': 0.2, 'optimizer': 'sgd'}
--- Starting trial: run-4
{'num_units': 16, 'dropout': 0.1, 'optimizer': 'adam'}
--- Starting trial: run-5
{'num_units': 16, 'dropout': 0.1, 'optimizer': 'sgd'}
--- Starting trial: run-6
{'num_units': 16, 'dropout': 0.2, 'optimizer': 'adam'}
--- Starting trial: run-7
{'num_units': 16, 'dropout': 0.2, 'optimizer': 'sgd'}


In [12]:
# Inspect the search space: discrete unit counts and the dropout bounds
(
    HP_NUM_UNITS.domain.values,
    HP_DROPOUT.domain.min_value,
    HP_DROPOUT.domain.max_value,
)

([8, 16], 0.1, 0.2)

# Visualize the results in TensorBoard's HParams plugin


In [2]:
%load_ext tensorboard

In [3]:
# Launch TensorBoard in the notebook, pointed at the hyperparameter-tuning logs
%tensorboard --logdir logs/hparam_tuning/

Launching TensorBoard...

In [21]:
# NOTE(review): `%git` is not a registered line magic, so this cell raises
# UsageError; shell commands are normally run with `!git ...` instead.
%git

UsageError: Line magic function `%git` not found.
