## This example shows how to use hyperopt to pick parameters for a MLP classifier on the MNIST handwritten digits dataset

In [4]:
import warnings
warnings.filterwarnings("ignore")

from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from sklearn.datasets import fetch_openml
from sklearn.neural_network import MLPClassifier
import numpy as np

mnist = fetch_openml('mnist_784')

# Scale the images to be between 0 and 1
X = mnist.data / 255.
y = mnist.target

# Split into training and testing sets
X_train, X_test = X[:60000], X[60000:]
y_train, y_test = y[:60000], y[60000:]

In [8]:
# Define an objective function to minimize
# The classifier will be created, trained, and scored within this function
def objective(args):
    
    # Build a classifier based on the parameters chosen
    mlp = MLPClassifier(hidden_layer_sizes=(int(args['layer_size']),),
                        max_iter=10,
                        alpha=args['alpha'],
                        solver=args['algorithm'],
                        tol=1e-4, 
                        random_state=1,
                        activation=args['activation'], 
                        learning_rate_init=args['learning_rate']
                       )
    
    # Fit the classifier to the training data
    mlp.fit(X_train, y_train)
    
    #NOTE: Normally you should use a separate 'validation' set here
    #      and have a 'test' set that is only used on the final classifier
    #      once parameters have been selected, the final classifier can be
    #      retrained on both the 'training' and 'validation' sets
    loss = -mlp.score(X_test, y_test)
    
    # Must return loss and status, any additional information can also be saved here.
    # In this example the fully trained model is also returned
    return {'loss': loss, 'status': STATUS_OK, 'model':mlp}

In [11]:
# Define the parameter space to search over
# In this case the objective function is expecting a single dictionary argument, 
# so the space variable is set up to match that
space = {'layer_size':hp.quniform('layer_size', 25, 100, 1),
         'alpha':hp.lognormal('alpha', mu=np.log(1e-4), sigma=1),
         'algorithm':hp.choice('algorithm', ['lbfgs', 'sgd', 'adam']),
         'activation':hp.choice('activation', ['logistic', 'tanh', 'relu']),
         #'learning_rate':hp.uniform('learning_rate', low=0.001, high=0.999),
         'learning_rate':hp.loguniform('learning_rate', low=np.log(1e-4), high=np.log(1.)),
        }

In [12]:
# Create a Trials object to store results of each evaluation
trials = Trials()

# Run the search for the specified number of evaluations
best = fmin(objective,
            space=space,
            algo=tpe.suggest,
            trials=trials,
            max_evals=10)

100%|██████████| 10/10 [01:43<00:00, 10.34s/trial, best loss: -0.9775]


In [13]:
# Get the trained model from the best trial
best_model = trials.best_trial['result']['model']

# Compute the training and testing scores on this model
print("Training Accuracy: %f" % best_model.score(X_train, y_train))
print("Testing Accuracy: %f" % best_model.score(X_test, y_test))

Training Accuracy: 0.994367
Testing Accuracy: 0.977500


In [14]:
best

{'activation': 0,
 'algorithm': 1,
 'alpha': 0.0002548917899569714,
 'layer_size': 78.0,
 'learning_rate': 0.4305299897724264}

In [15]:
trials.best_trial

{'state': 2,
 'tid': 5,
 'spec': None,
 'result': {'loss': -0.9775,
  'status': 'ok',
  'model': MLPClassifier(activation='logistic', alpha=0.0002548917899569714,
                hidden_layer_sizes=(78,), learning_rate_init=0.4305299897724264,
                max_iter=10, random_state=1, solver='sgd')},
 'misc': {'tid': 5,
  'cmd': ('domain_attachment', 'FMinIter_Domain'),
  'workdir': None,
  'idxs': {'activation': [5],
   'algorithm': [5],
   'alpha': [5],
   'layer_size': [5],
   'learning_rate': [5]},
  'vals': {'activation': [0],
   'algorithm': [1],
   'alpha': [0.0002548917899569714],
   'layer_size': [78.0],
   'learning_rate': [0.4305299897724264]}},
 'exp_key': None,
 'owner': None,
 'version': 0,
 'book_time': datetime.datetime(2024, 5, 5, 19, 10, 42, 303000),
 'refresh_time': datetime.datetime(2024, 5, 5, 19, 10, 57, 224000)}

In [16]:
trials.results

[{'loss': -0.8106,
  'status': 'ok',
  'model': MLPClassifier(activation='logistic', alpha=0.0001064369228523456,
                hidden_layer_sizes=(95,), learning_rate_init=0.23091432935502323,
                max_iter=10, random_state=1, solver='lbfgs')},
 {'loss': -0.8214,
  'status': 'ok',
  'model': MLPClassifier(activation='logistic', alpha=0.0002676406583180447,
                hidden_layer_sizes=(94,),
                learning_rate_init=0.00032816177283780275, max_iter=10,
                random_state=1, solver='lbfgs')},
 {'loss': -0.944,
  'status': 'ok',
  'model': MLPClassifier(activation='tanh', alpha=0.00011136893118170627,
                hidden_layer_sizes=(95,), learning_rate_init=0.6230101111806607,
                max_iter=10, random_state=1, solver='sgd')},
 {'loss': -0.9724,
  'status': 'ok',
  'model': MLPClassifier(alpha=0.0001726424609834143, hidden_layer_sizes=(84,),
                learning_rate_init=0.28559769015889097, max_iter=10,
                random_st