Reference: Ax tutorials — https://www.ax.dev/tutorials/

__Tune a CNN on MNIST__

1. [Import](#Import)
1. [Load MNIST data](#Load-MNIST-data)
1. [Define a function to optimize](#Define-a-function-to-optimize)
1. [Run the optimization loop](#Run-the-optimization-loop)
1. [Plot response surface](#Plot-response-surface)
1. [Plot best objective as function of the iteration](#Plot-best-objective-as-function-of-the-iteration)
1. [Train CNN with best hyperparameters and evaluate on test set](#Train-CNN-with-best-hyperparameters-and-evaluate-on-test-set)



# Import

<a id = 'Import'></a>

In [6]:
import torch
import numpy as np

from ax.plot.contour import plot_contour
from ax.plot.trace import optimization_trace_single_method
from ax.service.managed_loop import optimize
from ax.utils.notebook.plotting import render, init_notebook_plotting
from ax.utils.tutorials.cnn_utils import load_mnist, train, evaluate

# Prepare the notebook for the Plotly figures drawn by render() in later cells.
init_notebook_plotting()

[INFO 05-14 04:28:18] ipy_plotting: Injecting Plotly library into cell. Do not overwrite or delete cell.


# Load MNIST data


<a id = 'Load-MNIST-data'></a>

In [11]:
# load_mnist() yields three loaders, unpacked here as training, validation and test.
# The validation loader scores candidates during tuning; the test loader is only
# used for the final evaluation at the end of the notebook.
train_loader, valid_loader, test_loader = load_mnist()

# Define a function to optimize


<a id = 'Define-a-function-to-optimize'></a>

In [12]:
# Tensor dtype and compute device shared by every train()/evaluate() call below.
dtype = torch.float

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")


def train_evaluate(parameterization):
    """Train a network with one hyperparameter setting and score it on validation data.

    This is the evaluation function handed to the optimization loop, which
    records the returned value under the objective name "accuracy".

    Args:
        parameterization: Hyperparameter mapping for one trial, forwarded
            unchanged to ``train`` as ``parameters`` (the search space in this
            notebook defines "lr" and "momentum").

    Returns:
        Whatever ``evaluate`` reports for the trained network on
        ``valid_loader``.
    """
    # dtype/device are module-level settings; pass the same pair to both calls.
    shared = {"dtype": dtype, "device": device}
    trained_net = train(
        train_loader=train_loader,
        parameters=parameterization,
        **shared,
    )
    return evaluate(net=trained_net, data_loader=valid_loader, **shared)

# Run the optimization loop



<a id = 'Run-the-optimization-loop'></a>

In [13]:
# Search space: the learning rate is explored on a log scale over [1e-6, 0.4],
# momentum over [0.0, 1.0].
search_space = [
    dict(name="lr", type="range", bounds=[1e-6, 0.4], log_scale=True),
    dict(name="momentum", type="range", bounds=[0.0, 1.0]),
]

# No trial count is passed; the log below shows this run used 20 steps.
# NOTE(review): no `minimize` argument is given, so the objective is presumably
# maximized by default — confirm against the installed Ax version.
best_parameters, values, experiment, model = optimize(
    parameters=search_space,
    evaluation_function=train_evaluate,
    objective_name="accuracy",
)

[INFO 05-14 04:40:09] ax.service.utils.dispatch: Using Bayesian Optimization generation strategy. Iterations after 5 will take longer to generate due to model-fitting.
[INFO 05-14 04:40:09] ax.service.managed_loop: Started full optimization with 20 steps.
[INFO 05-14 04:40:09] ax.service.managed_loop: Running optimization trial 1...
[INFO 05-14 04:40:18] ax.service.managed_loop: Running optimization trial 2...
[INFO 05-14 04:40:27] ax.service.managed_loop: Running optimization trial 3...
[INFO 05-14 04:40:35] ax.service.managed_loop: Running optimization trial 4...
[INFO 05-14 04:40:44] ax.service.managed_loop: Running optimization trial 5...
[INFO 05-14 04:40:53] ax.service.managed_loop: Running optimization trial 6...
[INFO 05-14 04:41:02] ax.service.managed_loop: Running optimization trial 7...
[INFO 05-14 04:41:12] ax.service.managed_loop: Running optimization trial 8...
[INFO 05-14 04:41:22] ax.service.managed_loop: Running optimization trial 9...
[INFO 05-14 04:41:32] ax.service.

In [14]:
# Display the best hyperparameter setting returned by optimize().
best_parameters

{'lr': 0.002406429628682412, 'momentum': 0.749737024307251}

In [15]:
# `values` is a pair: per-metric means and the matching covariance mapping
# for the best parameterization. Print each on its own line.
means, covariances = values
print(means, covariances, sep="\n")

{'accuracy': 0.9708328909297179}
{'accuracy': {'accuracy': 1.0481821595415295e-08}}


# Plot response surface



<a id = 'Plot-response-surface'></a>

In [16]:
# Contour of the modelled "accuracy" metric over the two tuned hyperparameters.
response_surface_plot = plot_contour(
    model=model,
    param_x="lr",
    param_y="momentum",
    metric_name="accuracy",
)
render(response_surface_plot)

# Plot best objective as function of the iteration



<a id = 'Plot-best-objective-as-function-of-the-iteration'></a>

In [17]:
# Objective mean of every trial, converted from a fraction to a percentage.
trial_accuracy_pct = [
    trial.objective_mean * 100 for trial in experiment.trials.values()
]
# One row (a single optimization run) with one column per trial.
best_objectives = np.array([trial_accuracy_pct])
# Running maximum along the trial axis: best accuracy seen so far at each step.
running_best = np.maximum.accumulate(best_objectives, axis=1)

best_objective_plot = optimization_trace_single_method(
    y=running_best,
    title="Model performance vs. # of iters",
    ylabel="Classification accuracy %",
)
render(best_objective_plot)

# Train CNN with best hyperparameters and evaluate on test set



<a id = 'Train-CNN-with-best-hyperparameters-and-evaluate-on-test-set'></a>

In [20]:
# Pull the recorded results and pick the arm with the highest observed mean.
data = experiment.fetch_data()
df = data.df
is_best = df["mean"] == df["mean"].max()
# Several rows may tie for the maximum; keep the first one.
best_arm_name = df.loc[is_best, "arm_name"].values[0]
best_arm = experiment.arms_by_name[best_arm_name]
best_arm

Arm(name='1_0', parameters={'lr': 0.002406429628682412, 'momentum': 0.749737024307251})

In [23]:
# Train with the best arm's hyperparameters, then score the result on the test
# loader, which was not used during tuning.
run_config = {"dtype": dtype, "device": device}
net = train(
    train_loader=train_loader,
    parameters=best_arm.parameters,
    **run_config,
)
test_accuracy = evaluate(net=net, data_loader=test_loader, **run_config)

In [24]:
# Display the accuracy of the final network on the test loader (a fraction, not a percentage).
test_accuracy

0.9688

In [None]:
# Report the final test-set accuracy as a percentage with two decimals.
# The original cell printed only the bare label "Classification" and never
# included the value computed above.
print(f"Classification accuracy (test set): {test_accuracy:.2%}")