# Tune neural networks 
This example shows how to tune the hyperparameters of a neural network model with FLAML.

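**Requirements.** This notebook requires `torch`, `torchvision`, `thop`, and `flaml` with the `blendsearch` and `ray` options. Uncomment the following cell to install them: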

In [None]:
# %pip install torch torchvision flaml[blendsearch,ray] thop

## Data

In [1]:
import torch
import thop
import torch.nn as nn
from flaml import tune
import torch.nn.functional as F
import torchvision
import numpy as np
import os

# Device that the batches and the model are moved to by the cells below.
DEVICE = torch.device("cpu")
BATCHSIZE = 128
# Only a slice of FashionMNIST is used: 30 batches for training, 10 for validation.
N_TRAIN_EXAMPLES = BATCHSIZE * 30
N_VALID_EXAMPLES = BATCHSIZE * 10
data_dir = os.path.abspath("data")

# Training split, requested with download=True.
train_dataset = torchvision.datasets.FashionMNIST(
    root=data_dir,
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)
# Validation split (train=False); no download flag is passed for this one.
val_dataset = torchvision.datasets.FashionMNIST(
    root=data_dir,
    train=False,
    transform=torchvision.transforms.ToTensor(),
)

# Each loader serves the first N examples of its split in shuffled mini-batches.
train_loader = torch.utils.data.DataLoader(
    dataset=torch.utils.data.Subset(train_dataset, list(range(N_TRAIN_EXAMPLES))),
    batch_size=BATCHSIZE,
    shuffle=True,
)
val_loader = torch.utils.data.DataLoader(
    dataset=torch.utils.data.Subset(val_dataset, list(range(N_VALID_EXAMPLES))),
    batch_size=BATCHSIZE,
    shuffle=True,
)

## Specify the model

In [2]:
def define_model(configuration):
    """Assemble a fully connected classifier from a hyperparameter dictionary.

    Args:
        configuration: Mapping that provides ``"n_layers"`` and, for every
            hidden-layer index ``i`` below that count, ``"n_units_l{i}"``
            (layer width) and ``"dropout_{i}"`` (dropout probability).

    Returns:
        An ``nn.Sequential`` taking inputs with 28 * 28 features and producing
        10 log-probabilities. Each hidden layer is Linear -> ReLU -> Dropout;
        the head is Linear -> LogSoftmax over dim 1.
    """
    modules = []
    width = 28 * 28  # number of features entering the next Linear layer
    for idx in range(configuration["n_layers"]):
        units = configuration[f"n_units_l{idx}"]
        hidden = nn.Linear(width, units)
        drop_prob = configuration[f"dropout_{idx}"]
        modules += [hidden, nn.ReLU(), nn.Dropout(drop_prob)]
        width = units
    modules += [nn.Linear(width, 10), nn.LogSoftmax(dim=1)]
    return nn.Sequential(*modules)

## Train

In [3]:
def train_model(model, optimizer, train_loader):
    """Run one pass of gradient updates over ``train_loader``.

    Args:
        model: Network to optimize; it is switched to training mode first.
        optimizer: Optimizer whose ``zero_grad`` and ``step`` are called once
            per batch.
        train_loader: Iterable yielding ``(data, target)`` batches.
    """
    model.train()
    for images, labels in train_loader:
        # Reshape the batch to rows of 28 * 28 features before the forward pass.
        inputs = images.view(-1, 28 * 28).to(DEVICE)
        labels = labels.to(DEVICE)
        optimizer.zero_grad()
        loss = F.nll_loss(model(inputs), labels)
        loss.backward()
        optimizer.step()

## Metrics 

In [4]:
def eval_model(model, valid_loader):
    """Measure error rate, model size and prediction time on ``valid_loader``.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        valid_loader: Iterable yielding ``(data, target)`` batches; it must
            support ``len()`` (number of batches) and be non-empty.

    Returns:
        A 4-tuple ``(log2_flops, error_rate, params, avg_batch_pred_time)``:
        ``np.log2`` of the operation count reported by ``thop.profile`` for a
        single 28 * 28 input, ``1 - accuracy``, the parameter count from the
        same profile, and the wall-clock seconds per batch of the evaluation
        loop (batch loading included).
    """
    import time  # kept local so the function does not rely on another cell's import

    model.eval()
    correct = 0
    seen = 0
    # perf_counter is a monotonic clock meant for measuring short durations.
    start = time.perf_counter()
    with torch.no_grad():
        for data, target in valid_loader:
            data, target = data.view(-1, 28 * 28).to(DEVICE), target.to(DEVICE)
            pred = model(data).argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
            seen += target.size(0)

    avg_batch_pred_time = (time.perf_counter() - start) / len(valid_loader)

    # Divide by the number of examples actually scored rather than a global
    # constant, so the accuracy is right for a loader of any size.
    accuracy = correct / seen
    flops, params = thop.profile(
        model, inputs=(torch.randn(1, 28 * 28).to(DEVICE),), verbose=False
    )
    return np.log2(flops), 1 - accuracy, params, avg_batch_pred_time

## Evaluate function

In [5]:
def evaluate_function(configuration):
    """Train a model for one hyperparameter configuration and report its metrics.

    Args:
        configuration: Hyperparameter dictionary. Besides the keys read by
            ``define_model`` it provides ``"lr"`` (Adam learning rate) and
            ``"n_epoch"`` (number of passes over ``train_loader``).

    Returns:
        Dict with the keys ``"error_rate"``, ``"flops"``, ``"params"`` and
        ``"avg_batch_pred_time"``, filled from ``eval_model`` on ``val_loader``.
    """
    net = define_model(configuration).to(DEVICE)
    adam = torch.optim.Adam(net.parameters(), lr=configuration["lr"])
    for _ in range(configuration["n_epoch"]):
        train_model(net, adam, train_loader)
    # eval_model returns the values in this order: flops, error rate, params, time.
    flops, error_rate, params, avg_batch_pred_time = eval_model(net, val_loader)
    return dict(
        error_rate=error_rate,
        flops=flops,
        params=params,
        avg_batch_pred_time=avg_batch_pred_time,
    )

## Search space

In [7]:
# Hyperparameter search space.
# tune.randint samples from [lower, upper): the upper bound is exclusive.
search_space = {
    # upper=4 so that 3-layer networks can be sampled; with upper=3 only 1 or 2
    # layers were drawn and "n_units_l2" / "dropout_2" were never used.
    "n_layers": tune.randint(lower=1, upper=4),
    # Width of each hidden layer (indexed by layer position).
    "n_units_l0": tune.randint(lower=4, upper=128),
    "n_units_l1": tune.randint(lower=4, upper=128),
    "n_units_l2": tune.randint(lower=4, upper=128),
    # Dropout probability applied after each hidden layer.
    "dropout_0": tune.uniform(lower=0.2, upper=0.5),
    "dropout_1": tune.uniform(lower=0.2, upper=0.5),
    "dropout_2": tune.uniform(lower=0.2, upper=0.5),
    # Adam learning rate, sampled on a log scale.
    "lr": tune.loguniform(lower=1e-5, upper=1e-1),
    # Number of training passes over the training loader.
    "n_epoch": tune.randint(lower=1, upper=20),
}

## Launch the tuning

In [8]:
# Low-cost starting point for the cost-related hyperparameters.
low_cost_partial_config = dict(
    n_layers=1,
    n_units_l0=4,
    n_units_l1=4,
    n_units_l2=4,
    n_epoch=1,
)

# Constraint on a reported metric: the average prediction time per batch
# (in seconds) must be <= 1.
pred_time_constraints = [("avg_batch_pred_time", "<=", 1)]

analysis = tune.run(
    evaluate_function,
    config=search_space,
    metric="error_rate",
    mode="min",
    # Low-cost initial config for the cost-related hyperparameters.
    low_cost_partial_config=low_cost_partial_config,
    # Constraints on one or more of the reported metrics.
    metric_constraints=pred_time_constraints,
    time_budget_s=100,
    # Use Ray as the backend to run trials in parallel.
    use_ray=True,
    resources_per_trial={"cpu": 2},
)
result = analysis.best_result
print(result)

You passed a `space` parameter to OptunaSearch that contained unresolved search space definitions. OptunaSearch should however be instantiated with fully configured search spaces only. To use Ray Tune's automatic search space conversion, pass the space definition as part of the `config` argument to `tune.run()` instead.
[32m[I 2023-01-08 10:37:37,596][0m A new study created in memory with name: optuna[0m


Trial evaluate_function_89f95f20 reported error_rate=0.79 with parameters={'n_layers': 1, 'n_units_l0': 4, 'n_units_l1': 4, 'n_units_l2': 4, 'n_epoch': 1, 'dropout_0': 0.22922962186368476, 'dropout_1': 0.3941798243996576, 'dropout_2': 0.3088427685434228, 'lr': 0.00029828938661167917}.


Trial evaluate_function_89f95f20 completed. Last result: error_rate=0.79375,flops=11.632995197142957,params=3190.0,avg_batch_pred_time=0.013302230834960937


Trial name,status,loc,dropout_0,dropout_1,dropout_2,lr,n_epoch,n_layers,n_units_l0,n_units_l1,n_units_l2,iter,total time (s),error_rate,flops,params
evaluate_function_89f95f20,TERMINATED,130.203.136.143:1366904,0.22923,0.39418,0.308843,0.000298289,1,1,4,4,4,1,0.607912,0.79375,11.633,3190


2023-01-08 10:37:51,683	INFO tune.py:747 -- Total run time: 9.87 seconds (7.94 seconds for the tuning loop).


{'error_rate': 0.79375, 'flops': 11.632995197142957, 'params': 3190.0, 'avg_batch_pred_time': 0.013302230834960937, 'time_this_iter_s': 0.6079120635986328, 'done': True, 'timesteps_total': None, 'episodes_total': None, 'training_iteration': 1, 'trial_id': '89f95f20', 'experiment_id': 'ec552f15cd404633a7a916e39c2847f0', 'date': '2023-01-08_10-37-51', 'timestamp': 1673203071, 'time_total_s': 0.6079120635986328, 'pid': 1366904, 'hostname': 'i4-l-qxw5138-01.ad.psu.edu', 'node_ip': '130.203.136.143', 'config': {'n_layers': 1, 'n_units_l0': 4, 'n_units_l1': 4, 'n_units_l2': 4, 'n_epoch': 1, 'dropout_0': 0.22922962186368476, 'dropout_1': 0.3941798243996576, 'dropout_2': 0.3088427685434228, 'lr': 0.00029828938661167917}, 'time_since_restore': 0.6079120635986328, 'timesteps_since_restore': 0, 'iterations_since_restore': 1, 'warmup_time': 0.00460052490234375, 'experiment_tag': '1_dropout_0=0.2292,dropout_1=0.3942,dropout_2=0.3088,lr=0.0003,n_epoch=1,n_layers=1,n_units_l0=4,n_units_l1=4,n_units_l