## Launching a run

In [None]:
from hpo_client import HyperparameterTuner
from utils import load_config

# Load the run settings. Later cells read 'optuna_app_name', 'compute_pool',
# 'n_trials', 'trials_per_task' and (optionally) 'study_name' from this object.
config = load_config()

### (Optional) Iterate on the objective

The cell below contains an example `objective` function.
The [`%%writefile` magic](https://ipython.readthedocs.io/en/stable/interactive/magics.html#cellmagic-writefile) on its first line saves the cell's contents to `objective_fn_dev.py` instead of executing them.

In [None]:
%%writefile objective_fn_dev.py

import optuna
from optuna.trial import TrialState
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
from utils import get_mnist

# Device that objective() moves the model and every batch to; fixed to CPU here.
DEVICE = torch.device("cpu")

def define_model(trial):
    """Build a feed-forward classifier whose shape is sampled from ``trial``.

    The trial chooses the number of hidden layers (1-3) and, for each hidden
    layer, its width (4-128 units) and dropout probability (0.2-0.5). Every
    hidden layer is Linear -> ReLU -> Dropout. The network takes flattened
    28 * 28 inputs and ends in a 10-way Linear layer followed by LogSoftmax
    over dim 1.

    Args:
        trial: Object providing ``suggest_int`` and ``suggest_float``.

    Returns:
        The assembled ``nn.Sequential`` model.
    """
    num_classes = 10
    depth = trial.suggest_int("n_layers", 1, 3)

    modules = []
    width_in = 28 * 28
    for idx in range(depth):
        # Suggestion order per layer (units first, then dropout) is kept stable.
        width_out = trial.suggest_int(f"n_units_l{idx}", 4, 128)
        modules += [nn.Linear(width_in, width_out), nn.ReLU()]
        modules.append(nn.Dropout(trial.suggest_float(f"dropout_l{idx}", 0.2, 0.5)))

        # The next layer consumes this layer's output.
        width_in = width_out

    # Classification head producing log-probabilities.
    modules += [nn.Linear(width_in, num_classes), nn.LogSoftmax(dim=1)]

    return nn.Sequential(*modules)


def objective(trial):
    """Train and validate one sampled model and return its validation accuracy.

    The network comes from ``define_model(trial)``. The optimizer class
    ("Adam", "RMSprop" or "SGD") and a log-scaled learning rate in
    [1e-5, 1e-1] are also sampled from ``trial``. After every epoch the
    validation accuracy is reported to the trial so that it can be pruned.

    Args:
        trial: Optuna trial used for suggestions, intermediate reports and
            pruning checks.

    Returns:
        Validation accuracy of the last epoch, as a fraction in [0, 1].

    Raises:
        optuna.exceptions.TrialPruned: If ``trial.should_prune()`` is true
            after an epoch's report.
    """
    # Generate the model.
    model = define_model(trial).to(DEVICE)

    # Generate the optimizer.
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)

    # Fixed here; could instead be tuned with trial.suggest_int("batch_size", 32, 128).
    batch_size = 128
    # Caps are checked per batch, so at least one full batch is always used
    # even when the cap is smaller than batch_size.
    n_train_examples = 30
    n_valid_examples = 10
    epochs = 10

    # Build the train/validation loaders with the project's get_mnist helper.
    train_loader, valid_loader = get_mnist(batch_size=batch_size)

    for epoch in range(epochs):
        # Training of the model.
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            # Limiting training data for faster epochs.
            if batch_idx * batch_size >= n_train_examples:
                break

            # Flatten each sample to a vector before the first Linear layer.
            data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)

            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
            loss.backward()
            optimizer.step()

        # Validation of the model.
        model.eval()
        correct = 0
        n_evaluated = 0
        with torch.no_grad():
            for batch_idx, (data, target) in enumerate(valid_loader):
                # Limiting validation data.
                if batch_idx * batch_size >= n_valid_examples:
                    break
                data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)
                output = model(data)
                # Get the index of the max log-probability.
                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(target.view_as(pred)).sum().item()
                n_evaluated += target.size(0)

        # Divide by the number of samples actually scored. Dividing by
        # n_valid_examples over-counts whenever a whole batch exceeds the cap
        # (128 scored vs. a cap of 10), which yields "accuracies" above 1.
        accuracy = correct / n_evaluated if n_evaluated else 0.0

        trial.report(accuracy, epoch)

        # Handle pruning based on the intermediate value.
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return accuracy

### Run the job

Now, using the `HyperparameterTuner` class defined in `hpo_client.py`, we can pass in the objective function we are iterating on.
The purpose of this setup is to make it easier to iterate on the contents of the objective in a notebook, save it, and run the flow with minimal moving parts.

In [None]:
tuner = HyperparameterTuner(
    # Use the dev objective written by the %%writefile cell. To run the
    # configured objective instead, pass config['objective_function_file'].
    objective_function_file='objective_fn_dev.py',
    # Optional setting: falls back to None when 'study_name' is not configured.
    override_study_name=config.get('study_name', None),
    optuna_app_name=config['optuna_app_name']
)

# The returned run id is used by the Analysis cell to look up the run.
# NOTE(review): going by its name, run_blocking presumably waits for the flow
# to finish before returning; confirm in hpo_client.py.
run_id = tuner.run_blocking(
    override_compute_pool=config['compute_pool'],
    n_trials=config['n_trials'],
    trials_per_task=config['trials_per_task']
)

## Analysis

After flows complete, the results of the hyperparameter tuning process will be captured in the `results` artifact. 
Note that calling `utils.load_study` requires running inside a Metaflow task, or running the notebook/script from an Outerbounds workstation.

In [None]:
from metaflow import Run
from utils import extract_flow_name

# Derive the flow name from flow.py using the project helper, then look up the
# run launched earlier by its "<flow_name>/<run_id>" path.
flow_name = extract_flow_name('flow.py', sanitize=False)
run = Run(f"{flow_name}/{run_id}")
# The run's `results` artifact. The cells below treat it as a pandas DataFrame
# with 'state', 'value' and 'params_lr' columns.
study_df = run.data.results

In [None]:
# Split the trials by their final state using plain boolean masks. This selects
# the same rows as the earlier where(...).state.notna() form without first
# building a NaN-masked copy of the whole frame.
pruned_df = study_df[study_df['state'] == 'PRUNED']
completed_df = study_df[study_df['state'] == 'COMPLETE']

In [None]:
# Scatter of learning rate (log-scaled x axis) against the 'value' column for
# completed trials; the trailing semicolon suppresses the cell's text output.
completed_df.plot.scatter(x='params_lr', y='value', alpha=0.85, logx=True);

## Cleanup

Either remove the dev file after iteration, or replace the previous objective function definition with this one. 
How this interacts with branching in GitHub is a matter of preference.

In [None]:
# Option A: delete the dev objective file written by the %%writefile cell.
! rm ./objective_fn_dev.py

# Option B: instead, promote the dev objective to objective_fn.py
# (comment out Option A and uncomment the line below).
# ! cp ./objective_fn_dev.py ./objective_fn.py