# Distributed hyper-parameter optimization with the Cray HPO library

Documentation for the Cray HPO library is available here:
https://pubs.cray.com/content/S-2589/1.2.UP00/xctm-series-urika-xc-analytic-applications-guide/hyperparameter-optimization-hpo-support

In [1]:
import sys
# Add a scratch directory to the module search path so that packages located
# there can be imported (presumably where crayai is installed -- confirm).
sys.path.append('/global/cscratch1/sd/bja')

In [2]:
# Externals
from crayai import hpo
import pandas as pd

In [3]:
# Hyperparameter search space handed to the optimizer.
# Each entry looks like [command-line flag, starting value, search domain];
# presumably a list is a set of discrete choices and a tuple is a continuous
# (min, max) range -- confirm against the crayai documentation.
params = hpo.Params([
    ['--h1', 4, [4, 8, 16]],
    ['--h2', 8, [8, 16, 32]],
    ['--h3', 16, [16, 32, 64]],
    ['--dropout', 0.2, (0., 1.)],
    ['--optimizer', 'Adam', ['Adam', 'Nadam']],
])

In [4]:
# Training config
# Number of epochs, substituted into the `--epochs` flag of the training command.
n_epochs = 4

# Job config
# Node count passed to the evaluator as `nodes`.
n_nodes = 32
# Allocation arguments passed to the evaluator as `alloc_args`
# (these look like Slurm flags: queue, node constraint, time limit -- confirm).
alloc_args = '-q interactive -C haswell -t 2:00:00'

# Optimizer config
# Settings forwarded to the genetic optimizer as keyword arguments of the
# same names (`log_file` is passed as `log_fn`).
pop_size = 4
num_demes = 4
generations = 16
# The same file is read back with pandas once the optimization has run.
log_file = 'results.log'

In [None]:
# Training command handed to the evaluator, with the epoch count filled in.
cmd = 'python train.py --epochs %i' % n_epochs

# The evaluator receives the command together with the job settings
# (node count and allocation arguments).
evaluator = hpo.Evaluator(
    cmd,
    nodes=n_nodes,
    alloc_args=alloc_args,
)

In [None]:
# Build the genetic optimizer around the evaluator, wiring in the population
# settings and the log file name chosen in the configuration cell.
optimizer = hpo.genetic.Optimizer(
    evaluator,
    pop_size=pop_size,
    num_demes=num_demes,
    generations=generations,
    log_fn=log_file,
)

In [None]:
# Run the optimizer over the hyperparameter search space defined in `params`.
# NOTE(review): presumably this launches the training jobs through the
# evaluator and writes progress to the configured log file -- confirm.
optimizer.optimize(params)

In [7]:
# Load the optimizer's whitespace-delimited log into a DataFrame for display.
# sep=r'\s+' is the documented equivalent of delim_whitespace=True, which
# pandas has deprecated (2.2+) and will remove in a future release.
pd.read_csv(log_file, sep=r'\s+')

Unnamed: 0,#generation,epoch,best_fom,avg_fom,checkpoint_in,checkpoint_out,--h1,--h2,--h3,--dropout,--optimizer
0,0,1,0.073547,0.115834,,,16,16,64,0.118847,Nadam
1,1,2,0.058352,0.086159,,,4,32,64,0.118879,Nadam
2,2,3,0.058352,0.086819,,,4,32,64,0.118879,Nadam
3,3,4,0.056483,0.082912,,,16,32,64,0.227348,Nadam
4,4,5,0.056483,0.090415,,,16,32,64,0.227348,Nadam
5,5,6,0.052548,0.07836,,,16,32,64,0.091499,Nadam
6,6,7,0.052548,0.075095,,,16,32,64,0.091499,Nadam
7,7,8,0.047567,0.075947,,,16,32,64,0.082243,Nadam
8,8,9,0.047567,0.091844,,,16,32,64,0.082243,Nadam
