# Hyper-parameter optimization via continuation

In this notebook we explore several variations of a continuation approach to hyper-parameter optimization of meta-heuristic algorithms, using TPE as the hyper-parameter optimization method for configuring PSO hyper-parameters on a benchmark set of optimization problems.

## 1. Auxiliary functions

In [1]:
import subprocess
import pandas as pd
import numpy as np
import itertools
import os.path
import math
import pickle
import os.path
import logging
import ConfigSpace as CS

from tqdm import tqdm
from functools import partial
from hyperopt import hp
from multiprocessing import Pool, Lock

import chpo.chpo as chpo

In [2]:
# Configure the root logger at WARNING level for the whole notebook.
logging.basicConfig(level=logging.WARNING)

### 1.1. Save and load partial results

In [3]:
# Directory holding one pickle file per finished run (partial-results cache).
rdir = './results/pso-benchmark/'

def _result_path(name, config):
    """Build the path of the cache file for `name` and `config`.

    NOTE: `name` is used as the *separator* between the stringified config
    values, not as a prefix (name='tpe', config=(3, 7) gives '3tpe7').
    A one-element config therefore yields a file name that does not contain
    `name` at all. The scheme is kept unchanged so that files written by
    earlier runs remain loadable.
    """
    filename = name.join(str(x) for x in config)
    return rdir + filename

def save(name, config, data):
    """Pickle `data` into the cache file identified by `name` and `config`.

    The results directory is created on demand, so the first call on a
    fresh checkout does not fail with FileNotFoundError.
    """
    # exist_ok=True keeps this safe when several pool workers save at once.
    os.makedirs(rdir, exist_ok=True)
    with open(_result_path(name, config), 'wb') as file:
        pickle.dump(data, file)

def load(name, config):
    """Return the cached object for `name` and `config`, or False.

    False is returned when the cache file does not exist or is empty.
    Callers must test the result with `is False`: a cached value may itself
    be falsy (for instance 0.0).
    """
    path = _result_path(name, config)

    if not os.path.isfile(path) or os.path.getsize(path) == 0:
        return False

    # pickle.load can execute arbitrary code; only read files that were
    # produced by save() in this notebook.
    with open(path, 'rb') as file:
        return pickle.load(file)

### 1.2. Interface to `dnn_opt` library

In [4]:
# Fixed settings that run_dnn_opt forwards to the benchmark binary.
# Forwarded as the `-p` argument.
pso_population = 40
# Forwarded as the `-n` argument.
pso_problem_dim = 50
# Forwarded as the `-a` argument.
pso_algorithm_idx = 0

### 1.3. PSO objective and search space

In [5]:
def run_dnn_opt(max_eval, problem, G, L, Mi, Ma):
    """Run the external `dnn_opt` benchmark binary once and return its result.

    Parameters
    ----------
    max_eval : forwarded as `-eta`.
    problem  : forwarded as `-s`.
    G, L, Mi, Ma : forwarded as `-ha`, `-hb`, `-hc` and `-hd`.

    The module-level `pso_problem_dim`, `pso_population` and
    `pso_algorithm_idx` are forwarded as `-n`, `-p` and `-a`.

    Returns
    -------
    float
        The binary's standard output parsed as a float.

    Raises
    ------
    ValueError
        If the output cannot be parsed as a float. The message carries the
        exit code and the raw output so a failing binary can be diagnosed.
    """
    command = ['dnn_opt/bin/examples/benchmark/benchmark',
               '-o', '1', '-n', str(pso_problem_dim), '-p', str(pso_population),
               '-eta', str(max_eval), '-a', str(pso_algorithm_idx), '-s', str(problem),
               '-ha', str(G), '-hb', str(L), '-hc', str(Mi), '-hd', str(Ma)]
    result = subprocess.run(command, stdout=subprocess.PIPE)

    try:
        return float(result.stdout)
    except ValueError as err:
        # Same exception type as before, but with enough context to tell a
        # crashed binary from a formatting problem.
        raise ValueError(
            'dnn_opt benchmark (exit code {}) did not print a number: {!r}'.format(
                result.returncode, result.stdout)
        ) from err

def pso_objective(param, budget, problem):
    """Objective handed to the HPO algorithms.

    Looks up 'G', 'L', 'Mi' and 'Ma' in `param` and evaluates them with
    run_dnn_opt, using `budget` as its `max_eval` argument on `problem`.
    """
    G, L, Mi, Ma = (param[key] for key in ('G', 'L', 'Mi', 'Ma'))
    return run_dnn_opt(budget, problem, G, L, Mi, Ma)

In [6]:
# Search space in hyperopt form: each of the four PSO hyper-parameters is
# drawn with hp.uniform between 0 and 3.
hyperopt_space = {'G': hp.uniform('G', 0, 3), 'L': hp.uniform('L', 0, 3), 
                  'Mi': hp.uniform('Mi', 0, 3), 'Ma': hp.uniform('Ma', 0, 3)}
# The same four parameters and bounds expressed as a ConfigSpace
# ConfigurationSpace, which is what run_hb and run_bohb pass on.
hb_space = CS.ConfigurationSpace()
hb_space.add_hyperparameter(CS.UniformFloatHyperparameter('G', lower=0, upper=3))
hb_space.add_hyperparameter(CS.UniformFloatHyperparameter('L', lower=0, upper=3))
hb_space.add_hyperparameter(CS.UniformFloatHyperparameter('Mi', lower=0, upper=3))
# Last expression of the cell: its return value is what the notebook displays.
hb_space.add_hyperparameter(CS.UniformFloatHyperparameter('Ma', lower=0, upper=3))

Ma, Type: UniformFloat, Range: [0.0, 3.0], Default: 1.5

## 2. HPO algorithms

### 2.1. TPE

In [7]:
def run_tpe(n_parent, n_base, problem):
    """Build a chpo.TPE optimizer for `problem` and run it.

    The optimizer is created over `hyperopt_space` with `pso_objective`;
    the value of its `run(n_parent, n_base)` call is returned unchanged.
    """
    return chpo.TPE(hyperopt_space, pso_objective, problem).run(n_parent, n_base)

### 2.2. CTPE

In [8]:
def run_ctpe(n_parent, n_base, problem, buckets):
    """Build a chpo.CTPE optimizer for `problem` and run it.

    The optimizer is created over `hyperopt_space` with `pso_objective` and
    the given `buckets` value; the value of its `run(n_parent, n_base)` call
    is returned unchanged.
    """
    optimizer = chpo.CTPE(hyperopt_space, pso_objective, problem, buckets)
    outcome = optimizer.run(n_parent, n_base)
    return outcome

### 2.3. HB

In [9]:
def run_hb(n_parent, n_base, problem, eta):
    """Run chpo.hb on `problem` over `hb_space` and return its result.

    `pso_objective` is the objective; `n_parent`, `n_base` and `eta` are
    passed through positionally in that order.
    """
    outcome = chpo.hb(hb_space, pso_objective, problem, n_parent, n_base, eta)
    return outcome

### 2.4. BOHB

In [10]:
def run_bohb(n_parent, n_base, problem, eta):
    """Run chpo.bohb on `problem` over `hb_space` and return its result.

    `pso_objective` is the objective; `n_parent`, `n_base` and `eta` are
    passed through positionally in that order.
    """
    outcome = chpo.bohb(hb_space, pso_objective, problem, n_parent, n_base, eta)
    return outcome

## 3. Experimentation

In [11]:
# Experiment grid shared by every section below.
# Benchmark problem ids 0..16.
problems = list(range(17))
# Independent repetitions 0..9 of each configuration.
trials = list(range(0, 10))
# Used as the CTPE `buckets` argument and as `eta` for HB / BOHB.
buckets = list(range(2, 6))

# Handed to every HPO runner as its `n_base` / `n_parent` arguments.
n_base = 1000
n_parent = 400

### 3.1. TPE

In [12]:
# Every (problem, trial) pair to run with TPE.
iters = list(itertools.product(problems, trials))

def tpe_run_config(config):
    """Run TPE for one (problem, trial) pair unless its result is cached.

    `config` is a (problem, trial) tuple. `trial` only identifies the cache
    entry; it is not passed to run_tpe. The result is stored with save()
    and nothing is returned.
    """
    problem, trial = config
    # load() returns the literal False when nothing is cached. Compare by
    # identity rather than truthiness so that a cached falsy result (such
    # as a loss of exactly 0.0) is not recomputed on every run.
    if load('tpe', (problem, trial)) is False:
        closs = run_tpe(n_parent, n_base, problem)
        save('tpe', (problem, trial), closs)
    
# Distribute the TPE configurations over a process pool.
if __name__ == '__main__':
    with Pool() as pool:
        # tpe_run_config stores its own result and returns nothing, so the
        # loop body is empty: iterating only drains the pool and advances
        # the tqdm progress bar.
        for _ in tqdm(pool.imap(tpe_run_config, iters), total=len(iters)):
            pass

100%|██████████| 170/170 [02:24<00:00,  1.17it/s]


### 3.2. CTPE

In [33]:
# Every (problem, bucket, trial) combination to run with CTPE.
iters = list(itertools.product(problems, buckets, trials))

def ctpe_run_config(config):
    """Run CTPE for one (problem, bucket, trial) unless its result is cached.

    `config` is a (problem, bucket, trial) tuple. `trial` only identifies
    the cache entry; it is not passed to run_ctpe. The result is stored with
    save() and nothing is returned.
    """
    problem, bucket, trial = config
    # load() returns the literal False when nothing is cached. Compare by
    # identity rather than truthiness so that a cached falsy result (such
    # as a loss of exactly 0.0) is not recomputed on every run.
    if load("ctpe", (problem, bucket, trial)) is False:
        closs = run_ctpe(n_parent, n_base, problem, bucket)
        save("ctpe", (problem, bucket, trial), closs)

# Distribute the CTPE configurations over a process pool.
if __name__ == '__main__':
    with Pool() as pool:
        # ctpe_run_config stores its own result and returns nothing, so the
        # loop body is empty: iterating only drains the pool and advances
        # the tqdm progress bar. imap_unordered is used, so the order in
        # which configurations are reported is not the order of `iters`.
        for _ in tqdm(pool.imap_unordered(ctpe_run_config, iters), total=len(iters)):
            pass

100%|██████████| 680/680 [2:28:36<00:00, 13.11s/it]  


### 3.3. HB

In [12]:
# Every (problem, eta, trial) combination to run with HB; the values of
# `buckets` are reused as eta.
iters = list(itertools.product(problems, buckets, trials))

def hb_run_config(config):
    """Run HB for one (problem, eta, trial) unless its result is cached.

    `config` is a (problem, eta, trial) tuple. `trial` only identifies the
    cache entry; it is not passed to run_hb. The result is stored with
    save() and nothing is returned.
    """
    problem, eta, trial = config
    # load() returns the literal False when nothing is cached. Compare by
    # identity rather than truthiness so that a cached falsy result (such
    # as a loss of exactly 0.0) is not recomputed on every run.
    if load("hb", (problem, eta, trial)) is False:
        # n_parent divided by (floor(log_eta(n_base)) + 1) squared; the
        # result is a float.
        # NOTE(review): presumably this rescales the budget so HB is
        # comparable with TPE/CTPE -- confirm against chpo.hb.
        n_hb_parent = n_parent / (np.floor(math.log(n_base, eta)) + 1)**2
        closs = run_hb(n_hb_parent, n_base, problem, eta)
        save("hb", (problem, eta, trial), closs)

# Run the HB configurations one after another in the notebook process
# (no process pool is used here); tqdm shows the progress.
for config in tqdm(iters):
    hb_run_config(config)

100%|██████████| 680/680 [14:09:23<00:00, 74.95s/it]   


### 3.4. BOHB

In [12]:
# Every (problem, eta, trial) combination to run with BOHB; the values of
# `buckets` are reused as eta.
iters = list(itertools.product(problems, buckets, trials))

def bohb_run_config(config):
    """Run BOHB for one (problem, eta, trial) unless its result is cached.

    `config` is a (problem, eta, trial) tuple. `trial` only identifies the
    cache entry; it is not passed to run_bohb. The result is stored with
    save() and nothing is returned.
    """
    problem, eta, trial = config
    # load() returns the literal False when nothing is cached. Compare by
    # identity rather than truthiness so that a cached falsy result (such
    # as a loss of exactly 0.0) is not recomputed on every run.
    if load("bohb", (problem, eta, trial)) is False:
        # n_parent divided by (floor(log_eta(n_base)) + 1) squared; the
        # result is a float.
        # NOTE(review): presumably this rescales the budget so BOHB is
        # comparable with TPE/CTPE -- confirm against chpo.bohb.
        n_hb_parent = n_parent / (np.floor(math.log(n_base, eta)) + 1)**2
        closs = run_bohb(n_hb_parent, n_base, problem, eta)
        save("bohb", (problem, eta, trial), closs)

# Run the BOHB configurations one after another in the notebook process
# (no process pool is used here); tqdm shows the progress.
for config in tqdm(iters):
    bohb_run_config(config)

100%|██████████| 680/680 [34:04:08<00:00, 180.37s/it]   


## 4. Collecting results

In [35]:
def collect_tpe():
    """Gather the cached TPE results into one table.

    Returns a DataFrame with one row per problem and one column per trial.
    Each cell holds whatever load('tpe', (problem, trial)) returns, which is
    False when no result is cached for that pair.
    """
    rows = pd.MultiIndex.from_tuples(itertools.product(problems), names=['problem'])
    columns = pd.MultiIndex.from_tuples(itertools.product(trials), names=['trial'])
    df_loss = pd.DataFrame(index=rows, columns=columns)

    for problem, trial in itertools.product(problems, trials):
        # (problem, trial) is both the cell position and the cache key.
        df_loss.loc[problem, trial] = load('tpe', (problem, trial))

    return df_loss

def _collect_bucketed(name):
    """Gather the cached results stored under `name` into one table.

    Returns a DataFrame with one row per trial and one column per
    (problem, bucket) pair. Each cell holds whatever
    load(name, (problem, bucket, trial)) returns, which is False when no
    result is cached for that combination.
    """
    row_idx = itertools.product(trials)
    column_idx = itertools.product(problems, buckets)

    idx = pd.MultiIndex.from_tuples(row_idx, names=['trial'])
    cols = pd.MultiIndex.from_tuples(column_idx, names=['problem', 'bucket'])
    df_loss = pd.DataFrame(index=idx, columns=cols)

    for problem, bucket, trial in itertools.product(problems, buckets, trials):
        # Row key is the trial, column key is the (problem, bucket) pair.
        df_loss.loc[trial, (problem, bucket)] = load(name, (problem, bucket, trial))

    return df_loss

def collect_ctpe():
    """Return the trial x (problem, bucket) table of cached CTPE results."""
    return _collect_bucketed('ctpe')

def collect_hb():
    """Return the trial x (problem, bucket) table of cached HB results.

    The 'bucket' column level holds the eta values the HB runs were keyed by.
    """
    return _collect_bucketed('hb')

def collect_bohb():
    """Return the trial x (problem, bucket) table of cached BOHB results.

    The 'bucket' column level holds the eta values the BOHB runs were keyed by.
    """
    return _collect_bucketed('bohb')

In [36]:
# Build one result table per HPO algorithm from the cached runs.
df_tpe_loss = collect_tpe()
df_ctpe_loss = collect_ctpe()
df_hb_loss = collect_hb()
df_bohb_loss = collect_bohb()

In [37]:
# Write each result table as a CSV file into the results directory.
loss_tables = (
    ('tpe.loss.csv', df_tpe_loss),
    ('ctpe.loss.csv', df_ctpe_loss),
    ('hb.loss.csv', df_hb_loss),
    ('bohb.loss.csv', df_bohb_loss),
)
for csv_name, loss_table in loss_tables:
    loss_table.to_csv(rdir + csv_name)