# Hyper-parameter optimization via continuation

In this notebook we explore several variations of a continuation approach for hyper-parameter optimization of meta-heuristic algorithms, using TPE as the hyper-parameter optimization method for configuring PSO hyper-parameters on a benchmark set of optimization problems.

## 1. Auxiliary functions

In [1]:
import subprocess
import pandas as pd
import numpy as np
import json
import itertools
import os.path
import math
import pickle
import os.path
import logging
import ConfigSpace as CS

from tqdm import tqdm
from functools import partial
from hyperopt import hp
from multiprocessing import Pool, Lock

from sklearn.metrics import mean_squared_error
from sklearn.neural_network import MLPRegressor

import chpo.chpo as chpo

In [2]:
# Configure the root logger so that only WARNING and above are emitted.
logging.basicConfig(level=logging.WARNING)

### 1.1. Save and load partial results

In [3]:
def dataset_info():
    """Load the benchmark problem list from ``data/prep/list.json``.

    Returns:
        The parsed JSON document. The rest of this notebook iterates over it
        and reads the ``'id'`` and ``'name'`` keys of every entry.

    Raises:
        FileNotFoundError: if the list file does not exist.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open("data/prep/list.json", encoding="utf-8") as handle:
        return json.load(handle)

In [4]:
# Problem descriptors read from data/prep/list.json.
problems = dataset_info()
# In-memory cache of the split datasets, keyed by problem['id'].
datasets = {}

def load_dataset(problem):
    """Return the cached split for ``problem``.

    The split is looked up in the module-level ``datasets`` dict under
    ``problem['id']``; a ``KeyError`` is raised if it was never cached.
    """
    dataset_key = problem['id']
    return datasets[dataset_key]

def true_load_dataset(problem):
    """Read one problem's CSV and split it randomly into train/validation/test.

    The file is read from ``'data/prep/' + problem['id']`` with the first row
    as header. Every column except the last is a feature; the last column is
    the target. 60% of the rows are sampled for training, and the remaining
    rows are split in half between validation and test. No random seed is
    passed to ``sample``, so the split differs between calls.

    Returns:
        Tuple ``(Xt, yt, Xv, yv, X, y)``: train features/targets, validation
        features/targets, and test features/targets, as NumPy arrays.
    """
    frame = pd.read_csv('data/prep/' + problem['id'], header=0)
    feature_cols = frame.columns[:-1]
    target_col = frame.columns[-1]

    # Same sampling order as always: train first, then validation out of
    # whatever is left; the test set is the remainder.
    train_part = frame.sample(frac=0.6)
    remainder = frame.drop(train_part.index)
    val_part = remainder.sample(frac=0.5)
    test_part = remainder.drop(val_part.index)

    def as_arrays(part):
        # Features as a 2-D array, target flattened to 1-D.
        return part[feature_cols].values, part[target_col].values.ravel()

    Xt, yt = as_arrays(train_part)
    Xv, yv = as_arrays(val_part)
    X, y = as_arrays(test_part)

    return Xt, yt, Xv, yv, X, y

# Read and split every dataset once up front; load_dataset() then serves the
# same split from the `datasets` cache on every call.
for problem in problems:
    datasets[problem['id']] = true_load_dataset(problem)

In [5]:
# Directory prefix (with trailing slash) that save()/load() prepend to file names.
rdir = './results/nn-bench/'

def save(name, config, data):
    """Pickle ``data`` into a file under ``rdir``.

    The file name is built by converting every entry of ``config`` to ``str``
    and joining them with ``name`` as the separator (so ``name`` appears
    between the entries, not as a prefix).
    """
    target = rdir + name.join(map(str, config))
    with open(target, 'wb') as handle:
        pickle.dump(data, handle)
        
def load(name, config):
    """Unpickle a previously saved result, or return ``False`` if unavailable.

    The file name is derived exactly as in ``save``: the ``str`` of every
    ``config`` entry joined with ``name`` as separator, under ``rdir``.

    Returns:
        The unpickled object, or ``False`` when the file does not exist or
        is empty (zero bytes).
    """
    path = rdir + name.join(str(item) for item in config)

    # Missing or zero-byte files both count as "no result yet".
    if not os.path.isfile(path) or os.path.getsize(path) == 0:
        return False

    with open(path, 'rb') as handle:
        return pickle.load(handle)

### 1.2. HPO problem definition

### 1.3. Objective and search space

In [6]:
def hpo_objective(param, budget, problem, phase):
    """Train an MLP regressor with the given hyper-parameters and score it.

    Args:
        param: mapping with keys ``'n_hidden_1'``, ``'n_hidden_2'``,
            ``'batch_size'`` (each cast to ``int``), ``'Lr'`` (initial
            learning rate) and ``'M'`` (momentum).
        budget: number of ``partial_fit`` calls on the training split
            (cast to ``int``).
        problem: problem descriptor passed to ``load_dataset``.
        phase: ``'test'`` scores on the test split; any other value scores
            on the validation split.

    Returns:
        Mean squared error of the trained model on the selected split.
    """
    Xt, yt, Xv, yv, X, y = load_dataset(problem)

    layer_sizes = (int(param['n_hidden_1']), int(param['n_hidden_2']))
    model = MLPRegressor(
        hidden_layer_sizes=layer_sizes,
        solver='sgd',
        batch_size=int(param['batch_size']),
        learning_rate_init=param['Lr'],
        momentum=param['M'],
    )

    n_rounds = int(budget)
    for _ in range(n_rounds):
        model.partial_fit(Xt, yt)

    # Pick the evaluation split once, then score it.
    if phase == 'test':
        eval_X, eval_y = X, y
    else:
        eval_X, eval_y = Xv, yv
    return mean_squared_error(eval_y, model.predict(eval_X))

In [7]:
# Search space in hyperopt format, handed to chpo.TPE / chpo.CTPE.
# The keys are the ones hpo_objective reads from `param`.
hyperopt_space = {'n_hidden_1': hp.quniform('n_hidden_1', 1, 40, 1),
                  'n_hidden_2': hp.quniform('n_hidden_2', 1, 40, 1),
                  'batch_size': hp.quniform('batch_size', 32, 256, 10),
                  'Lr': hp.uniform('Lr', 0.00001, 0.001),
                  'M': hp.uniform('M', 0.8, 0.9999)}

# The same five hyper-parameters and bounds as a ConfigSpace object, handed
# to chpo.hb / chpo.bohb. All five are declared as floats here; hpo_objective
# casts the layer sizes and the batch size to int before use.
hb_space = CS.ConfigurationSpace()
hb_space.add_hyperparameter(CS.UniformFloatHyperparameter('n_hidden_1', lower=1, upper=40))
hb_space.add_hyperparameter(CS.UniformFloatHyperparameter('n_hidden_2', lower=1, upper=40))
hb_space.add_hyperparameter(CS.UniformFloatHyperparameter('batch_size', lower=32, upper=256))
hb_space.add_hyperparameter(CS.UniformFloatHyperparameter('Lr', lower=0.00001, upper=0.001))
hb_space.add_hyperparameter(CS.UniformFloatHyperparameter('M', lower=0.8, upper=0.9999))

M, Type: UniformFloat, Range: [0.8, 0.9999], Default: 0.89995

## 2. HPO algorithms

### 2.1. TPE

In [8]:
def run_tpe(n_parent, n_base, problem):
    """Run ``chpo.TPE`` on ``problem`` and return whatever its ``run`` returns.

    The optimizer is built from ``hyperopt_space`` and ``hpo_objective``;
    ``n_parent`` and ``n_base`` are forwarded unchanged to ``run``.
    """
    optimizer = chpo.TPE(hyperopt_space, hpo_objective, problem)
    outcome = optimizer.run(n_parent, n_base)
    return outcome

### 2.2. CTPE

In [9]:
def run_ctpe(n_parent, n_base, problem, buckets):
    """Run ``chpo.CTPE`` on ``problem`` and return whatever its ``run`` returns.

    The optimizer is built from ``hyperopt_space``, ``hpo_objective`` and the
    given ``buckets`` value; ``n_parent`` and ``n_base`` are forwarded
    unchanged to ``run``.
    """
    optimizer = chpo.CTPE(hyperopt_space, hpo_objective, problem, buckets)
    outcome = optimizer.run(n_parent, n_base)
    return outcome

### 2.3. HB

In [10]:
def run_hb(n_parent, n_base, problem, eta):
    """Call ``chpo.hb`` over ``hb_space`` with ``hpo_objective`` and return its result.

    ``problem``, ``n_parent``, ``n_base`` and ``eta`` are forwarded unchanged.
    """
    outcome = chpo.hb(hb_space, hpo_objective, problem, n_parent, n_base, eta)
    return outcome

### 2.4. BOHB

In [11]:
def run_bohb(n_parent, n_base, problem, eta):
    """Call ``chpo.bohb`` over ``hb_space`` with ``hpo_objective`` and return its result.

    ``problem``, ``n_parent``, ``n_base`` and ``eta`` are forwarded unchanged.
    """
    outcome = chpo.bohb(hb_space, hpo_objective, problem, n_parent, n_base, eta)
    return outcome

## 3. Experimentation

In [12]:
# Trial indices: every configuration is run once per entry.
trials = list(range(10))
# Values swept as the `buckets` argument of CTPE and as `eta` for HB / BOHB.
buckets = [2, 3, 4, 5]

# Budget arguments forwarded to every optimizer run.
n_base = 100
n_parent = 100

### 3.1. TPE

In [13]:
# Every (problem, trial) pair to run with TPE.
iters = list(itertools.product(problems, trials))

def tpe_run_config(config):
    """Run TPE for one ``(problem, trial)`` pair unless a result is already stored.

    The result is stored with ``save`` under the tag ``'tpe'`` and the key
    ``(problem['name'], trial)``. The run is skipped when ``load`` returns
    a truthy value for that key.
    """
    problem, trial = config
    key = (problem['name'], trial)
    if load('tpe', key):
        return
    closs = run_tpe(n_parent, n_base, problem)
    save('tpe', key, closs)
    
if __name__ == '__main__':
    # Fan the TPE configurations out over a process pool. The loop body is
    # empty on purpose: iterating only drains the result iterator so that
    # tqdm can report progress.
    with Pool() as pool:
        for _ in tqdm(pool.imap(tpe_run_config, iters), total=len(iters)):
            pass

100%|██████████| 60/60 [00:00<00:00, 7348.54it/s]


### 3.2. CTPE

In [14]:
# Every (problem, bucket, trial) combination to run with CTPE.
iters = list(itertools.product(problems, buckets, trials))

def ctpe_run_config(config):
    """Run CTPE for one ``(problem, bucket, trial)`` unless a result is already stored.

    The result is stored with ``save`` under the tag ``"ctpe"`` and the key
    ``(problem['name'], bucket, trial)``. The run is skipped when ``load``
    returns a truthy value for that key.
    """
    problem, bucket, trial = config
    key = (problem['name'], bucket, trial)
    if load("ctpe", key):
        return
    closs = run_ctpe(n_parent, n_base, problem, bucket)
    save("ctpe", key, closs)

if __name__ == '__main__':
    # Fan the CTPE configurations out over a process pool; results are
    # consumed in completion order. The loop body is empty on purpose:
    # iterating only drains the result iterator so that tqdm can report progress.
    with Pool() as pool:
        for _ in tqdm(pool.imap_unordered(ctpe_run_config, iters), total=len(iters)):
            pass

100%|██████████| 240/240 [00:00<00:00, 23382.34it/s]


### 3.3. HB

In [15]:
# Every (problem, eta, trial) combination to run with HB; eta takes the values in `buckets`.
iters = list(itertools.product(problems, buckets, trials))

def hb_run_config(config):
    """Run HB for one ``(problem, eta, trial)`` unless a result is already stored.

    The parent budget handed to ``run_hb`` is ``n_parent`` divided by
    ``floor(log(n_base, eta)) + 1``. The result is stored with ``save`` under
    the tag ``"hb"`` and the key ``(problem['name'], eta, trial)``. The run is
    skipped when ``load`` returns a truthy value for that key.
    """
    problem, eta, trial = config
    key = (problem['name'], eta, trial)
    if load("hb", key):
        return
    divisor = np.floor(math.log(n_base, eta)) + 1
    closs = run_hb(n_parent / divisor, n_base, problem, eta)
    save("hb", key, closs)

# HB configurations are executed sequentially in this process (no Pool here).
for config in tqdm(iters):
    hb_run_config(config)

100%|██████████| 240/240 [3:25:43<00:00, 51.43s/it]  


### 3.4. BOHB

In [16]:
# Every (problem, eta, trial) combination to run with BOHB; eta takes the values in `buckets`.
iters = list(itertools.product(problems, buckets, trials))

def bohb_run_config(config):
    """Run BOHB for one ``(problem, eta, trial)`` unless a result is already stored.

    The parent budget handed to ``run_bohb`` is ``n_parent`` divided by
    ``floor(log(n_base, eta)) + 1``. The result is stored with ``save`` under
    the tag ``"bohb"`` and the key ``(problem['name'], eta, trial)``. The run
    is skipped when ``load`` returns a truthy value for that key.
    """
    problem, eta, trial = config
    key = (problem['name'], eta, trial)
    if load("bohb", key):
        return
    divisor = np.floor(math.log(n_base, eta)) + 1
    closs = run_bohb(n_parent / divisor, n_base, problem, eta)
    save("bohb", key, closs)

# BOHB configurations are executed sequentially in this process (no Pool here).
for config in tqdm(iters):
    bohb_run_config(config)

100%|██████████| 240/240 [6:58:31<00:00, 104.63s/it]  


## 4. Collecting results

In [17]:
# Problem names in list order; these are the names used in the saved-result keys.
problem_names = [problem['name'] for problem in problems]

def collect_tpe():
    """Gather the stored TPE results into a DataFrame.

    Rows are indexed by problem name and columns by trial. Each cell is
    assigned whatever ``load('tpe', (problem, trial))`` returns, which is
    ``False`` when no result file exists for that pair.
    """
    problem_index = pd.MultiIndex.from_tuples(
        itertools.product(problem_names), names = ['problem'])
    trial_columns = pd.MultiIndex.from_tuples(
        itertools.product(trials), names = ['trial'])
    losses = pd.DataFrame(index=problem_index, columns=trial_columns)

    # Same visiting order as itertools.product(problem_names, trials).
    for problem in problem_names:
        for trial in trials:
            key = (problem, trial)
            losses.loc[key] = load('tpe', key)

    return losses

def _collect_bucketed(method):
    """Gather stored results of one bucket/eta-parameterized method.

    Shared implementation behind ``collect_ctpe``, ``collect_hb`` and
    ``collect_bohb``, which previously were three copies of this body that
    differed only in the tag passed to ``load``.

    Args:
        method: tag the results were saved under (``'ctpe'``, ``'hb'`` or
            ``'bohb'``).

    Returns:
        DataFrame with rows indexed by trial and columns indexed by
        ``(problem, bucket)``. Each cell is assigned whatever
        ``load(method, (problem, bucket, trial))`` returns, which is
        ``False`` when no result file exists for that combination.
    """
    row_idx = itertools.product(trials)
    column_idx = itertools.product(problem_names, buckets)

    idx = pd.MultiIndex.from_tuples(row_idx, names=['trial'])
    cols = pd.MultiIndex.from_tuples(column_idx, names=['problem', 'bucket'])
    df_loss = pd.DataFrame(index=idx, columns=cols)

    for problem, bucket, trial in itertools.product(problem_names, buckets, trials):
        # Row key is the trial, column key is the (problem, bucket) pair.
        df_loss.loc[trial, (problem, bucket)] = load(method, (problem, bucket, trial))

    return df_loss


def collect_ctpe():
    """Gather the stored CTPE results (rows: trial, columns: problem/bucket)."""
    return _collect_bucketed('ctpe')


def collect_hb():
    """Gather the stored HB results (rows: trial, columns: problem/bucket)."""
    return _collect_bucketed('hb')


def collect_bohb():
    """Gather the stored BOHB results (rows: trial, columns: problem/bucket)."""
    return _collect_bucketed('bohb')

In [18]:
# Build one loss table per HPO method from the stored results.
df_tpe_loss = collect_tpe()
df_ctpe_loss = collect_ctpe()
df_hb_loss = collect_hb()
df_bohb_loss = collect_bohb()

In [19]:
# Write each loss table as CSV into the results directory.
df_tpe_loss.to_csv(rdir + 'tpe.loss.csv')
df_ctpe_loss.to_csv(rdir + 'ctpe.loss.csv')
df_hb_loss.to_csv(rdir + 'hb.loss.csv')
df_bohb_loss.to_csv(rdir + 'bohb.loss.csv')