In [1]:
import pickle
import time
import os 
import re 
import random
random.seed(12)
import hyperopt
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials

import numpy as np
import matplotlib.pyplot as plt

import sys
sys.path.insert(1, '../../utils_paper/')

from confUtils import read_conf, change_conf, write_conf
from resultsUtils import read_log

%reload_ext autoreload
%autoreload 2

# Hyperopt search space for conll2003
We present the code for performing a hyperparameter search with the Bayesian optimization algorithm of Hyperopt on the CoNLL 2003 shared task corpus.

In [2]:
# Word-embedding model files: one path per embedding dimension,
# built from a single filename template.
emb_path = '../../en_word_emb/ggnews_{}.bin'
emb_dims = [25, 100]
emb_paths = list(map(emb_path.format, emb_dims))
emb_paths

['../../en_word_emb/ggnews_25.bin', '../../en_word_emb/ggnews_100.bin']

In [3]:
# Hyperopt search space. The keys must be the same as the yaset parameter
# names, since the sampled dict is what the objective function receives.
space = dict(
    # 32 * 2**k, where k is the integer drawn by hp.randint(..., 4)
    hidden_layer_size=32 * 2 ** hp.randint('main_hidden_x32', 4),
    # multiples of 8: 8 * {1, 2, 3, 4, 5}
    char_embedding_size=8 * hp.choice('char_emb_x8', np.arange(1, 6)),
    char_hidden_layer_size=8 * hp.choice('char_hidd_x8', np.arange(1, 6)),
    # uniform draw between 0.4 and 0.6
    dropout_rate=hp.uniform('drop', 0.4, 0.6),
    # one of the embedding files listed in emb_paths
    embedding_dim=hp.choice('emb_dim', emb_paths),
)

# Draw one random configuration from the space as a sanity check.
test = hyperopt.pyll.stochastic.sample(space)
test

{'char_embedding_size': 8,
 'char_hidden_layer_size': 32,
 'dropout_rate': 0.4127366260185579,
 'embedding_dim': '../../en_word_emb/ggnews_25.bin',
 'hidden_layer_size': 64}

In [4]:
# Directory that collects the output of every hyperopt trial
# (the objective function works in its 'tmp/' sub-folder).
path2hyper_results = '../results/hyperopt/'

# Base yaset configuration; each trial derives its own config from it.
path2conf = 'conll.ini'
CONF_MODEL = read_conf(path2conf, verbose=False)

In [7]:
def yaset_objective(kwargs):
    """Hyperopt objective: train yaset once with `kwargs` and return its score.

    The function
      1. derives a configuration from CONF_MODEL with the sampled values and
         writes it to ``<path2hyper_results>tmp/tmp.ini``,
      2. runs ``yaset LEARN --config <that file>``,
      3. looks for ``*.log`` files in the sub-directories of the tmp folder
         and reads the score from them with ``read_log``,
      4. moves everything from the tmp folder up into ``path2hyper_results``
         so the next trial starts from an empty tmp folder.

    Parameters
    ----------
    kwargs : dict
        Hyperparameter values sampled by hyperopt; forwarded to
        ``change_conf`` together with ``CONF_MODEL``.

    Returns
    -------
    dict
        ``loss`` is the negated score (fmin minimises), ``status`` is
        ``STATUS_OK`` and ``eval_time`` is the ``time.time()`` timestamp
        taken when the trial finished. If no score could be read, the score
        is 0 (so the loss is 0) and the trial is still reported as OK.
    """
    # Local import so the cell works without touching the import cell.
    import subprocess

    #os.environ['CUDA_VISIBLE_DEVICES'] = '3'
    tmp_dir = path2hyper_results + 'tmp/'
    os.makedirs(tmp_dir, exist_ok=True)

    # build new conf
    new_conf = change_conf(CONF_MODEL, kwargs)
    new_conf['working_dir'] = tmp_dir
    write_conf(new_conf, tmp_dir, 'tmp')

    # launch experiment (argument list: no shell, so no quoting problems)
    try:
        returncode = subprocess.call(['yaset', 'LEARN', '--config', tmp_dir + 'tmp.ini'])
    except OSError as err:
        # e.g. the yaset executable is not on PATH
        print('could not launch yaset: {}'.format(err), file=sys.stderr)
    else:
        if returncode != 0:
            print('yaset exited with status {}'.format(returncode), file=sys.stderr)

    # detect the log file(s) in the sub-folders of the tmp folder
    f1 = 0
    for entry in sorted(os.listdir(tmp_dir)):
        run_dir = os.path.join(tmp_dir, entry)
        if not os.path.isdir(run_dir):
            continue
        for fname in sorted(os.listdir(run_dir)):
            # only real "*.log" files; the former regex also matched names
            # that merely contained ".log" and then built a wrong path
            if not fname.endswith('.log'):
                continue
            yaset_log = os.path.join(run_dir, fname)
            # get the score from the log file
            scores = read_log(yaset_log, verbose = False, warnings = True)
            if scores is not None:
                # NOTE(review): index 3 of scores['score'] is taken as the
                # F1 value - confirm against read_log
                f1 = scores['score'][3]

    # empty the tmp folder by moving its content next to the other results
    for entry in os.listdir(tmp_dir):
        src = os.path.join(tmp_dir, entry)
        dst = os.path.join(path2hyper_results, entry)
        try:
            os.replace(src, dst)
        except OSError as err:
            # keep going like the former `mv` call did, but say so
            print('could not move {} to {}: {}'.format(src, dst, err), file=sys.stderr)

    score = f1
    print(score)
    return {
        'loss': -score,
        'status': STATUS_OK,
        'eval_time': time.time()
    }

In [None]:
# Run the TPE search: 100 evaluations of yaset_objective over `space`.
# Every evaluation is recorded in `trials`.
trials = Trials()
best = fmin(
    fn=yaset_objective,
    space=space,
    algo=tpe.suggest,
    max_evals=100,
    trials=trials,
)
print(best)


90.02
89.06


In [None]:
# Save the Trials object to disk.
# The former `open(+"mytrials.p", "wb")` applied a unary plus to a string,
# which raises TypeError; the stray `+` is removed and the file is now closed
# deterministically by the `with` block.
with open("mytrials.p", "wb") as trials_file:
    pickle.dump(trials, trials_file)

In [None]:
# Display the results stored in `trials` for the evaluated trials.
trials.results