# Hyperparameter Optimization with Hyperopt



## Install Hyperopt package

In [1]:
!pip install hyperopt



## Import required libraries

In [2]:
import warnings
warnings.simplefilter('ignore')
from ludwig.api import LudwigModel
from ludwig.utils.data_utils import load_json
from ludwig.visualize import learning_curves
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll.stochastic import rng_from_seed
import logging
import shutil

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [3]:
# clean out old results
try:
    shutil.rmtree('./results')
except:
    pass

try:
    shutil.rmtree('./visualizations')
except:
    pass

try:
    file_list = glob.glob('./data/*.json')
    file_list.append(glob.glob('./data/*.hdf5'))
    os.remove(file_list)
except:
    pass


In [4]:
raw_df = pd.read_csv('./data/train.csv')
raw_df.shape

(891, 12)

In [5]:
train_df, vald_df = train_test_split(raw_df, test_size = 0.2, random_state=13)

## Define objective function for minimization

In [6]:
def score_model(params):
    
    print(params)
    
    model_definition['training']['learning_rate'] = params['learning_rate']
    model_definition['training']['batch_size'] = params['batch_size']
    model_definition['output_features'][0]['num_fc_layers'] = np.int(params['output_fc_num_layers'])
    model_definition['output_features'][0]['fc_size'] = np.int(params['output_fc_size'])
    
    # Define Ludwig model object that drive model training
    model = LudwigModel(model_definition,
                        logging_level=logging.WARN)

    # initiate model training
    train_stats = model.train(data_train_df = train_df,
                            data_validation_df = vald_df,
                            experiment_name='experiment_name',
                            model_name='model_name',
                            skip_save_training_description=True, 
                            skip_save_training_statistics=True, 
                            skip_save_model=True, 
                            skip_save_progress=True,                          
                            skip_save_log=True,
                            skip_save_processed_input=True,
                            random_seed=42)



    model.close()
    
    validation_losses = train_stats['validation']['Survived']['loss']
    
    last_epoch = len(validation_losses)
    
    return {'loss': validation_losses[last_epoch - 1], 'status': STATUS_OK}

## Define base model

In [7]:

model_definition = {
    'input_features': [
        {'name': 'Pclass', 'type': 'category'},
        {'name': 'Sex', 'type': 'category'},
        {'name': 'Age', 'type': 'numerical', 
            'preprocessing': {'missing_value_strategy': 'fill_with_mean', 'normalization': 'zscore'}},
        {'name': 'SibSp', 'type': 'numerical', 
            'preprocessing': {'missing_value_strategy': 'fill_with_mean', 'normalization': 'minmax'}},
        {'name': 'Parch', 'type': 'numerical',
            'preprocessing': {'missing_value_strategy': 'fill_with_mean', 'normalization': 'minmax'}},
        {'name': 'Fare', 'type': 'numerical',
            'preprocessing': {'missing_value_strategy': 'fill_with_mean', 'normalization': 'zscore'}},
        {'name': 'Embarked', 'type': 'category'}
    ],
    
    'output_features':[
        {'name': 'Survived', 'type': 'binary'}
    ],
    
    'training': {}
}

## Define hyperparameter search space

In [8]:
SEED=13

space = {'learning_rate':  hp.uniform('learning_rate', 0.0001, 0.01, rng=rng_from_seed(SEED)),
         'batch_size': 2 ** (3 + hp.randint('batch_size_exponent',5, rng=rng_from_seed(SEED))),
         'output_fc_num_layers': hp.quniform('output_fc_num_layers',1,4,1, rng=rng_from_seed(SEED)),
         'output_fc_size': hp.quniform('output_fc_size',5,100,1, rng=rng_from_seed(SEED))
        }

In [9]:
np.random.seed(123)
best = fmin(fn=score_model,
    space=space,
    algo=tpe.suggest,
    max_evals=30)

{'batch_size': 64, 'learning_rate': 0.0014278776196269983, 'output_fc_num_layers': 1.0, 'output_fc_size': 38.0}
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
targets is deprecated, use labels instead

{'batch_size': 128, 'learning_rate': 0.004827655322986053, 'output_fc_num_layers': 2.0, 'output_fc_size': 5.0}
{'batch_size': 16, 'learning_rate': 0.006582161965921761, 'output_fc_num_layers': 4.0, 'output_fc_size': 34.0}
{'batch_size': 32, 'learning_rate': 0.00028175320126028273, 'output_fc_num_layers': 3.0, 'output_fc_size': 33.0}
{'batch_size': 16, 'learning_rate': 0.0018049283628271554, 'output_fc_num_layers': 2.0, 'output_fc_size': 9.0}
{'batch_size': 8, 'learning_rate': 0.0018401902444361475, 'output_fc_num_layers': 2.0, 'output_fc_size': 21.0}
{'batch_size': 16, 'learning_rate': 0.008365035429481069, 'output_fc_num_layers': 2.0, 'output_fc_size': 12.0}
{'batch_size': 64, 'learning_rat

## Optimal hyperparameters

In [10]:
print('batch size:', 2 ** np.int(3 + best['batch_size_exponent']),
      ', learning_rate:', best['learning_rate'],
      ', fc_num_layers:', np.int(best['output_fc_num_layers']),
      ', fc_size:', np.int(best['output_fc_size']))

batch size: 64 , learning_rate: 0.0014278776196269983 , fc_num_layers: 1 , fc_size: 38


## Train with optimal hyperparameters on the whole data set

In [11]:
# clean out old results
try:
    shutil.rmtree('./results')
except:
    pass

try:
    shutil.rmtree('./visualizations')
except:
    pass



In [12]:
# set optimal hyperparameters for training    
model_definition['training']['learning_rate'] = best['learning_rate']
model_definition['training']['batch_size'] = 2** (3 + best['batch_size_exponent'])
model_definition['output_features'][0]['num_fc_layers'] = np.int(best['output_fc_num_layers'])
model_definition['output_features'][0]['fc_size'] = np.int(best['output_fc_size'])


# Define Ludwig model object that drive model training
model = LudwigModel(model_definition,
                    logging_level=logging.WARN)

# initiate model training
train_stats = model.train(data_train_df = raw_df,
                        experiment_name='experiment_name',
                        model_name='model_name',                        
                        random_seed=42)



model.close()
