# Hyperparameter Optimization with Hyperopt



## Install Hyperopt package

In [1]:
# Pinned to the hyperopt release shown in this notebook's recorded install log;
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install hyperopt==0.2.2

Collecting hyperopt
[?25l  Downloading https://files.pythonhosted.org/packages/0b/4a/79541d4f61e7878f846f68ab31ed709bac6ee99345378c0e02658c3be0d4/hyperopt-0.2.2-py3-none-any.whl (1.9MB)
[K     |████████████████████████████████| 1.9MB 1.7MB/s eta 0:00:01
Collecting future
[?25l  Downloading https://files.pythonhosted.org/packages/45/0b/38b06fd9b92dc2b68d58b75f900e97884c45bedd2ff83203d933cf5851c9/future-0.18.2.tar.gz (829kB)
[K     |████████████████████████████████| 829kB 2.9MB/s eta 0:00:01
[?25hCollecting networkx==2.2
[?25l  Downloading https://files.pythonhosted.org/packages/f3/f4/7e20ef40b118478191cec0b58c3192f822cace858c19505c7670961b76b2/networkx-2.2.zip (1.7MB)
[K     |████████████████████████████████| 1.7MB 7.1MB/s eta 0:00:01
Building wheels for collected packages: future, networkx
  Building wheel for future (setup.py) ... [?25ldone
[?25h  Created wheel for future: filename=future-0.18.2-cp36-none-any.whl size=493275 sha256=d2427765366dc823355eccffe20d9a3f00f1c05b25a9

## Import required libraries

In [2]:
# Silence warnings before the heavy third-party imports below are executed
import warnings
warnings.simplefilter('ignore')

import glob
import logging
import os
import shutil

import numpy as np
import pandas as pd
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll.stochastic import rng_from_seed
from ludwig.api import LudwigModel
from ludwig.utils.data_utils import load_json
from ludwig.visualize import learning_curves
from sklearn.model_selection import train_test_split

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [3]:
# clean out old results
# ignore_errors keeps the removal best-effort (the directory may not exist yet)
# without a bare `except:` that would also hide typos and KeyboardInterrupt
for stale_dir in ('./results', './visualizations'):
    shutil.rmtree(stale_dir, ignore_errors=True)

# remove the .json / .hdf5 files sitting next to the data; every glob match is
# deleted individually because os.remove accepts one path, not a list
for pattern in ('./data/*.json', './data/*.hdf5'):
    for stale_file in glob.glob(pattern):
        try:
            os.remove(stale_file)
        except OSError as err:
            # best-effort: report the problem and keep going
            print('could not remove', stale_file, '-', err)


In [4]:
# Read the training CSV into a DataFrame; the trailing expression shows its
# (rows, columns) shape as the cell output
raw_df = pd.read_csv('./data/train.csv')
raw_df.shape

(891, 12)

In [5]:
# Keep 20% of the rows aside as a validation frame; the fixed random_state
# makes the split repeatable across runs
train_df, vald_df = train_test_split(
    raw_df,
    test_size=0.2,
    random_state=13,
)

## Define objective function for minimization

In [6]:
def score_model(params):
    """Train one Ludwig model with the given hyperparameters and score it.

    This is the objective handed to hyperopt's ``fmin``. The values in
    ``params`` are written in place into the notebook-level
    ``model_definition`` dict, a model is trained on ``train_df`` with
    ``vald_df`` as validation data, and every ``skip_save_*`` option is
    switched on for the training call.

    Args:
        params: dict with the keys ``'learning_rate'``, ``'batch_size'``,
            ``'output_fc_num_layers'`` and ``'output_fc_size'``. The layer
            count and layer size arrive as floats (e.g. ``2.0``) and are
            converted to ``int`` before they are stored.

    Returns:
        dict: ``{'loss': <validation loss of the last epoch>,
        'status': STATUS_OK}``.
    """
    print(params)

    training_cfg = model_definition['training']
    output_cfg = model_definition['output_features'][0]

    training_cfg['learning_rate'] = params['learning_rate']
    training_cfg['batch_size'] = int(params['batch_size'])
    # built-in int() replaces the np.int alias, which newer NumPy no longer provides
    output_cfg['num_fc_layers'] = int(params['output_fc_num_layers'])
    output_cfg['fc_size'] = int(params['output_fc_size'])

    # Define the Ludwig model object that drives model training
    model = LudwigModel(model_definition,
                        logging_level=logging.WARN)

    try:
        # initiate model training
        train_stats = model.train(data_train_df=train_df,
                                  data_validation_df=vald_df,
                                  experiment_name='experiment_name',
                                  model_name='model_name',
                                  skip_save_training_description=True,
                                  skip_save_training_statistics=True,
                                  skip_save_model=True,
                                  skip_save_progress=True,
                                  skip_save_log=True,
                                  skip_save_processed_input=True,
                                  random_seed=42)
    finally:
        # always close the model, even when training raises
        model.close()

    validation_losses = train_stats['validation']['Survived']['loss']

    # the loss recorded for the final epoch is the trial's score
    return {'loss': validation_losses[-1], 'status': STATUS_OK}

## Define base model

In [7]:

# Base Ludwig model definition. 'training' starts empty: score_model writes
# the learning rate and batch size into it for each trial.
model_definition = dict(
    input_features=[
        dict(name='Pclass', type='category'),
        dict(name='Sex', type='category'),
        # numerical inputs: missing values are filled with the mean, then normalized
        dict(name='Age', type='numerical',
             preprocessing=dict(missing_value_strategy='fill_with_mean',
                                normalization='zscore')),
        dict(name='SibSp', type='numerical',
             preprocessing=dict(missing_value_strategy='fill_with_mean',
                                normalization='minmax')),
        dict(name='Parch', type='numerical',
             preprocessing=dict(missing_value_strategy='fill_with_mean',
                                normalization='minmax')),
        dict(name='Fare', type='numerical',
             preprocessing=dict(missing_value_strategy='fill_with_mean',
                                normalization='zscore')),
        dict(name='Embarked', type='category'),
    ],
    # single binary target
    output_features=[
        dict(name='Survived', type='binary'),
    ],
    training=dict(),
)

## Define hyperparameter search space

In [8]:
# Seed handed to the generator of every search dimension below
SEED = 13

# Hyperparameter search space explored by hyperopt.
# NOTE(review): confirm these per-node rng arguments are honoured by fmin;
# a repeatable search is normally requested through fmin's own rstate argument.
space = dict(
    learning_rate=hp.uniform('learning_rate', 0.0001, 0.01, rng=rng_from_seed(SEED)),
    # batch size is 2 ** (3 + k), where k is the sampled 'batch_size_exponent'
    batch_size=2 ** (3 + hp.randint('batch_size_exponent', 5, rng=rng_from_seed(SEED))),
    output_fc_num_layers=hp.quniform('output_fc_num_layers', 1, 4, 1, rng=rng_from_seed(SEED)),
    output_fc_size=hp.quniform('output_fc_size', 5, 100, 1, rng=rng_from_seed(SEED)),
)

In [9]:
# Seed NumPy's global generator before searching.
# NOTE(review): fmin is not given an rstate here - confirm this seed is what
# actually makes the search repeatable.
np.random.seed(123)

# Minimise score_model over `space` with the TPE algorithm, stopping after 30 evaluations
best = fmin(score_model, space, algo=tpe.suggest, max_evals=30)

{'batch_size': 64, 'learning_rate': 0.002476612622869838, 'output_fc_num_layers': 2.0, 'output_fc_size': 8.0}
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
targets is deprecated, use labels instead

{'batch_size': 32, 'learning_rate': 0.004832685688730726, 'output_fc_num_layers': 2.0, 'output_fc_size': 91.0}
{'batch_size': 8, 'learning_rate': 0.002846487689031914, 'output_fc_num_layers': 4.0, 'output_fc_size': 19.0}
{'batch_size': 16, 'learning_rate': 0.002430436589437408, 'output_fc_num_layers': 3.0, 'output_fc_size': 55.0}
{'batch_size': 64, 'learning_rate': 0.005678823076109719, 'output_fc_num_layers': 2.0, 'output_fc_size': 66.0}
{'batch_size': 8, 'learning_rate': 0.00608247530874953, 'output_fc_num_layers': 4.0, 'output_fc_size': 10.0}
{'batch_size': 8, 'learning_rate': 0.00937153269542596, 'output_fc_num_layers': 3.0, 'output_fc_size': 32.0}
{'batch_size': 16, 'learning_rate': 0.005

## Optimal hyperparameters

In [10]:
# `best` stores the sampled exponent, so the batch size is rebuilt as 2 ** (3 + k);
# built-in int() replaces the np.int alias, which newer NumPy no longer provides
print('batch size:', 2 ** int(3 + best['batch_size_exponent']),
      ', learning_rate:', best['learning_rate'],
      ', fc_num_layers:', int(best['output_fc_num_layers']),
      ', fc_size:', int(best['output_fc_size']))

batch size: 16 , learning_rate: 0.00824635803549801 , fc_num_layers: 1 , fc_size: 5


## Train with optimal hyperparameters on the whole data set

In [11]:
# clean out old results
# ignore_errors keeps the removal best-effort (the directory may not exist)
# without a bare `except:` that would also hide typos and KeyboardInterrupt
for stale_dir in ('./results', './visualizations'):
    shutil.rmtree(stale_dir, ignore_errors=True)



In [None]:
# set optimal hyperparameters for training
model_definition['training']['learning_rate'] = best['learning_rate']
# `best` stores the sampled exponent, so the batch size is rebuilt as 2 ** (3 + k);
# int() keeps the stored values plain Python integers (np.int is gone from newer NumPy)
model_definition['training']['batch_size'] = 2 ** int(3 + best['batch_size_exponent'])
model_definition['output_features'][0]['num_fc_layers'] = int(best['output_fc_num_layers'])
model_definition['output_features'][0]['fc_size'] = int(best['output_fc_size'])

# Define the Ludwig model object that drives model training
model = LudwigModel(model_definition,
                    logging_level=logging.WARN)

try:
    # initiate model training on the complete raw_df; no separate validation
    # frame is passed for this final run
    train_stats = model.train(data_train_df=raw_df,
                              experiment_name='hyperparameter_training',
                              model_name='optimized_model',
                              random_seed=42)
finally:
    # always close the model, even when training raises
    model.close()
