In [6]:
import h5py
import numpy as np
import pandas as pd
import json
import yaml
from ludwig import LudwigModel
import copy
import ray
from ludwig.utils.misc import merge_dict

In [2]:
# Load the base model definition that every trial is derived from.
# safe_load is used instead of yaml.load: calling yaml.load without an explicit
# Loader is deprecated in PyYAML 5.1+ and raises TypeError in PyYAML 6, and the
# safe loader only builds plain Python containers and scalars.
# NOTE(review): assumes titanic_full.yaml uses no custom YAML tags - confirm.
with open("titanic_full.yaml", 'r') as stream:
    base_model = yaml.safe_load(stream)



  


In [8]:
#base should contain special markup so we know what parameters need to be updated.

def build_trial(base, config):
    """Assemble the model definition for a single hyperparameter trial.

    The 'combiner' and 'training' sections of ``base`` are each passed through
    ``merge_dict`` together with one override taken from ``config``; the
    feature sections are carried over by reference.

    Args:
        base: model definition dict with 'input_features', 'output_features',
            'combiner' and 'training' entries.
        config: trial parameters; 'num_fc_layers' and 'batch_size' are read.

    Returns:
        A new dict with the same four top-level sections as ``base``.

    Raises:
        KeyError: if any of the keys read from ``base`` or ``config`` is missing.
    """
    base_combiner = base['combiner']
    base_training = base['training']

    # NOTE(review): presumably merge_dict returns a merged copy and leaves its
    # first argument untouched - confirm against ludwig.utils.misc.
    layer_override = {'num_fc_layers': config['num_fc_layers']}
    trial_combiner = merge_dict(base_combiner, layer_override)

    batch_override = {'batch_size': config['batch_size']}
    trial_training = merge_dict(base_training, batch_override)

    return {
        'input_features': base['input_features'],
        'output_features': base['output_features'],
        'combiner': trial_combiner,
        'training': trial_training,
    }

In [14]:
# Dataset and its metadata file, as passed to LudwigModel.train below.
# NOTE(review): these are absolute, machine-specific paths; the notebook will
# only run elsewhere after they are pointed at local copies of the files.
data = '/Users/benmackenzie/projects/Teradata/ludwig/examples/hyperparameters/titanic.hdf5'
metadata = '/Users/benmackenzie/projects/Teradata/ludwig/examples/hyperparameters/titanic.json'


def train(base, config, reporter):
    """Train one trial and report its final validation accuracy.

    Args:
        base: base model definition handed to ``build_trial``.
        config: trial parameters handed to ``build_trial``.
        reporter: callable invoked once with ``mean_accuracy`` and ``done=True``.

    Returns:
        Whatever ``reporter`` returns.
    """
    trial_definition = build_trial(base, config)
    trial_model = LudwigModel(trial_definition)
    stats = trial_model.train(data_hdf5=data, train_set_metadata_json=metadata)

    # Only the last recorded accuracy value for the 'Survived' output is reported.
    final_accuracy = stats['validation']['Survived']['accuracy'][-1]
    return reporter(mean_accuracy=final_accuracy, done=True)



    


In [15]:
from ray.tune import register_trainable, grid_search, run_experiments

# Shut down before init so this cell can be re-run in a live kernel.
ray.shutdown()
ray.init()

# Exhaustive grid: 4 layer counts x 5 batch sizes = 20 trials.
grid_search_space = {
    'num_fc_layers': grid_search([1, 2, 3, 4]),
    'batch_size': grid_search([4, 16, 32, 64, 128]),
}

# The trainable is registered with two arguments; base_model is bound here via
# the lambda's closure.
register_trainable('train', lambda cfg, rptr: train(base_model, cfg, rptr))

experiment_spec = {
    'run': 'train',
    'stop': {'mean_accuracy': 0.9},
    'config': grid_search_space,
}
run_experiments({'my_experiment': experiment_spec})
                

2019-03-14 09:13:03,747	INFO node.py:439 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-03-14_09-13-03_20306/logs.
2019-03-14 09:13:03,869	INFO services.py:364 -- Waiting for redis server at 127.0.0.1:13451 to respond...
2019-03-14 09:13:04,027	INFO services.py:364 -- Waiting for redis server at 127.0.0.1:60162 to respond...
2019-03-14 09:13:04,032	INFO services.py:761 -- Starting Redis shard with 1.72 GB max memory.
2019-03-14 09:13:04,088	INFO services.py:1449 -- Starting the Plasma object store with 2.58 GB memory using /tmp.
2019-03-14 09:13:04,692	INFO tune.py:135 -- Tip: to resume incomplete experiments, pass resume='prompt' or resume=True to run_experiments()
2019-03-14 09:13:04,694	INFO tune.py:145 -- Starting a new experiment.


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/4 CPUs, 0/0 GPUs
Unknown memory usage. Please run `pip install psutil` (or ray[debug]) to resolve)

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 1/4 CPUs, 0/0 GPUs
Unknown memory usage. Please run `pip install psutil` (or ray[debug]) to resolve)
Result logdir: /Users/benmackenzie/ray_results/my_experiment
Number of trials: 20 ({'RUNNING': 1, 'PENDING': 19})
PENDING trials:
 - train_1_batch_size=16,num_fc_layers=1:	PENDING
 - train_2_batch_size=32,num_fc_layers=1:	PENDING
 - train_3_batch_size=64,num_fc_layers=1:	PENDING
 - train_4_batch_size=128,num_fc_layers=1:	PENDING
 - train_5_batch_size=4,num_fc_layers=2:	PENDING
 - train_6_batch_size=16,num_fc_layers=2:	PENDING
 - train_7_batch_size=32,num_fc_layers=2:	PENDING
 - train_8_batch_size=64,num_fc_layers=2:	PENDING
 - train_9_batch_size=128,num_fc_layers=2:	PENDING
 - train_10_batch_size=4,num_fc_layers=3:	PENDING
 - train_11_batch_size=16,num_fc

Result for train_7_batch_size=32,num_fc_layers=2:
  date: 2019-03-14_09-14-03
  done: true
  experiment_id: d33d20245cfa4307aca43f4d4cf3f6f9
  hostname: 192.168.219.94
  iterations_since_restore: 1
  mean_accuracy: 0.7901234567901234
  node_ip: 192.168.219.94
  pid: 20718
  time_since_restore: 13.152106046676636
  time_this_iter_s: 13.152106046676636
  time_total_s: 13.152106046676636
  timestamp: 1552569243
  timesteps_since_restore: 0
  training_iteration: 1
  
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/4 CPUs, 0/0 GPUs
Unknown memory usage. Please run `pip install psutil` (or ray[debug]) to resolve)
Result logdir: /Users/benmackenzie/ray_results/my_experiment
Number of trials: 20 ({'TERMINATED': 6, 'RUNNING': 3, 'PENDING': 11})
PENDING trials:
 - train_9_batch_size=128,num_fc_layers=2:	PENDING
 - train_10_batch_size=4,num_fc_layers=3:	PENDING
 - train_11_batch_size=16,num_fc_layers=3:	PENDING
 - train_12_batch_size=32,num_fc_layers=3:	PENDING
 - train_13_ba

Result for train_11_batch_size=16,num_fc_layers=3:
  date: 2019-03-14_09-14-29
  done: true
  experiment_id: 99271892d709460ca61b5991a6d98b7f
  hostname: 192.168.219.94
  iterations_since_restore: 1
  mean_accuracy: 0.7777777777777778
  node_ip: 192.168.219.94
  pid: 20763
  time_since_restore: 10.053380250930786
  time_this_iter_s: 10.053380250930786
  time_total_s: 10.053380250930786
  timestamp: 1552569269
  timesteps_since_restore: 0
  training_iteration: 1
  
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/4 CPUs, 0/0 GPUs
Unknown memory usage. Please run `pip install psutil` (or ray[debug]) to resolve)
Result logdir: /Users/benmackenzie/ray_results/my_experiment
Number of trials: 20 ({'TERMINATED': 11, 'RUNNING': 3, 'PENDING': 6})
PENDING trials:
 - train_14_batch_size=128,num_fc_layers=3:	PENDING
 - train_15_batch_size=4,num_fc_layers=4:	PENDING
 - train_16_batch_size=16,num_fc_layers=4:	PENDING
 - train_17_batch_size=32,num_fc_layers=4:	PENDING
 - train_18_

Result for train_13_batch_size=64,num_fc_layers=3:
  date: 2019-03-14_09-14-50
  done: true
  experiment_id: 68f5797e46444e41975db1264f45e5cc
  hostname: 192.168.219.94
  iterations_since_restore: 1
  mean_accuracy: 0.7901234567901234
  node_ip: 192.168.219.94
  pid: 20777
  time_since_restore: 9.044251203536987
  time_this_iter_s: 9.044251203536987
  time_total_s: 9.044251203536987
  timestamp: 1552569290
  timesteps_since_restore: 0
  training_iteration: 1
  
Result for train_14_batch_size=128,num_fc_layers=3:
  date: 2019-03-14_09-14-53
  done: true
  experiment_id: 050b10e685d04f86852bddbe44551344
  hostname: 192.168.219.94
  iterations_since_restore: 1
  mean_accuracy: 0.7901234567901234
  node_ip: 192.168.219.94
  pid: 20783
  time_since_restore: 10.055480003356934
  time_this_iter_s: 10.055480003356934
  time_total_s: 10.055480003356934
  timestamp: 1552569293
  timesteps_since_restore: 0
  training_iteration: 1
  
== Status ==
Using FIFO scheduling algorithm.
Resources requeste

Result for train_17_batch_size=32,num_fc_layers=4:
  date: 2019-03-14_09-15-14
  done: true
  experiment_id: 2d25a2aa1fa94fb4b772b70fb7924063
  hostname: 192.168.219.94
  iterations_since_restore: 1
  mean_accuracy: 0.7777777777777778
  node_ip: 192.168.219.94
  pid: 20797
  time_since_restore: 10.037584781646729
  time_this_iter_s: 10.037584781646729
  time_total_s: 10.037584781646729
  timestamp: 1552569314
  timesteps_since_restore: 0
  training_iteration: 1
  
Result for train_18_batch_size=64,num_fc_layers=4:
  date: 2019-03-14_09-15-18
  done: true
  experiment_id: 665a6eb26c3b4b0ab3cceb4806473119
  hostname: 192.168.219.94
  iterations_since_restore: 1
  mean_accuracy: 0.8024691358024691
  node_ip: 192.168.219.94
  pid: 20802
  time_since_restore: 11.04942011833191
  time_this_iter_s: 11.04942011833191
  time_total_s: 11.04942011833191
  timestamp: 1552569318
  timesteps_since_restore: 0
  training_iteration: 1
  
Result for train_19_batch_size=128,num_fc_layers=4:
  date: 2019-

[train_0_batch_size=4,num_fc_layers=1,
 train_1_batch_size=16,num_fc_layers=1,
 train_2_batch_size=32,num_fc_layers=1,
 train_3_batch_size=64,num_fc_layers=1,
 train_4_batch_size=128,num_fc_layers=1,
 train_5_batch_size=4,num_fc_layers=2,
 train_6_batch_size=16,num_fc_layers=2,
 train_7_batch_size=32,num_fc_layers=2,
 train_8_batch_size=64,num_fc_layers=2,
 train_9_batch_size=128,num_fc_layers=2,
 train_10_batch_size=4,num_fc_layers=3,
 train_11_batch_size=16,num_fc_layers=3,
 train_12_batch_size=32,num_fc_layers=3,
 train_13_batch_size=64,num_fc_layers=3,
 train_14_batch_size=128,num_fc_layers=3,
 train_15_batch_size=4,num_fc_layers=4,
 train_16_batch_size=16,num_fc_layers=4,
 train_17_batch_size=32,num_fc_layers=4,
 train_18_batch_size=64,num_fc_layers=4,
 train_19_batch_size=128,num_fc_layers=4]



A more elaborate approach to specifying the parameters to search over.

In [57]:
# Version 1: extracts the parameters that we want to search over. Doesn't handle lists.


import re

# A string counts as a search parameter when it starts with '{{' and has a
# '}}' somewhere after it (re.match anchors at the start of the string).
pattern = "^{{.*}}"


def get_keys(dct, path=""):
    """Find every templated string value in a nested dict.

    Args:
        dct: dict to scan; nested dicts are searched recursively. Values that
            are neither dicts nor strings (including lists) are ignored.
        path: '->'-terminated prefix of the keys leading to ``dct``; callers
            normally leave it at the default.

    Returns:
        A list of strings of the form 'key->subkey->...->value', one per
        string value matching ``pattern``, in dict iteration order.
    """
    parameters = []
    for key, value in dct.items():
        if isinstance(value, dict):
            # Pass the accumulated prefix down as well as this key; passing only
            # the key would drop the ancestors of values nested two or more
            # levels deep.
            parameters.extend(get_keys(value, path + key + "->"))
        elif isinstance(value, str) and re.match(pattern, value) is not None:
            parameters.append(path + key + "->" + value)
    return parameters

# Updates parameters in place; doesn't handle lists.

def update_param(dct, path, value):
    """Assign ``value`` at the nested position in ``dct`` addressed by ``path``.

    Args:
        dct: nested dict, modified in place.
        path: sequence of keys; all but the last are followed downwards and
            the last one is the key that gets assigned.
        value: object to store.

    Raises:
        IndexError: if ``path`` is empty.
        KeyError: if an intermediate key is missing.
    """
    node = dct
    # Walk down to the container that holds the final key.
    for key in path[:-1]:
        node = node[key]
    node[path[-1]] = value
        
def build_model(base_model, config):
    """Apply every override in ``config`` to ``base_model`` in place.

    Args:
        base_model: nested dict to modify.
        config: mapping from a '->'-separated key path to the value to store
            at that path.

    Returns:
        None; ``base_model`` itself is changed.
    """
    for key_path, new_value in config.items():
        update_param(base_model, key_path.split('->'), new_value)



In [58]:
# Sample training section used to exercise get_keys; the values written as
# '{{...}}' strings are the ones marked as search parameters.
training = {
    'optimizer': {
        'type': 'adam',
        'beta1': 0.9,
        'beta2': '{{0.999}}',
        'epsilon': 1e-08,
    },
    'epochs': '{{100}}',
    'regularizer': 'l2',
    'regularization_lambda': 0,
    'learning_rate': 0.001,
    'batch_size': 128,
    'dropout_rate': 0.0,
    'early_stop': 5,
    'reduce_learning_rate_on_plateau': 0,
    'reduce_learning_rate_on_plateau_patience': 5,
    'reduce_learning_rate_on_plateau_rate': 0.5,
    'increase_batch_size_on_plateau': 0,
    'increase_batch_size_on_plateau_patience': 5,
    'increase_batch_size_on_plateau_rate': 2,
    'increase_batch_size_on_plateau_max': 512,
    'decay': False,
    'decay_steps': 10000,
    'decay_rate': '{{0.96}}',
    'staircase': False,
    'gradient_clipping': None,
    'validation_field': 'combined',
    'validation_measure': 'loss',
    'bucketing_field': None,
    'learning_rate_warmup_epochs': 5,
}

# Collect the key paths of the marked values and display them.
k = get_keys(training)
k

['optimizer->beta2->{{0.999}}', 'epochs->{{100}}', 'decay_rate->{{0.96}}']

In [74]:
# Overrides keyed by '->'-separated path; build_model writes them into
# `training` in place, so re-running the earlier cell is needed to get the
# original values back.
config = {
    'optimizer->beta1': 0.77,
    'epochs': 999,
}
build_model(training, config)
training

{'optimizer': {'type': 'adam',
  'beta1': 0.77,
  'beta2': '{{0.999}}',
  'epsilon': 1e-08},
 'epochs': 999,
 'regularizer': 'l2',
 'regularization_lambda': 0,
 'learning_rate': 0.001,
 'batch_size': 128,
 'dropout_rate': 0.0,
 'early_stop': 5,
 'reduce_learning_rate_on_plateau': 0,
 'reduce_learning_rate_on_plateau_patience': 5,
 'reduce_learning_rate_on_plateau_rate': 0.5,
 'increase_batch_size_on_plateau': 0,
 'increase_batch_size_on_plateau_patience': 5,
 'increase_batch_size_on_plateau_rate': 2,
 'increase_batch_size_on_plateau_max': 512,
 'decay': False,
 'decay_steps': 10000,
 'decay_rate': '{{0.96}}',
 'staircase': False,
 'gradient_clipping': None,
 'validation_field': 'combined',
 'validation_measure': 'loss',
 'bucketing_field': None,
 'learning_rate_warmup_epochs': 5}