In [1]:
%load_ext autoreload
%autoreload 2

import learn_ann
import keras
import keras.backend as K
import numpy as np
import pandas as pd
import json
import yaml
import os
import os.path
from boolnet.utils import NumpyAwareJSONEncoder

Using TensorFlow backend.


In [2]:
# 'Nh': 60,
# 'optimizer': 'Nadam',
# 'nonlinearity': 'tanh',
# 'loss': 'mse', 

Plan:
 - random search as per: http://www.jmlr.org/papers/v13/bergstra12a.html
 - choose single Ne near middle of transition band for each function
 - Nh in range(30, 120)
 - opt in [RMSprop, Adadelta, Nadam]
 - nonlin = tanh
 - loss in [mse, hinge, squared_hinge, binary_crossentropy, logcosh]
 - how to deal with epochs? 1000 epochs ≈ 6-8 sec, 10000 epochs ≈ 60-80 sec
 - no batch normalisation (we want binary extremes)

Currently doing full-batch training, but LeCun recommends stochastic (or mini-batch) learning:
http://yann.lecun.com/exdb/publis/pdf/lecun-98b.pdf

EarlyStopping

`keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=0, verbose=0, mode='auto')`

This needs `validation_split` or `validation_data` to be passed to `fit()`.

# Optimiser selection:

### SGD
probably worth checking
 - lr: float >= 0. Learning rate.
 - momentum: float >= 0. Parameter updates momentum.
 - decay: float >= 0. Learning rate decay over each update.
 - nesterov: boolean. Whether to apply Nesterov momentum.


### RMSprop
It is recommended to leave the parameters of this optimizer at their default values (except the learning rate, which can be freely tuned).
This optimizer is usually a good choice for recurrent neural networks.
 - lr: float >= 0. Learning rate.
 - rho: float >= 0.
 - epsilon: float >= 0. Fuzz factor.
 - decay: float >= 0. Learning rate decay over each update.


### Adadelta
It is recommended to leave the parameters of this optimizer at their default values.
 - lr: float >= 0. Learning rate. It is recommended to leave it at the default value.
 - rho: float >= 0.
 - epsilon: float >= 0. Fuzz factor.
 - decay: float >= 0. Learning rate decay over each update.


### Nadam
Default parameters follow those provided in the paper.
It is recommended to leave the parameters of this optimizer at their default values.
 - lr: float >= 0. Learning rate.
 - beta_1/beta_2: floats, 0 < beta < 1. Generally close to 1.
 - epsilon: float >= 0. Fuzz factor.


### Adagrad
Use Adadelta or RMSprop instead
### Adam
Use Nadam instead
### Adamax
Use Nadam instead

In [2]:
def nested_set(dic, keys, value):
    """Store `value` in the nested dict `dic` at the path given by `keys`.

    Every key except the last names an intermediate dict, which is created
    (empty) when it is missing. The last key receives `value`. `dic` is
    modified in place and nothing is returned.
    """
    node = dic
    leaf_index = len(keys) - 1
    # Descend through (and create, if needed) the intermediate levels.
    for depth in range(leaf_index):
        node = node.setdefault(keys[depth], {})
    node[keys[leaf_index]] = value

def search(base_params, generators, n, stream):
    """Run `n` random-search trials and write the results to `stream` as JSON.

    For each trial every generator is called once to sample a value, which is
    placed into a private copy of `base_params` at the generator's key path.
    The resulting parameters are handed to `learn_ann.learn`, and a flat
    record (sampled values plus selected result fields) is appended to
    `stream`.

    Parameters
    ----------
    base_params : dict
        Template parameters. It is not modified: each trial works on a deep
        copy, so sampled values never leak back into the caller's dict.
    generators : dict
        Maps a tuple of nested keys (a path into the parameter dict) to a
        zero-argument callable returning the value to use for that trial.
    n : int
        Number of trials to run.
    stream : writable text stream
        Receives a JSON array with one record per line. The array is always
        opened and closed, so `n == 0` yields `[]`, and a sweep that fails
        between records still leaves a terminated array of the completed
        trials before the exception propagates.
    """
    import copy  # local import keeps this definition self-contained

    # (record key, key in the learn() result), in the order they are written.
    result_fields = (
        ('time', 'learning_time'),
        ('trg_err', 'trg_err'),
        ('trg_errs', 'trg_errs'),
        ('test_err', 'test_err'),
        ('test_errs', 'test_errs'),
        ('trg_mcc', 'trg_mcc'),
        ('trg_mccs', 'trg_mccs'),
        ('test_mcc', 'test_mcc'),
        ('test_mccs', 'test_mccs'),
    )

    stream.write('[')
    try:
        for i in range(n):
            params = copy.deepcopy(base_params)
            record = {}
            for keys, gen in generators.items():
                val = gen()
                nested_set(params, keys, val)
                record[' '.join(keys)] = val
            result = learn_ann.learn(params)
            for record_key, result_key in result_fields:
                record[record_key] = result[result_key]
            if i:
                # Separator goes before every record except the first.
                stream.write(',')
            json.dump(record, stream, cls=NumpyAwareJSONEncoder)
            stream.write('\n')
            # Reset the Keras backend session after each trial.
            K.clear_session()
    finally:
        stream.write(']\n')

In [8]:
# searchspace = {
#     ('Nh', ): range(30, 121),
#     ('optimizer', ): ['SGD', 'RMSprop', 'Adadelta', 'Nadam'],
#     ('loss', ): ['mse', 'hinge', 'squared_hinge', 'binary_crossentropy', 'logcosh'],
#     ('fit', 'batch_size'): [8, 16, 32, 64],
#     ('fit', 'epochs'): [1000, 2000, 4000, 8000],
# }
# Search space for the random search. Each key is a tuple of nested keys
# (a path into the parameter dict) and each value is the collection of
# candidate values from which one is drawn per trial.
searchspace = {
    ('optimizer', ): ['SGD', 'RMSprop', 'Adadelta', 'Nadam'],
#     ('sampling', 'Ne'): [8, 16, 32, 64, 128],
    ('architecture', 'name'): ['shared_a', 'shared_b', 'shared_c', 'parallel', 'layered'],
}

In [9]:
# Number of random-search trials to run; also part of the run's directory name.
n = 50

In [10]:
problem = 'add5'
# Run label; used as the prefix of the output directory under sweeps/.
run_description = f'{problem}_{n}'

# Fixed parameters shared by every trial. Entries whose key path appears in
# `searchspace` (e.g. 'optimizer', architecture 'name') are sampled per trial.
base_params = {
    'data': {
        # NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
        'filename': f'/home/shannon/HMRI/experiments/datasets/functions/{problem}.npz'
    },
    'sampling': {
        'Ne': 64,
        'seed': None
    },
    'architecture': {
#         'name': 'shared_a',
        'params': {}
    },
    'fit': {
         'epochs': 5000,
#          'batch_size': Ne
    },
    'batch_ratio': 0.5,
    'nonlinearity': 'tanh',
    'optimizer': 'RMSprop',
    'loss': 'squared_hinge',
    'Nh': 40,
}


# Claim the first unused sweeps/<run_description>_<i> directory. Creating the
# directory is itself the existence test, so there is no gap between checking
# and creating, and running out of indices raises a descriptive error.
for _run_index in range(10000):
    name = f'sweeps/{run_description}_{_run_index}'
    try:
        os.makedirs(name)
    except FileExistsError:
        continue
    break
else:
    raise RuntimeError(
        f'no free output directory: sweeps/{run_description}_0 .. '
        f'sweeps/{run_description}_9999 all exist')
print(name)

# Record the exact configuration of this sweep next to its results.
with open(f'{name}/base.yaml', 'w') as f:
    yaml.dump(base_params, stream=f)

with open(f'{name}/searchspace.yaml', 'w') as f:
    yaml.dump(searchspace, stream=f)

# One sampler per search dimension. `options` is bound as a default argument
# so each lambda keeps its own option list rather than the loop's last one.
generators = {key: lambda o=options: np.random.choice(o)
              for key, options in searchspace.items()}

import tensorflow as tf
# Run the whole sweep under the '/cpu:0' device scope.
with tf.device('/cpu:0'):
    with open(f'{name}/results.json', 'w') as f:
        search(base_params, generators, n, f)

sweeps/add5_50_0
