In [1]:
%load_ext autoreload
%autoreload 2 
%matplotlib inline
# This blokc is important if we want the memory to grow on the GPU, and not block allocate the whole thing
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
from keras.utils import plot_model
from keras.callbacks import TensorBoard
from datetime import datetime
import os
from keras.callbacks import EarlyStopping, ModelCheckpoint
from livelossplot.keras import PlotLossesCallback
from pathlib import Path
from sklearn.utils import class_weight 
# Hyperparam opt
import talos as ta

config = tf.ConfigProto()
config.gpu_options.allow_growth = True
set_session(tf.Session(config=config))

# Set path to find modelling tools for later use
import sys
sys.path.append(os.path.join(os.getcwd(),".."))
# Global params live here
import haberrspd.charCNN.globals

Using TensorFlow backend.


In [None]:
# Load data-loader
from haberrspd.charCNN.auxiliary_tf import create_training_data, 
# Load model
from haberrspd.charCNN.models_tf import char_cnn_model, char_cnn_model_talos

## Load training data and validation data as well as auxiliary model parameters

In [None]:
DATA_ROOT = Path("../data/") / "MJFF" / "preproc" # Note the relative path
# Load training data and auxiliary variables
X_train, X_test, y_train, y_test, max_sentence_length = \
create_training_data(DATA_ROOT,"EnglishData-preprocessed.csv",'sentence')
class_weights = class_weight.compute_class_weight('balanced',list(set(y_train)),y_train)

In [None]:
# Rev-up tensorboard
# logdir="../logs/char-cnn-keras-" + datetime.now().strftime("%Y%m%d-%H%M%S")
# tensorboard_callback = TensorBoard(log_dir=logdir)
# Set model
model = char_cnn_model(max_sentence_length)
model.summary()

## Assign a loss function

In [None]:
loss_func = 'squared_hinge'

if loss_func == 'hinge' or loss_func == 'squared_hinge':
    y_train = [-1 if x==0 else x for x in y_train]
    y_test = [-1 if x==0 else x for x in y_test]
    
if loss_func == 'binary_crossentropy':
    # Check if label-space is correct
    if (-1 in y_train) or (-1 in y_test):
        y_train = [0 if x==-1 else x for x in y_train]
        y_test = [0 if x==-1 else x for x in y_test]

## Train model

In [None]:
# Compile
model.compile(loss=loss_func,  # TODO: change to cosine loss, cosine_proximity, binary_crossentropy
              optimizer='adam',            # TODO: check which is most appropriate
              metrics=['accuracy'])        # Probs other options here which are more useful

# Check if checkpoints dir exists, if not make it
if not os.path.exists('../../keras_checkpoints'):
    os.makedirs('../../keras_checkpoints')

# Callbacks
file_name = "char-CNN"
check_cb = ModelCheckpoint(file_name + '.{epoch:02d}-{val_loss:.2f}.hdf5',
                           monitor='val_loss',
                           verbose=0,
                           save_best_only=True,
                           mode='min')

earlystop_cb = EarlyStopping(monitor='val_loss',
                             patience=7,
                             verbose=0,
                             mode='auto')

# history = LossHistory()
"""
TODO:

-Add class-weight option to take into account class-imbalance on patients and controls
"""
fit_hist = model.fit(X_train,
                     y_train,
                     validation_data=(X_test, y_test),
                     verbose=0, # Set to zero if using live plotting of losses
                     class_weight = class_weights,
                     batch_size=128,
                     epochs=40,
                     #shuffle=True, # Our data is already shuffled during data loading
                     callbacks=[
                                #check_cb,
                                #tensorboard_callback,
                                PlotLossesCallback(),
                                #earlystop_cb
                               ]
                    )

# TALOS: hyperparameter optimisation

In [2]:
from haberrspd.charCNN.models_tf import char_cnn_model_talos
from haberrspd.charCNN.auxiliary_tf import create_training_data
from numpy import vstack, asarray

### Main

In [31]:
class_weights,max_sentence_length = None,None # These needs to be set
# Set the parameter space
opt_params ={'conv_output_space' : [16,32,64],
             'number_of_filters' : [1,2,3,4],
             'filter_length' : [10,15,20],
             'pool_length' : [2,4,8],
             'dense_units_layer_3' : [32,16,8,4],
             'dense_units_layer_2' : [32,16,8,4],
             'batch_size': [16,32,64,128],
             'epochs': [64,128],
             'dropout': (0, 0.5, 5),
             'conv_kernel_initializer': ['uniform','normal'],
             'conv_bias_initializer': ['uniform','normal'],
             'dense_kernel_initializer': ['uniform','normal'],
             'dense_bias_initializer': ['uniform','normal'],
             'optimizer': ['adam', 'nadam', 'rmsprop'],
             'loss': ['logcosh', 'binary_crossentropy'],
             'conv_activation':['relu', 'elu'],
             'dense_activation':['relu', 'elu'],
             'last_activation': ['sigmoid'],
             # Stationary parameters, i.e. do not get optimised
             'class_weight':[class_weights],
             'max_sentence_length':[max_sentence_length]
            }

"""
'sgd': SGD,
'rmsprop': RMSprop,
'adagrad': Adagrad,
'adadelta': Adadelta,
'adam': Adam,
'adamax': Adamax,
'nadam': Nadam
"""

def size_of_optimisation_space(opt_params):
    space = 1
    for attribute in opt_params.keys():
        space*=len(opt_params[attribute])
        
    return space

int(size_of_optimisation_space(opt_params)*0.01)

159252

### Small

In [3]:
DATA_ROOT = Path("../data/") / "MJFF" / "preproc" # Note the relative path
# Load training data and auxiliary variables
X_train, X_test, y_train, y_test, max_sentence_length = \
create_training_data(DATA_ROOT,"EnglishData-preprocessed.csv",'sentence')
class_weights = dict(zip([0,1], 
                         class_weight.compute_class_weight('balanced',
                                                           list(set(y_train)),
                                                           y_train)))
class_weights

Total number of characters: 49


{0: 0.7840059790732437, 1: 1.3802631578947369}

In [4]:
assert type(class_weights) is dict
opt_params ={'conv_output_space' : [16],
             'number_of_filters' : [2],
             'filter_length' : [10],
             'pool_length' : [2],
             'dense_units_layer_3' : [4],
             'dense_units_layer_2' : [4],
             'batch_size': [32, 64],
             'epochs': [64],
             'dropout': [0.5],
             'conv_kernel_initializer': ['uniform','normal'],
             'conv_bias_initializer': ['normal'],
             'dense_kernel_initializer': ['normal'],
             'dense_bias_initializer': ['normal'],
             'optimizer': ['adam'],
             'loss': ['binary_crossentropy'],
             'conv_activation':['elu'],
             'dense_activation':['elu'],
             'last_activation': ['sigmoid'],
#              Stationary parameters, i.e. do not get optimised
#              'class_weight': [],#class_weights,
             'max_sentence_length':[max_sentence_length]
            }

### Run Talos

In [5]:
t = ta.Scan(vstack([X_train,X_test]), 
            asarray(y_train+y_test).reshape(-1, 1), 
            model=char_cnn_model_talos,
            disable_progress_bar=True,
            params=opt_params, 
            val_split=.1)
#             grid_downsample=0.01,  # Randomly samples 1% of the grid

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Instructions for updating:
Use tf.cast instead.








In [8]:
t.data.sort_values(by=['val_acc'],ascending=False)

Unnamed: 0,round_epochs,val_loss,val_acc,loss,acc,batch_size,conv_activation,conv_bias_initializer,conv_kernel_initializer,conv_output_space,...,dense_units_layer_3,dropout,epochs,filter_length,last_activation,loss.1,max_sentence_length,number_of_filters,optimizer,pool_length
1,64,1.608765,0.692308,0.300524,0.856053,32,elu,normal,normal,16,...,4,0.5,64,10,sigmoid,binary_crossentropy,14000,2,adam,2
3,64,0.946162,0.649573,0.38022,0.820782,64,elu,normal,normal,16,...,4,0.5,64,10,sigmoid,binary_crossentropy,14000,2,adam,2
2,64,1.374473,0.632479,0.412882,0.871306,64,elu,normal,uniform,16,...,4,0.5,64,10,sigmoid,binary_crossentropy,14000,2,adam,2
0,64,2.086819,0.589744,0.344246,0.812202,32,elu,normal,uniform,16,...,4,0.5,64,10,sigmoid,binary_crossentropy,14000,2,adam,2
