Using Keras and SELU multilayer perceptrons on Tox21
====


In [1]:
import numpy as np
import pandas as pd
from scipy import io
    
from keras.models import Sequential, Model
from keras.layers import Input, Dense, Dropout, concatenate
from keras.layers.noise import AlphaDropout
from keras import optimizers
from keras import regularizers
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras import backend as K

from hyperopt import fmin, tpe, hp, STATUS_OK, STATUS_FAIL, Trials

from sklearn import preprocessing
from sklearn.metrics import roc_auc_score
import sklearn.utils 

import tempfile

import tensorflow as tf

Using TensorFlow backend.


In [2]:
# NOTE(review): absolute, machine-specific path -- point this at the directory
# that holds the Tox21 files before running the notebook elsewhere.
source_directory = "/media/matthias/Big-disk/tox21-hochreiter/"

# load dense features and labels (first CSV column is used as the row index)
y_tr = pd.read_csv(source_directory + 'tox21_labels_train.csv.gz', index_col=0, compression="gzip")
y_te = pd.read_csv(source_directory + 'tox21_labels_test.csv.gz', index_col=0, compression="gzip")
x_tr_dense = pd.read_csv(source_directory + 'tox21_dense_train.csv.gz', index_col=0, compression="gzip").values
x_te_dense = pd.read_csv(source_directory + 'tox21_dense_test.csv.gz', index_col=0, compression="gzip").values

# load sparse features (Matrix Market format), converted to CSC for column slicing
x_tr_sparse = io.mmread(source_directory + 'tox21_sparse_train.mtx.gz').tocsc()
x_te_sparse = io.mmread(source_directory + 'tox21_sparse_test.mtx.gz').tocsc()

# filter out very sparse features: keep only columns that are non-zero in more
# than 5% of the TRAINING rows; the same column mask is then applied to the test set.
# np.asarray(...).ravel() flattens the per-column result to a 1-D boolean mask.
sparse_col_idx = np.asarray((x_tr_sparse > 0).mean(0) > 0.05).ravel()

# combine filtered sparse features with dense features.
# .toarray() is used instead of the deprecated `.A` shorthand, which newer SciPy
# releases no longer provide on sparse matrices.
x_tr = np.hstack([x_tr_dense, x_tr_sparse[:, sparse_col_idx].toarray()])
x_te = np.hstack([x_te_dense, x_te_sparse[:, sparse_col_idx].toarray()])

# scale input features to zero mean and unit variance (might be important for SELU network);
# the scaler is fitted on the training data only and reused for the test data
scaler = preprocessing.StandardScaler().fit(x_tr)
x_tr = scaler.transform(x_tr)
x_te = scaler.transform(x_te)

In [3]:
def data(target):
    """
    Select the samples for which a result of the given assay is available.

    Reads the module-level `x_tr`, `y_tr`, `x_te` and `y_te`. Rows whose label
    in column `target` is not finite (e.g. NaN) are dropped from both the
    feature matrix and the label column.

    Returns the tuple (x_train, y_train, x_test, y_test).
    """
    train_labels = y_tr[target]
    test_labels = y_te[target]

    # boolean row masks: True where a measurement for this assay exists
    has_train_label = np.isfinite(train_labels).values
    has_test_label = np.isfinite(test_labels).values

    return (
        x_tr[has_train_label],
        train_labels[has_train_label],
        x_te[has_test_label],
        test_labels[has_test_label],
    )

In [74]:
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot

def dense_column (input_node, units, layers, dropout):
    """
    Stack `layers` repetitions of (AlphaDropout -> SELU Dense) on top of `input_node`.

    Every Dense layer has `units` units and uses the 'lecun_normal' kernel
    initializer; every AlphaDropout layer uses the rate `dropout`.
    With `layers` == 0 the input node is returned unchanged.
    """
    tensor = input_node
    for _ in range(layers):
        dropped = AlphaDropout(dropout)(tensor)
        tensor = Dense(units, activation='selu', kernel_initializer='lecun_normal')(dropped)
    return tensor

def accessory_columns_block(input_node, number_of_columns = 10, units_per_column = 20, layers = 5, dropout = 0.03):
    """
    Build several parallel dense columns on the same input and merge their outputs.

    Parameters:
        input_node: tensor every column is attached to.
        number_of_columns: how many independent columns to build (must be >= 1).
        units_per_column: units of each Dense layer inside a column.
        layers: number of (AlphaDropout -> Dense) repetitions per column.
        dropout: AlphaDropout rate used inside the columns.

    Returns:
        The concatenation of all column outputs; with a single column, that
        column's output is returned directly.

    Raises:
        ValueError: if `number_of_columns` is smaller than 1.
    """
    if number_of_columns < 1:
        raise ValueError("number_of_columns must be at least 1, got %r" % (number_of_columns,))

    columns = [dense_column(input_node, units_per_column, layers, dropout)
               for _ in range(number_of_columns)]

    # Keras merge layers expect a list of at least two tensors, so a lone
    # column is passed through as-is instead of being concatenated.
    if len(columns) == 1:
        return columns[0]
    return concatenate(columns)

def _hidden_layer_widths(hidden_units, hidden_layers, shape):
    """Return the unit count of each hidden layer that follows the first Dense layer."""
    if hidden_layers < 1:
        return []
    if shape != 'triangle':
        return [hidden_units] * hidden_layers

    # triangle shape: widths shrink linearly, starting at `hidden_units`; the
    # total shrinkage spread over the layers is (hidden_units - 4), but never
    # less than 4. Widths are clamped to at least 1 so that very small
    # `hidden_units` values cannot produce an empty layer.
    difference = max(hidden_units - 4, 4)
    decrement_per_layer = difference / hidden_layers
    return [max(int(hidden_units - (decrement_per_layer * i)), 1) for i in range(hidden_layers)]


def f_nn(p):
    """
    Build, train and evaluate one SELU multilayer perceptron (hyperopt objective).

    Parameters:
        p: dict of tunable hyperparameters with the keys 'hidden layers',
           'hidden units', 'batch size', 'dropout rate', 'learning rate' and
           'skip_connection'. It is merged with the module-level
           `constant_parameters` (which supplies 'shape', 'kernel initializer',
           'patience' and 'verbose'); the caller's dict is not modified.

    Globals read: `constant_parameters`, `x_train`, `y_train`, `x_test`,
    `y_test` and `target` (the latter only for the progress message).

    Returns:
        {'loss': -AUC, 'status': STATUS_OK, 'model': model} on success, or
        {'status': STATUS_FAIL} (no 'loss' key) if TensorFlow raised a
        ResourceExhaustedError.

    NOTE(review): the test split is passed as `validation_data`, so early
    stopping, learning-rate reduction and checkpoint selection all see the
    data the AUC is reported on; the score is therefore likely optimistic.
    The 'optimizer' and 'accessory columns' entries of `constant_parameters`
    are not used by this function.
    """
    # function-scope import: only needed for the temporary checkpoint path
    import os

    # add constant parameters to parameter dictionary (workaround for hyperopt, take care to set global variable!)
    p = {**p, **constant_parameters}

    try:
        max_epochs = 1000

        # some parameters need to be cast to integer
        p['hidden layers'] = int(p['hidden layers'])
        p['hidden units'] = int(p['hidden units'])
        p['batch size'] = int(p['batch size'])

        number_of_features = x_train.shape[1]

        # clear session (to avoid clutter from old models)
        K.clear_session()

        # the model is a classical multilayer perceptron with SELU activations and Alpha dropout
        inputs = Input(shape=(number_of_features,))
        nn = Dense(p['hidden units'], activation='selu', kernel_initializer=p['kernel initializer'])(inputs)

        for number_of_units_in_this_layer in _hidden_layer_widths(p['hidden units'], p['hidden layers'], p['shape']):
            nn = AlphaDropout(p['dropout rate'])(nn)
            nn = Dense(number_of_units_in_this_layer, activation='selu', kernel_initializer=p['kernel initializer'])(nn)

        if p['skip_connection']:
            # add additional layer before output layer, feed in original input
            nn = concatenate([nn, inputs])
            nn = Dense(p['hidden units'], activation='selu', kernel_initializer=p['kernel initializer'])(nn)

        outputs = Dense(1, activation='sigmoid')(nn)

        optimizer = optimizers.SGD(lr=p['learning rate'], momentum=0.1, nesterov=True)

        model = Model(inputs=inputs, outputs=outputs)
        model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['binary_accuracy'])

        # weigh classes (in case labels are unbalanced). compute_class_weight
        # returns an array aligned with `classes`; Keras expects a dict that
        # maps each class to its weight, so the array is converted here.
        classes = np.unique(y_train)
        weights = sklearn.utils.class_weight.compute_class_weight(
            class_weight='balanced', classes=classes, y=y_train)
        class_weight = {int(cls): float(weight) for cls, weight in zip(classes, weights)}

        # the best weights are checkpointed into a temporary directory that is
        # removed again (together with the checkpoint) when the block exits
        with tempfile.TemporaryDirectory() as checkpoint_dir:
            tmpfn = os.path.join(checkpoint_dir, 'best_weights.h5')

            callbacks = [EarlyStopping(patience=p['patience']),
                         ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-06, verbose=1),
                         ModelCheckpoint(tmpfn, save_best_only=True, save_weights_only=True)]
            model.fit(x_train, y_train, epochs=max_epochs, batch_size=p['batch size'],
                      validation_data=(x_test, y_test), callbacks=callbacks,
                      class_weight=class_weight, verbose=p['verbose'])

            # load model weights of epoch that had best results (if no
            # checkpoint was ever written, keep the weights of the last epoch)
            if os.path.exists(tmpfn):
                model.load_weights(tmpfn)

        # the sigmoid output is the predicted probability of the positive class
        p_te = model.predict(x_test)

        auc_te = roc_auc_score(y_test, p_te.ravel())
        print("%15s score on test: %3.5f" % (target, auc_te))

        # hyperopt minimises the loss, hence the negated AUC
        return {'loss': -auc_te, 'status': STATUS_OK, 'model': model}

    except tf.errors.ResourceExhaustedError:
        # fail gracefully if model is too large to fit into memory (so hyperparameter search can continue)
        print("ResourceExhaustedError occurred, skipping...")
        return {'status': STATUS_FAIL}

In [103]:
# settings that stay fixed for every run; f_nn reads this dict as a global
constant_parameters = {
        'shape': 'square',
        'optimizer': 'sgd',
        'kernel initializer': 'lecun_normal',
        'accessory columns': False,
        'patience': 10,
        'verbose': 0
}

# hyperparameters passed to f_nn; float-valued counts are cast to int inside f_nn
parameters = {
    'batch size': 100.0, 
    'dropout rate': 0.05, 
    'hidden layers': 6.0, 
    'hidden units': 200.0, 
    'learning rate': 8e-02,
    'skip_connection': False
}

# test AUC of every assay that was trained successfully
scores = []

# `target`, `x_train`, `y_train`, `x_test` and `y_test` are deliberately
# module-level names: f_nn reads them as globals.
for target in ['NR.AhR', 'NR.AR', 'NR.AR.LBD', 'NR.Aromatase', 'NR.ER', 'NR.ER.LBD', 
               'NR.PPAR.gamma', 'SR.ARE', 'SR.ATAD5', 'SR.HSE', 'SR.MMP', 'SR.p53']:
    x_train, y_train, x_test, y_test = data(target)
    result = f_nn(parameters)
    if result['status'] != STATUS_OK:
        # a failed run carries no 'loss' entry; leave it out of the average
        print("%15s skipped (status: %s)" % (target, result['status']))
        continue
    # f_nn reports the negated AUC as loss
    scores.append(-result['loss'])

scores = np.array(scores)

# mean test AUC over all successfully evaluated assays
np.average(scores)


Epoch 00009: reducing learning rate to 0.03999999910593033.

Epoch 00014: reducing learning rate to 0.019999999552965164.
         NR.AhR score on test: 0.86605

Epoch 00006: reducing learning rate to 0.03999999910593033.

Epoch 00011: reducing learning rate to 0.019999999552965164.
          NR.AR score on test: 0.80967

Epoch 00007: reducing learning rate to 0.03999999910593033.

Epoch 00012: reducing learning rate to 0.019999999552965164.
      NR.AR.LBD score on test: 0.75719

Epoch 00006: reducing learning rate to 0.03999999910593033.

Epoch 00011: reducing learning rate to 0.019999999552965164.
   NR.Aromatase score on test: 0.77411

Epoch 00009: reducing learning rate to 0.03999999910593033.

Epoch 00014: reducing learning rate to 0.019999999552965164.
          NR.ER score on test: 0.78853

Epoch 00007: reducing learning rate to 0.03999999910593033.

Epoch 00012: reducing learning rate to 0.019999999552965164.
      NR.ER.LBD score on test: 0.68043

Epoch 00012: reducing learn

0.80579009497722787