# Library Imports

In [1]:
import os
import numpy as np
import pandas as pd
from scipy import io
# from sklearn import metrics
from sklearn.metrics import roc_auc_score

In [2]:
import keras

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [3]:
os.chdir('/Users/sean/CloudStation/Metis/projects/project5')
!pwd

/Users/sean/CloudStation/Metis/projects/project5


# Load Data

In [4]:
# load data
raw_data = './data/raw/tox21/'
y_tr = pd.read_csv(raw_data+'tox21_labels_train.csv.gz', index_col=0, compression="gzip")
y_te = pd.read_csv(raw_data+'tox21_labels_test.csv.gz', index_col=0, compression="gzip")
x_tr_dense = pd.read_csv(raw_data+'tox21_dense_train.csv.gz', index_col=0, compression="gzip").values
x_te_dense = pd.read_csv(raw_data+'tox21_dense_test.csv.gz', index_col=0, compression="gzip").values
x_tr_sparse = io.mmread(raw_data+'tox21_sparse_train.mtx.gz').tocsc()
x_te_sparse = io.mmread(raw_data+'tox21_sparse_test.mtx.gz').tocsc()
# filter out very sparse features
sparse_col_idx = ((x_tr_sparse > 0).mean(0) > 0.05).A.ravel()
x_tr = np.hstack([x_tr_dense, x_tr_sparse[:, sparse_col_idx].A])
x_te = np.hstack([x_te_dense, x_te_sparse[:, sparse_col_idx].A])

# Choose Target

In [5]:
y_tr.columns

Index(['NR.AhR', 'NR.AR', 'NR.AR.LBD', 'NR.Aromatase', 'NR.ER', 'NR.ER.LBD',
       'NR.PPAR.gamma', 'SR.ARE', 'SR.ATAD5', 'SR.HSE', 'SR.MMP', 'SR.p53'],
      dtype='object')

In [6]:
# for target in y_tr.columns:
target = 'NR.AhR'
rows_tr = np.isfinite(y_tr[target]).values
rows_te = np.isfinite(y_te[target]).values
x,y = x_tr[rows_tr], y_tr[target][rows_tr]
x.shape

(8441, 1644)

# Build Neural Network

In [7]:
type(x.shape[1:])

tuple

In [8]:
drop_out=0.5
L2_reg = keras.regularizers.l2(0.00001)
model = keras.Sequential([
    keras.layers.InputLayer(input_shape=x.shape[1:],name='Input_Layer'),
    keras.layers.Dense(units=2048, activation="relu",name='h1_relu_activation',\
                       kernel_regularizer=L2_reg),
    keras.layers.Dense(units=2048, activation="relu",name='h2_relu_activation',\
                       kernel_regularizer=L2_reg),
    keras.layers.Dropout(drop_out,name='Dropout'),
    keras.layers.Dense(units=1, activation='sigmoid')
])
model.compile(optimizer='sgd', loss='binary_crossentropy', metrics=['mean_squared_error'])
model.summary()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
h1_relu_activation (Dense)   (None, 2048)              3368960   
_________________________________________________________________
h2_relu_activation (Dense)   (None, 2048)              4196352   
_________________________________________________________________
Dropout (Dropout)            (None, 2048)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 2049      
Total params: 7,567,361
Trainable params: 7,567,361
Non-trainable params: 0
_________________________________________________________________


In [9]:
model.fit(
    x, y, batch_size=512, epochs=100, validation_split=0.2, verbose=1,
    callbacks=[
        keras.callbacks.EarlyStopping(patience=8,verbose=1, restore_best_weights=True),
        keras.callbacks.ReduceLROnPlateau(factor=0.5,patience=3,verbose=1)
    ])

Instructions for updating:
Use tf.cast instead.
Train on 6752 samples, validate on 1689 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100

Epoch 00004: ReduceLROnPlateau reducing learning rate to 0.004999999888241291.
Epoch 5/100
Epoch 6/100
Epoch 7/100

Epoch 00007: ReduceLROnPlateau reducing learning rate to 0.0024999999441206455.
Epoch 8/100
Epoch 9/100
Epoch 10/100

Epoch 00010: ReduceLROnPlateau reducing learning rate to 0.0012499999720603228.
Epoch 11/100
Epoch 12/100
Epoch 13/100

Epoch 00013: ReduceLROnPlateau reducing learning rate to 0.0006249999860301614.
Epoch 14/100
Epoch 15/100
Epoch 16/100

Epoch 00016: ReduceLROnPlateau reducing learning rate to 0.0003124999930150807.
Epoch 17/100
Epoch 18/100
Restoring model weights from the end of the best epoch
Epoch 00018: early stopping


<keras.callbacks.History at 0x10a4cecf8>

In [10]:
auc_te = roc_auc_score(y_te[target][rows_te], model.predict(x_te[rows_te]))
print("%15s: %3.5f" % (target, auc_te))

         NR.AhR: 0.50000
