### 4. Fully Connected Deep Networks

In [1]:
import deepchem as dc

  return f(*args, **kwds)


In [2]:
# Load the Tox21 benchmark through MoleculeNet. The loader returns a 3-tuple;
# only its middle element -- the (train, valid, test) datasets -- is used here,
# the first and last elements are discarded.
_, (train, valid, test), _ = dc.molnet.load_tox21()

# Pull the feature matrix (X), labels (y) and per-sample weights (w) out of
# each split.
train_X = train.X
train_y = train.y
train_w = train.w

valid_X = valid.X
valid_y = valid.y
valid_w = valid.w

test_X = test.X
test_y = test.y
test_w = test.w

Loading raw samples now.
shard_size: 8192
About to start loading CSV from /var/folders/gt/jd9v6_wj1398xf69593kqr700000gn/T/tox21.csv.gz
Loading shard 1 of size 8192.
Featurizing sample 0
Featurizing sample 1000
Featurizing sample 2000
Featurizing sample 3000
Featurizing sample 4000
Featurizing sample 5000
Featurizing sample 6000
Featurizing sample 7000
TIMING: featurizing shard 0 took 15.911 s
TIMING: dataset construction took 16.289 s
Loading dataset from disk.
TIMING: dataset construction took 0.543 s
Loading dataset from disk.
TIMING: dataset construction took 0.529 s
Loading dataset from disk.
TIMING: dataset construction took 0.332 s
Loading dataset from disk.
TIMING: dataset construction took 0.329 s
Loading dataset from disk.


In [3]:
# Remove extra tasks: keep only column 0 of the labels and weights so the
# problem becomes single-task.
#
# The slices are taken from the dataset objects rather than from
# train_y/train_w themselves so that this cell is idempotent. Writing
# `train_y = train_y[:, 0]` works once, but raises an IndexError when the cell
# is re-run because the array is already 1-D by then.
train_y = train.y[:, 0]
valid_y = valid.y[:, 0]
test_y = test.y[:, 0]
train_w = train.w[:, 0]
valid_w = valid.w[:, 0]
test_w = test.w[:, 0]

In [4]:
# Sanity check: show the first ten training labels.
train_y[:10]

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])

In [5]:
# Sanity check: show the first ten validation labels.
valid_y[:10]

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])

In [6]:
# Show the first ten per-sample training weights. These are later passed as
# `sample_weight` to the accuracy metric, so a weight of 0 removes that sample
# from the score.
# NOTE(review): zero weights presumably mark samples without a label for this
# task -- confirm against the MoleculeNet loader.
train_w[:10]

array([ 1.,  1.,  0.,  1.,  1.,  1.,  1.,  1.,  1.,  0.])

In [7]:
# Show the first ten rows of the training feature matrix.
train_X[:10]

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ..., 
       [1, 0, 0, ..., 0, 0, 0],
       [0, 1, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [8]:
# (number of training samples, number of features per sample)
train_X.shape

(6264, 1024)

In [9]:
# Labels are a 1-D vector after the extra tasks were removed.
train_y.shape

(6264,)

In [10]:
import tensorflow as tf

In [21]:
# Model and training hyperparameters.
d = 1024              # input feature dimension (second axis of train_X)
n_hidden = 50         # number of units in the single hidden layer
learning_rate = 1e-3  # step size handed to the Adam optimizer
n_epochs = 10         # full passes over the training set
batch_size = 100      # samples per optimization step
# Despite its name, this value is fed to the `keep_prob` placeholder during
# training, i.e. it is the probability of KEEPING a hidden unit. A value of 1.0
# therefore disables dropout entirely.
dropout_prob = 1.0

In [24]:
# Start from an empty default graph so that re-running the graph-building
# cells does not add duplicate ops to a previously built graph.
tf.reset_default_graph()

In [25]:
with tf.name_scope("placeholders"):
    # Mini-batch of input feature vectors; the batch dimension is left open.
    x = tf.placeholder(dtype=tf.float32, shape=(None, d))
    # One label per sample in the mini-batch.
    y = tf.placeholder(dtype=tf.float32, shape=(None,))
    # Probability of keeping a hidden unit in the dropout layer, fed per run.
    keep_prob = tf.placeholder(dtype=tf.float32)

In [26]:
with tf.name_scope("hidden-layer"):
    # Weights and biases are initialized from a normal distribution.
    W_h = tf.Variable(tf.random_normal((d, n_hidden)))
    b_h = tf.Variable(tf.random_normal((n_hidden,)))
    # Affine transform of the inputs followed by a ReLU non-linearity.
    hidden_pre_activation = tf.matmul(x, W_h) + b_h
    hidden_activation = tf.nn.relu(hidden_pre_activation)
    # Apply dropout: each hidden unit is kept with probability keep_prob.
    x_hidden = tf.nn.dropout(hidden_activation, keep_prob)

In [27]:
with tf.name_scope("output"):
    W_o = tf.Variable(tf.random_normal((n_hidden, 1)))
    b_o = tf.Variable(tf.random_normal((1,)))

    # Raw score (logit) of the positive class for every sample.
    y_logit = tf.matmul(x_hidden, W_o) + b_o
    # The sigmoid of the logit is the predicted probability P(y = 1).
    y_one_prob = tf.sigmoid(y_logit)
    # Rounding P(y = 1) yields the hard 0/1 class prediction.
    y_pred = tf.round(y_one_prob)

with tf.name_scope("loss"):
    # The labels are fed as a vector; add a trailing axis so they line up with
    # the single-column logits.
    y_expand = tf.expand_dims(y, axis=1)
    # Cross-entropy term for each datapoint, computed directly from the logits.
    entropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=y_expand, logits=y_logit)
    # The batch loss is the plain sum of the per-sample terms.
    # NOTE(review): the per-sample weights are not used here, although the
    # accuracy metric later is weighted -- confirm this is intended.
    l = tf.reduce_sum(entropy)

with tf.name_scope("optim"):
    optimizer = tf.train.AdamOptimizer(learning_rate)
    train_op = optimizer.minimize(l)

with tf.name_scope("summaries"):
    # Track the batch loss in TensorBoard and bundle all summaries into one op.
    tf.summary.scalar("loss", l)
    merged = tf.summary.merge_all()

In [29]:
# Writer for TensorBoard event files; passing the default graph also records
# the graph structure alongside the summaries.
train_writer = tf.summary.FileWriter('/tmp/fcnet-tox21-dropout', tf.get_default_graph())

In [34]:
# Train the network on mini-batches, then predict on every split.
step = 0  # running mini-batch counter, used as the step index of each summary
N = train_X.shape[0]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for epoch in range(n_epochs):
        # Walk the training set in order, one contiguous slice per step. The
        # last slice of an epoch is simply shorter when batch_size does not
        # divide N evenly.
        for pos in range(0, N, batch_size):
            batch_X = train_X[pos:pos+batch_size]
            batch_y = train_y[pos:pos+batch_size]

            # dropout_prob is fed as the keep probability of the dropout layer.
            feed_dict = {x: batch_X, y: batch_y, keep_prob: dropout_prob}
            _, summary, loss = sess.run([train_op, merged, l], feed_dict=feed_dict)

            print("epoch %d, step %d, loss: %f" % (epoch, step, loss))
            train_writer.add_summary(summary, step)

            step += 1

    # The summary writer buffers events; flush so that everything logged
    # during training is on disk for TensorBoard once this cell finishes.
    train_writer.flush()

    # Make Predictions (set keep_prob to 1.0 for predictions)
    train_y_pred = sess.run(y_pred, feed_dict={x: train_X, keep_prob: 1.0})
    valid_y_pred = sess.run(y_pred, feed_dict={x: valid_X, keep_prob: 1.0})
    test_y_pred = sess.run(y_pred, feed_dict={x: test_X, keep_prob: 1.0})

epoch 0, step 0, loss: 5115.893066
epoch 0, step 1, loss: 4969.027344
epoch 0, step 2, loss: 4838.918457
epoch 0, step 3, loss: 4810.499512
epoch 0, step 4, loss: 5042.317871
epoch 0, step 5, loss: 4906.699219
epoch 0, step 6, loss: 4586.889160
epoch 0, step 7, loss: 4844.467285
epoch 0, step 8, loss: 4981.776855
epoch 0, step 9, loss: 4379.239258
epoch 0, step 10, loss: 4012.468018
epoch 0, step 11, loss: 4254.030273
epoch 0, step 12, loss: 4332.365723
epoch 0, step 13, loss: 4234.933105
epoch 0, step 14, loss: 4021.449951
epoch 0, step 15, loss: 3774.566895
epoch 0, step 16, loss: 3771.882080
epoch 0, step 17, loss: 3792.028809
epoch 0, step 18, loss: 3790.996582
epoch 0, step 19, loss: 4079.286377
epoch 0, step 20, loss: 4253.990723
epoch 0, step 21, loss: 3850.843994
epoch 0, step 22, loss: 3500.473389
epoch 0, step 23, loss: 3347.063232
epoch 0, step 24, loss: 3531.354004
epoch 0, step 25, loss: 3500.452637
epoch 0, step 26, loss: 3638.453369
epoch 0, step 27, loss: 3408.017578
ep

In [32]:
from sklearn.metrics import accuracy_score

In [35]:
# Weighted accuracy: every sample counts in proportion to its weight, so
# samples with weight 0 do not influence the score.
train_weighted_score = accuracy_score(y_true=train_y, y_pred=train_y_pred, sample_weight=train_w)
print("Train Weighted Classification Accuracy: %f" % (train_weighted_score,))

valid_weighted_score = accuracy_score(y_true=valid_y, y_pred=valid_y_pred, sample_weight=valid_w)
print("Valid Weighted Classification Accuracy: %f" % (valid_weighted_score,))

Train Weighted Classification Accuracy: 0.647355
Valid Weighted Classification Accuracy: 0.602728
