## CSC 580: Critical Thinking 4 - Toxicology Testing
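This notebook trains a small fully connected neural network (one hidden layer with dropout) on the Tox21 toxicology dataset using TensorFlow's v1 graph API. Only the first Tox21 task is modeled, as a binary classifier, and weighted classification accuracy is reported on the training, validation, and test splits.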

In [1]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from deepchem import deepchem as dc
from sklearn.metrics import accuracy_score

# Disable eager execution first so the TF v1 graph/session code below can run.
# This must happen BEFORE seeding TensorFlow: tf.random.set_seed() seeds the
# eager context while eager mode is active and seeds the default graph only in
# graph mode. Seeding first would leave the graph's random ops (weight
# initialisation, dropout) unseeded and the run non-reproducible.
tf.compat.v1.disable_eager_execution()

# Seeds are set via assignment parameters
np.random.seed(456)
tf.random.set_seed(456)


#### Preparing the Data
Using the [Tox21 Dataset](https://tox21.gov/resources/)

In [2]:
# load_tox21() yields three items; only the middle one, holding the
# train/validation/test dataset splits, is used in this notebook.
_, (train, valid, test), _ = dc.molnet.load_tox21()

# Pull the feature matrix (X), labels (y) and per-sample weights (w)
# out of each split.
train_X = train.X
train_y = train.y
train_w = train.w

valid_X = valid.X
valid_y = valid.y
valid_w = valid.w

test_X = test.X
test_y = test.y
test_w = test.w

In [3]:
# Keep only the first task (column 0) of the label and weight matrices so the
# model below is trained as a single-task binary classifier.
#
# The slices are taken from the dataset objects (train.y, train.w, ...) rather
# than from the already-assigned train_y/train_w variables. This keeps the cell
# idempotent: re-running it no longer tries to index a 1-D array with [:, 0].
train_y = train.y[:, 0]
valid_y = valid.y[:, 0]
test_y = test.y[:, 0]

train_w = train.w[:, 0]
valid_w = valid.w[:, 0]
test_w = test.w[:, 0]

#### Defining Tensorflow Graph

In [4]:
# --- Hyperparameters ---------------------------------------------------------
d = 1024 # Dimensionality of the feature vector
n_hidden = 50         # Number of units in the single hidden layer
learning_rate = .001  # Step size passed to the Adam optimizer
n_epochs = 10         # Full passes over the training set
batch_size = 100      # Rows per minibatch
dropout_prob = 0.5    # Fed into `keep_prob` during training (probability of KEEPING a unit)

# --- Graph definition --------------------------------------------------------
# Architecture: input (d) -> dense(n_hidden) + ReLU + dropout -> dense(1) -> sigmoid.
with tf.name_scope("placeholders"):
    x = tf.compat.v1.placeholder(tf.float32, (None, d))   # Batch of feature vectors
    y = tf.compat.v1.placeholder(tf.float32, (None,))     # Batch of 0/1 labels
    keep_prob = tf.compat.v1.placeholder(tf.float32) # Dropout placeholder

with tf.name_scope("hidden-layer"):
    W = tf.compat.v1.Variable(tf.compat.v1.random_normal((d, n_hidden)))
    b = tf.compat.v1.Variable(tf.compat.v1.random_normal((n_hidden,)))
    x_hidden = tf.compat.v1.nn.relu(tf.compat.v1.matmul(x,W) + b)
    # Applying dropout. The positional `keep_prob` argument of nn.dropout is
    # deprecated; pass the equivalent drop rate (1 - keep_prob) instead. Feeding
    # keep_prob=1.0 therefore still disables dropout at evaluation time.
    x_hidden = tf.compat.v1.nn.dropout(x_hidden, rate=1 - keep_prob)

with tf.name_scope("output"):
    # NOTE: W and b are rebound here; after this cell they refer to the
    # output-layer variables, not the hidden-layer ones.
    W = tf.compat.v1.Variable(tf.compat.v1.random_normal((n_hidden, 1)))
    b = tf.compat.v1.Variable(tf.compat.v1.random_normal((1,)))
    y_logit = tf.compat.v1.matmul(x_hidden,W) + b   # Shape (batch, 1)

    # The sigmoid gives the class probability of 1
    y_one_prob = tf.compat.v1.sigmoid(y_logit)

    # Rounding P(y=1) gives the predicted class label (0 or 1)
    y_pred = tf.compat.v1.round(y_one_prob)

with tf.name_scope("loss"):
    # Compute the cross-entropy term for each datapoint.
    # y is (batch,) while y_logit is (batch, 1), so add a trailing axis to match.
    y_expand = tf.compat.v1.expand_dims(y, 1)
    entropy = tf.compat.v1.nn.sigmoid_cross_entropy_with_logits(logits=y_logit, labels=y_expand)

    # Sum all contributions (a sum, not a mean, so the loss scales with batch size)
    l = tf.compat.v1.reduce_sum(entropy)

with tf.name_scope("optim"):
    train_op = tf.compat.v1.train.AdamOptimizer(learning_rate).minimize(l)

with tf.name_scope("summaries"):
    # Scalar loss summary, merged into a single op that can be run and logged.
    tf.compat.v1.summary.scalar("loss", l)
    merged = tf.compat.v1.summary.merge_all()

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


#### Training The Model

In [6]:
# Event-file writer for the graph and the per-step loss summaries.
train_writer = tf.compat.v1.summary.FileWriter('/tmp/fcnet-tox-21',tf.compat.v1.get_default_graph())
N = train_X.shape[0]  # Number of training rows

try:
    with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())

        # Minibatch implementation: walk the training set in order, in
        # contiguous slices of `batch_size` rows (the last slice may be shorter).
        step = 0
        for epoch in range(n_epochs):
            pos = 0
            while pos < N:
                batch_X = train_X[pos:pos + batch_size]
                batch_y = train_y[pos:pos + batch_size]
                # Dropout is active during training (keep_prob = dropout_prob).
                feed_dict = {x: batch_X, y: batch_y, keep_prob: dropout_prob}
                _, summary, loss = sess.run([train_op, merged, l], feed_dict=feed_dict)
                print("epoch %d, step %d, loss %f" % (epoch, step, loss))
                train_writer.add_summary(summary, step)

                step += 1
                pos += batch_size

            # End-of-epoch evaluation with dropout disabled (keep_prob = 1.0).
            # Accuracy is weighted with the per-sample weights of each split.
            train_y_pred = sess.run(y_pred, feed_dict={x: train_X, keep_prob: 1.0})
            valid_y_pred = sess.run(y_pred, feed_dict={x: valid_X, keep_prob: 1.0})
            train_weighted_score = accuracy_score(train_y, train_y_pred, sample_weight=train_w)
            valid_weighted_score = accuracy_score(valid_y, valid_y_pred, sample_weight=valid_w)
            print("Train Weighted Classification Accuracy: %f" % train_weighted_score)
            print("Valid Weighted Classification Accuracy: %f" % valid_weighted_score)

        # Final evaluation on the held-out test split, once training is done.
        test_y_pred = sess.run(y_pred, feed_dict={x: test_X, keep_prob: 1.0})
        test_weighted_score = accuracy_score(test_y, test_y_pred, sample_weight=test_w)
        print("Test Weighted Classification Accuracy: %f" % test_weighted_score)
finally:
    # Always flush buffered summaries and release the event file, even if
    # training is interrupted or raises; the writer was previously never closed.
    train_writer.close()


epoch 0, step 0, loss 1291.876831
epoch 0, step 1, loss 970.218201
epoch 0, step 2, loss 1134.187500
epoch 0, step 3, loss 1120.142944
epoch 0, step 4, loss 1176.735840
epoch 0, step 5, loss 1192.459595
epoch 0, step 6, loss 1029.912598
epoch 0, step 7, loss 1315.102783
epoch 0, step 8, loss 1131.226807
epoch 0, step 9, loss 1034.675049
epoch 0, step 10, loss 1000.422241
epoch 0, step 11, loss 799.628784
epoch 0, step 12, loss 1245.834106
epoch 0, step 13, loss 1064.413086
epoch 0, step 14, loss 1029.998779
epoch 0, step 15, loss 984.309814
epoch 0, step 16, loss 1367.160156
epoch 0, step 17, loss 1185.448120
epoch 0, step 18, loss 1671.988037
epoch 0, step 19, loss 1672.233154
epoch 0, step 20, loss 1529.798340
epoch 0, step 21, loss 1524.408203
epoch 0, step 22, loss 1682.842896
epoch 0, step 23, loss 1499.609863
epoch 0, step 24, loss 1253.440918
epoch 0, step 25, loss 1446.849365
epoch 0, step 26, loss 1204.458740
epoch 0, step 27, loss 1643.072144
epoch 0, step 28, loss 1346.53051