### 5. Hyperparameter Optimization

In [4]:
import deepchem as dc
from sklearn.metrics import accuracy_score

In [2]:
# Load the pre-split Tox21 datasets; the first and last elements of the
# returned triple are not needed here.
_, (train, valid, test), _ = dc.molnet.load_tox21()

# Keep only column 0 of the label and weight matrices so that every split
# describes a single task (the remaining task columns are dropped).
train_X, train_y, train_w = train.X, train.y[:, 0], train.w[:, 0]
valid_X, valid_y, valid_w = valid.X, valid.y[:, 0], valid.w[:, 0]
test_X, test_y, test_w = test.X, test.y[:, 0], test.w[:, 0]

Loading dataset from disk.
Loading dataset from disk.
Loading dataset from disk.


In [5]:
from sklearn.ensemble import RandomForestClassifier

# Baseline model: a random forest with class-balanced sample weighting.
sklearn_model = RandomForestClassifier(class_weight="balanced", n_estimators=50)
print("About to fit model on training set.")
sklearn_model.fit(train_X, train_y)

# Predict on every split with the fitted forest.
train_y_pred = sklearn_model.predict(train_X)
valid_y_pred = sklearn_model.predict(valid_X)
test_y_pred = sklearn_model.predict(test_X)

# Report the sample-weighted accuracy for train, valid and test in turn.
for report_fmt, y_true, y_hat, sample_w in (
    ("Weighted train Classification Accuracy: %f", train_y, train_y_pred, train_w),
    ("Weighted valid Classification Accuracy: %f", valid_y, valid_y_pred, valid_w),
    ("Weighted test Classification Accuracy: %f", test_y, test_y_pred, test_w),
):
    weighted_score = accuracy_score(y_true, y_hat, sample_weight=sample_w)
    print(report_fmt % weighted_score)

About to fit model on training set.
Weighted train Classification Accuracy: 0.988575
Weighted valid Classification Accuracy: 0.664234
Weighted test Classification Accuracy: 0.664475


### Graduate Student Descent

In [9]:
import tensorflow as tf

In [17]:
def eval_tox21_hyperparams(n_hidden=50, n_layers=1, learning_rate=.001,
                           dropout_prob=0.5, n_epochs=45, batch_size=100,
                           weight_positives=True):
    """Train a fully connected binary classifier and score it on the validation split.

    The function builds a fresh TensorFlow graph, trains it with Adam on the
    notebook-level ``train_X`` / ``train_y`` / ``train_w`` arrays using
    sequential mini-batches, and then evaluates on ``valid_X`` / ``valid_y`` /
    ``valid_w``. The loss is written as a TensorBoard summary under ``/tmp``.

    Args:
        n_hidden: Width of every hidden layer.
        n_layers: Number of stacked hidden layers. ``0`` connects the input
            directly to the output unit.
        learning_rate: Learning rate handed to ``tf.train.AdamOptimizer``.
        dropout_prob: Value fed to the ``keep_prob`` placeholder of
            ``tf.nn.dropout`` during training, i.e. it is used as the
            probability of *keeping* a unit. Prediction always feeds 1.0.
        n_epochs: Number of full passes over the training data.
        batch_size: Number of rows per mini-batch.
        weight_positives: If true, each example's cross-entropy is multiplied
            by its entry in ``train_w`` before summation.

    Returns:
        The ``accuracy_score`` of the trained model on the validation split,
        weighted by ``valid_w``.
    """
    print("---------------------------------------------")
    print("Model hyperparameters")
    print("n_hidden = %d" % n_hidden)
    print("n_layers = %d" % n_layers)
    print("learning_rate = %f" % learning_rate)
    print("n_epochs = %d" % n_epochs)
    print("batch_size = %d" % batch_size)
    print("weight_positives = %s" % str(weight_positives))
    print("dropout_prob = %f" % dropout_prob)
    print("---------------------------------------------")
    print("---------------------------------------------")

    d = 1024
    graph = tf.Graph()
    with graph.as_default():

        # Generate tensorflow graph
        with tf.name_scope("placeholders"):
            x = tf.placeholder(tf.float32, (None, d))
            y = tf.placeholder(tf.float32, (None,))
            w = tf.placeholder(tf.float32, (None,))
            keep_prob = tf.placeholder(tf.float32)

        # Stack the hidden layers: each layer consumes the previous layer's
        # activations, so only the first weight matrix has `d` input rows.
        x_hidden = x
        in_dim = d
        for layer in range(n_layers):
            with tf.name_scope("layer-%d" % layer):
                W = tf.Variable(tf.random_normal((in_dim, n_hidden)))
                b = tf.Variable(tf.random_normal((n_hidden,)))
                x_hidden = tf.nn.relu(tf.matmul(x_hidden, W) + b)
                # Apply dropout
                x_hidden = tf.nn.dropout(x_hidden, keep_prob)
            in_dim = n_hidden

        with tf.name_scope("output"):
            W = tf.Variable(tf.random_normal((in_dim, 1)))
            b = tf.Variable(tf.random_normal((1,)))
            y_logit = tf.matmul(x_hidden, W) + b
            # the sigmoid gives the class probability of 1
            y_one_prob = tf.sigmoid(y_logit)
            # Rounding P(y=1) gives the predicted class label.
            y_pred = tf.round(y_one_prob)

        with tf.name_scope("loss"):
            # Compute the cross-entropy term for each datapoint
            y_expand = tf.expand_dims(y, 1)
            entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=y_logit, labels=y_expand)
            # Multiply by weights
            if weight_positives:
                w_expand = tf.expand_dims(w, 1)
                entropy = w_expand * entropy

            # Sum all contributions
            l = tf.reduce_sum(entropy)

        with tf.name_scope("optim"):
            train_op = tf.train.AdamOptimizer(learning_rate).minimize(l)

        with tf.name_scope("summaries"):
            tf.summary.scalar("loss", l)
            merged = tf.summary.merge_all()

        # NOTE(review): the directory name does not encode n_layers or
        # dropout_prob, so runs differing only in those share a log directory.
        hyperparam_str = "d-%d-hidden-%d-lr-%f-n_epochs-%d-batch_size-%d-weight_pos-%s" % (d, n_hidden, learning_rate, n_epochs, batch_size, str(weight_positives))
        train_writer = tf.summary.FileWriter('/tmp/fcnet-func-' + hyperparam_str, tf.get_default_graph())
        N = train_X.shape[0]

    try:
        with tf.Session(graph=graph) as sess:
            sess.run(tf.global_variables_initializer())
            step = 0
            for epoch in range(n_epochs):
                for pos in range(0, N, batch_size):
                    batch_X = train_X[pos:pos+batch_size]
                    batch_y = train_y[pos:pos+batch_size]
                    batch_w = train_w[pos:pos+batch_size]

                    feed_dict = {x: batch_X, y: batch_y, w: batch_w, keep_prob: dropout_prob}
                    _, summary, loss = sess.run([train_op, merged, l], feed_dict=feed_dict)
                    train_writer.add_summary(summary, step)
                    step += 1

            # Predict once, after training; keep_prob is 1.0 so no units are
            # dropped at prediction time.
            valid_y_pred = sess.run(y_pred, feed_dict={x: valid_X, keep_prob: 1.0})

            weighted_score = accuracy_score(valid_y, valid_y_pred, sample_weight=valid_w)
            print("Valid Weighted Classification Accuracy: %f" % weighted_score)
    finally:
        # Flush pending summaries and release the event file even on failure.
        train_writer.close()
    return weighted_score

In [7]:
import numpy as np

In [19]:
# Grid search: every combination below is trained `n_reps` times, and the
# validation scores are collected per configuration in `scores`, keyed by
# (hidden_size, n_epochs, dropout, n_layers).
scores = {}
n_reps = 3
hidden_sizes = [30, 60]
epochs = [15, 30]
dropouts = [.5]
num_layers = [1, 2]

for rep in range(n_reps):
    for n_epochs in epochs:
        for hidden_size in hidden_sizes:
            for dropout in dropouts:
                for n_layers in num_layers:
                    score = eval_tox21_hyperparams(n_hidden=hidden_size, n_epochs=n_epochs, dropout_prob=dropout, n_layers=n_layers)
                    config = (hidden_size, n_epochs, dropout, n_layers)
                    scores.setdefault(config, []).append(score)

# Print the header once, directly above the collected scores, instead of
# after every single training run.
print("All Scores")
print(scores)

---------------------------------------------
Model hyperparameters
n_hidden = 30
n_layers = 1
learning_rate = 0.001000
n_epochs = 15
batch_size = 100
weight_positives = True
dropout_prob = 0.500000
---------------------------------------------
---------------------------------------------
Valid Weighted Classification Accuracy: 0.637976
All Scores
---------------------------------------------
Model hyperparameters
n_hidden = 30
n_layers = 2
learning_rate = 0.001000
n_epochs = 15
batch_size = 100
weight_positives = True
dropout_prob = 0.500000
---------------------------------------------
---------------------------------------------
Valid Weighted Classification Accuracy: 0.630551
All Scores
---------------------------------------------
Model hyperparameters
n_hidden = 60
n_layers = 1
learning_rate = 0.001000
n_epochs = 15
batch_size = 100
weight_positives = True
dropout_prob = 0.500000
---------------------------------------------
---------------------------------------------
Valid W

In [20]:
# Reduce each configuration's list of repeated scores to its mean.
avg_scores = {
    params: np.mean(np.array(param_scores))
    for params, param_scores in scores.items()
}
print("Scores Averaged over %d repetitions" % n_reps)
print(avg_scores)

Scores Averaged over 3 repetitions
{(30, 15, 0.5, 2): 0.6478596905116395, (30, 15, 0.5, 1): 0.62031134244552233, (60, 15, 0.5, 1): 0.62078844968916103, (30, 30, 0.5, 1): 0.62288468469764435, (60, 30, 0.5, 2): 0.66660513022150714, (60, 30, 0.5, 1): 0.67376653392376351, (30, 30, 0.5, 2): 0.64956592830924231, (60, 15, 0.5, 2): 0.64376551897200562}


In [21]:
# Sample learning rates log-uniformly: draw the exponent uniformly from
# [1, 6) and use 10 ** (-exponent) as the rate.
n_rates = 5
exponents = np.random.uniform(low=1, high=6, size=n_rates)
learning_rates = 10 ** (-exponents)
learning_rates

array([  3.96140197e-05,   1.94962910e-03,   5.73339445e-03,
         2.00852400e-06,   2.40747690e-04])