#### Import Dependencies

In [1]:
import tensorflow as tf
import numpy as np
from datetime import datetime
from data_utils import *

  from ._conv import register_converters as _register_converters


#### Load Data

In [None]:
train_valid_ligids  = np.load('../data/PHARM_TRAIN_X.npy')
train_valid_smiles = np.load('../data/PHARM_TRAIN_SMILES.npy')
train_valid_scores = np.load('../data/Y_train.npy')

print('train_valid_ligids shape: {}'.format(train_valid_ligids.shape))
print('train_valid_smiles shape: {}'.format(train_valid_smiles.shape))
print('train_valid_scores shape: {}'.format(train_valid_scores.shape))

#### Train Validation Test Split

In [None]:
train_data, validation_data = train_validation_split(train_valid_ligids,
                                                     train_valid_smiles,
                                                     train_valid_scores,
                                                     num_val_lig=3046, 
                                                     num_val_smi=10581)
del train_valid_ligids, train_valid_smiles, train_valid_scores

#### Define Model

In [2]:
# Hyper Paramters
LEARNING_RATE = .00001
LAMBDA = .000
DROPOUT = .5
NUM_EPOCHS = 5
VALIDATIONS_PER_EPOCH = 5
TRAINING_BATCH_SIZE = 1000
USE_PERCENT_DATA = .05
VALIDATION_BATCH_SIZE = 10000

# Model
L1_UNITS = 10
L2_UNITS = 10
NUM_OUTPUTS = 10

tf.reset_default_graph()

relu = tf.nn.relu
elu = tf.nn.elu
xavier_init = tf.contrib.layers.xavier_initializer()
zero_init = tf.zeros_initializer()
l2_reg = tf.contrib.layers.l2_regularizer(scale=LAMBDA)

with tf.name_scope('inputs') as scope:
    smiles = tf.placeholder(shape=(None, 167), dtype=tf.float32, name='smiles')
    ligids = tf.placeholder(shape=(None, 9), dtype=tf.float32, name='ligids')
    Y = tf.placeholder(shape=(None), dtype=tf.int32, name='score')
    training = tf.placeholder_with_default(input=False, shape=(), name='training')
    
with tf.name_scope('hidden_layers') as scope:
    smiles_layer1 = tf.layers.dense(inputs=ligids,
                                    units=L1_UNITS, 
                                    activation=elu,
                                    kernel_initializer=xavier_init,
                                    bias_initializer=zero_init,
                                    kernel_regularizer=l2_reg,
                                    bias_regularizer=l2_reg,
                                    name='smiles_layer1')
    ligid_layer1 = tf.layers.dense(inputs=smiles,
                                   units=L1_UNITS, 
                                   activation=elu,
                                   kernel_initializer=xavier_init,
                                   bias_initializer=zero_init,
                                   kernel_regularizer=l2_reg,
                                   bias_regularizer=l2_reg,
                                   name='ligid_layer1')
    layer1 = tf.add((ligid_layer1, smiles_layer1), name='layer1')
    layer2 = tf.layers.dense(inputs=layer1, 
                             units=L2_UNITS, 
                             activation=elu,
                             kernel_initializer=xavier_init,
                             bias_initializer=zero_init,
                             kernel_regularizer=l2_reg,
                             bias_regularizer=l2_reg,
                             name='layer2')
    
with tf.name_scope('predicted_score') as scope:
    logits = tf.layers.dense(inputs=layer2,
                             units=NUM_OUTPUTS,
                             name='logits')
    pred_score = tf.nn.softmax(logits=logits, axis=1)
    pred_score = tf.argmax(pred_score, axis=1, name='Y_hat')
        
with tf.name_scope('train') as scope:
    mae = tf.losses.absolute_difference(Y,pred_score)
    reg_loss = tf.reduce_sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
    xent_loss = tf.reduce_sum(tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=Y))
    loss = xent_loss+reg_loss
    optimizer = tf.train.AdamOptimizer(LEARNING_RATE)
    train_op = optimizer.minimize(loss)

INFO:tensorflow:Scale of 0 disables regularizer.


TypeError: add() missing 1 required positional argument: 'y'

#### Run Model

In [71]:
# Misc. constants
num_training_batches = int(train_data.num_scores/TRAINING_BATCH_SIZE*USE_PERCENT_DATA)
num_validation_batches = int(validation_data.num_scores/VALIDATION_BATCH_SIZE)
TB_PERIOD = 1000
TRAIN_PERIOD = 100
VALIDATION_PERIOD = 100

# Saver - Prediction Tensors
tf.get_collection_ref('pred_ops').clear()
tf.get_collection('pred_ops')
tf.add_to_collection('pred_ops', X)
tf.add_to_collection('pred_ops', Y)
tf.add_to_collection('pred_ops', pred_score)
# Saver - Training Tensors
tf.get_collection_ref('train_ops').clear()
tf.get_collection('train_ops')
tf.add_to_collection('train_ops', X)
tf.add_to_collection('train_ops', Y)
tf.add_to_collection('train_ops', mae)
tf.add_to_collection('train_ops', loss)
tf.add_to_collection('train_ops', train_op)
saver = tf.train.Saver(max_to_keep=1000)

# Tensorboard - Graph
time_now = datetime.utcnow().strftime('%Y%m%d%H%M%S')
tensorboard_logdir = '../tf_log/run-{}'.format(time_now)
print('tensorboard log_dir: {}'.format(tensorboard_logdir))
writer = tf.summary.FileWriter(tensorboard_logdir)
writer.add_graph(tf.get_default_graph())
# Tensorboard - Summaries
summaries = [tf.summary.scalar('mean_abs_error', mae), 
             tf.summary.scalar('xent_error', xent_loss), 
             tf.summary.scalar('regularization', reg_loss), 
             tf.summary.scalar('total_loss', loss),
             tf.summary.histogram('ligids',ligids),
             tf.summary.histogram('smiles',smiles),
             tf.summary.histogram('layer1',layer1),
             tf.summary.histogram('layer2',layer2),
             tf.summary.histogram('predictions',pred_score),
             tf.summary.histogram('ground_truth',Y)]

# Start Session
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(NUM_EPOCHS):       
        # Train Model
        train_data.reset()
        for step in range(num_training_batches):
            ligids_batch, smiles_batch, scores_batch = train_data.next_batch(TRAINING_BATCH_SIZE)
            _, train_loss = sess.run([train_op, loss], feed_dict={ligids:ligids_batch, 
                                                                  smiles:smiles_batch,
                                                                  Y:scores_batch})
            if step%TRAIN_PERIOD == 0:
                print('{}/{} train_loss_batch: {:.3f}'.format(step, 
                                                              num_training_batches, 
                                                              train_loss), end='\r')
            # Tensorboard
            if step%TB_PERIOD == 0:
                s = [sess.run(summary, feed_dict={ligids:ligids_batch, 
                                                  smiles:smiles_batch, 
                                                  Y:scores_batch}) for summary in summaries]
                global_step = step+epoch*num_training_batches
                [writer.add_summary(summary, global_step) for summary in s]
                
        # Validation
        mae_value = []
        validation_data.reset()
        for step in range(num_validation_batches): 
            ligids_batch, smiles_batch, scores_batch = validation_data.next_batch(VALIDATION_BATCH_SIZE)
            mae_batch = sess.run(mae, feed_dict={ligids:ligids_batch, 
                                                 smiles:smiles_batch,
                                                 Y:scores_batch})
            mae_value.append(mae_batch)
            if step%VALIDATION_PERIOD == 0:
                print('{}/{} mae_batch: {}'.format(step, 
                                                   num_validation_batches, 
                                                   mae_batch), end='\r')
        mae_value = sum(mae_value)/len(mae_value)
        print('EPOCH: {:<10} | mae: {:<20}'.format(epoch+1, mae_value))
        
        # Save Model w/ name: e{epoch number}_l{loss}
        saver_filename = 'mae{}_e{}'.format(mae_value, epoch)        
        saver.save(sess, '../models/{}/{}'.format(time_now, saver_filename))

tensorboard log_dir: ../tf_log/run-20180521014731
EPOCH: 1          | mae: 1.4503773130814086  
EPOCH: 2          | mae: 1.486102979313432   
EPOCH: 3          | mae: 1.4957207948579911  
EPOCH: 4          | mae: 1.4983752015434681  
EPOCH: 5          | mae: 1.4997356301653393  
