In [1]:
%load_ext autoreload
%autoreload 2
from __future__ import print_function

import os
import time

import numpy as np
import tensorflow as tf
from tensorflow.python.framework.ops import reset_default_graph
from tensorflow.contrib.layers import fully_connected, batch_norm
from tensorflow.contrib import rnn

import matplotlib
import matplotlib.pyplot as plt

from utils import *

All the hyperparameters

In [2]:
batch_size = 64
number_inputs = 66
number_outputs = 8
max_seq_len = 400 # max 700

Load all the data

In [3]:
# Get all the data
relative_path = './data/SetOf7604Proteins/'
trainList_addr = relative_path + 'trainList'
validList_addr = relative_path + 'validList'
testList_addr = relative_path + 'testList'

start = time.time()
train_list = read_list(trainList_addr)
valid_list = read_list(validList_addr)
test_list = read_list(testList_addr)

train_generator = generate_batch(train_list, relative_path, max_seq_len, batch_size)

X_train, t_train, len_train, mask_train = train_generator.next()
X_valid, t_valid, len_valid, mask_valid = read_data(valid_list, relative_path, max_seq_length=683) # 683

timeSpent = time.time() - start
print("Spent {:.1f}s to load the data".format(timeSpent))
print("X_train:", X_train.shape)
print("X_valid:", X_valid.shape)

Spent 17.2s to load the data
X_train: (64, 400, 66)
X_valid: (1267, 683, 66)


In [4]:
print(X_train.shape)
print(mask_train.shape)
print(len_train.shape)

(64, 400, 66)
(64, 400)
(64,)


In [5]:
learning_rate = 0.001
num_layers = 2
state_size = 100
num_units_l1 = 100
dropout = False
dropout_keep_rate = 0.5
clip_gradients = False
max_grad_norm = 5
attention_size = 50

reset_default_graph()

X_input = tf.placeholder(tf.float32, shape=[None, None, number_inputs], name='X_input')
X_length = tf.placeholder(tf.int32, shape=[None,], name='X_length')
t_input = tf.placeholder(tf.int32, shape=[None, None], name='t_input')
X_mask = tf.placeholder(tf.int32, shape=[None, None], name='X_mask')
phase = tf.placeholder(tf.bool, name='phase')

def GRU_with_dropout(dropout=True):
    if dropout:
        return rnn.DropoutWrapper(rnn.GRUCell(state_size),
                                  output_keep_prob=dropout_keep_rate)
    else:
        return rnn.GRUCell(state_size)

cells = rnn.MultiRNNCell([GRU_with_dropout(dropout=dropout) for _ in range(num_layers)],
                         state_is_tuple=True)
outputs, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw=cells, cell_bw=cells, inputs=X_input,
                                             sequence_length=X_length, 
                                             dtype=tf.float32)

outputs = tf.concat(outputs, 2)
batch_size_shp = tf.shape(outputs)[0]
seq_len_shp = tf.shape(outputs)[1]
outputs = tf.reshape(outputs, [-1, state_size*2])
# bn1 = batch_norm(outputs, center=True, scale=True, is_training=phase)
l1 = fully_connected(outputs, num_units_l1) # , normalizer_fn=batch_norm
# bn2 = batch_norm(l1, center=True, scale=True, is_training=phase)
l_out = fully_connected(l1, number_outputs, activation_fn=None)
y = tf.reshape(l_out, [batch_size_shp, seq_len_shp, number_outputs])

**This piece is working!!!**

The following code uses sparse_softmax_cross_entropy_with_logits but with number_outputs = 9. 

In [6]:
X_mask = tf.to_float(X_mask)
mask_sum = tf.reduce_sum(X_mask)
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=t_input, logits=y)
cross_entropy *= X_mask
loss = tf.reduce_sum(cross_entropy) / mask_sum

predictions = tf.to_int32(tf.argmax(y, 2))
correct = tf.to_float(tf.equal(predictions, t_input))
total_correct_preds = tf.reduce_sum(correct * X_mask)
accuracy =  total_correct_preds / mask_sum

In [7]:
global_step = tf.Variable(0, name='global_step', trainable=False)
optimizer = tf.train.AdamOptimizer(learning_rate)
grads_and_vars = optimizer.compute_gradients(loss)
grads = [element[0] for element in grads_and_vars]
variables =  [element[1] for element in grads_and_vars]
if clip_gradients:
    grads = tf.clip_by_global_norm(grads, max_grad_norm)[0]   
grad_norm = tf.global_norm(grads)
grads_and_vars = [(grads[i], variables[i]) for i in range(len(grads))]

# update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
# with tf.control_dependencies(update_ops):
train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
    

In [8]:
# print("=" * 10 + "validating the model"+ "=" * 10)
# # test validation part
# # sess.run(tf.global_variables_initializer())
# with tf.Session() as sess:
#     sess.run(tf.global_variables_initializer())
#     feed_dict = {X_input: X_valid, X_length: len_valid,
#                  t_input: t_valid, X_mask: mask_valid}
#     fetches = [enc_outputs, outputs, l_out, l_out_reshape, y]
#     res = tuple(sess.run(fetches=fetches, feed_dict=feed_dict))
#     for element in res:
#         print(element.shape)
# print("=" * 10 + "Model validation finished"+ "=" * 10)

In [9]:
def evaluate(X_data, y_data, len_data, mask_data):
    num_examples = X_data.shape[0]
    sess = tf.get_default_session()
    total_correct = 0
    loss_total = 0
    for offset in range(0, num_examples, batch_size):
        batch_x = X_data[offset:offset+batch_size, :, :]
        batch_y = y_data[offset:offset+batch_size, :]
        batch_len = len_data[offset:offset+batch_size]
        batch_mask = mask_data[offset:offset+batch_size, :]
        
        feed = {X_input: batch_x, X_length: batch_len,
                t_input: batch_y, X_mask: batch_mask,
                phase: False}
        
        loss_val, correct_val = sess.run([loss, total_correct_preds], feed_dict=feed)
        total_correct += correct_val
        loss_total += loss_val
    return total_correct / np.sum(mask_data), loss_total

In [10]:
def evaluate_individual(protein_name, CASP):
    features, labels, seq_len, mask = read_protein(protein_name, relative_path, expand_dims=True, CASP=CASP)
    feed = {X_input: features, X_length: seq_len,
            t_input: labels, X_mask: mask, phase: False}
    sess = tf.get_default_session()
    loss_val, correct_val = sess.run([loss, total_correct_preds], feed_dict=feed)
    return correct_val, np.sum(mask), correct_val / np.sum(mask), loss_val

def sequential_evaluate(dataList, CASP=False, print_interval=100):
    loss_total = 0
    total_correct = 0
    mask_total = 0
    for i, protein_name in enumerate(dataList):
        correct_val, mask_sum, _, loss_val = evaluate_individual(protein_name, CASP)
        total_correct += correct_val
        loss_total += loss_val
        mask_total += mask_sum
        if i % print_interval == 0:
            print("Number of processed proteins", i)
            print("Snapshot: Accuracy: {}; loss: {}".format(total_correct / mask_total, loss_total))
    return total_correct / mask_total, loss_total

In [11]:
val_interval = batch_size * 5
print_interval = batch_size * 10
samples_to_process = 1e4
samples_processed = 0
samples_val = []
costs, accs_val, grads_norm = [], [], []
saver1 = tf.train.Saver()

NUM_THREADS = int(os.environ['OMP_NUM_THREADS'])
config_slurm = tf.ConfigProto(intra_op_parallelism_threads=NUM_THREADS, 
                        inter_op_parallelism_threads=NUM_THREADS)

with tf.Session(config=config_slurm) as sess:
    sess.run(tf.global_variables_initializer())
    # writer = tf.summary.FileWriter('./graphs', sess.graph)
    start = time.time()
    while samples_processed < samples_to_process:
        fetches_tr = [train_op, loss, accuracy, grad_norm]
        feed_dict_tr = {X_input: X_train, X_length: len_train,
                        t_input: t_train, X_mask: mask_train,
                        phase: True}
        res = tuple(sess.run(fetches=fetches_tr, feed_dict=feed_dict_tr))
        _, batch_cost, batch_acc, batch_grad_norm = res
        samples_processed += batch_size
#         print("{} samples processed".format(samples_processed))
        grads_norm += [batch_grad_norm]

        #validation data
        if samples_processed % val_interval == 0:
            acc_val, acc_loss = evaluate(X_valid, t_valid, len_valid, mask_valid)
            costs += [batch_cost]
            samples_val += [samples_processed]
            accs_val += [acc_val]
            if accs_val[-1] >= max(accs_val):
                print("Current valid_accuracy {:.1f}% is best so far. Saving model...".format(accs_val[-1]*100))
                saver1.save(sess, './BiGRU_noBN')
            if samples_processed % print_interval == 0:
                print("samples_processed: %d, batch_cost: %.3f, validation_accs: %.4f, validation_loss: %.4f" % \
                      (samples_processed, batch_cost, acc_val, acc_loss))

    print("time spent: {:.3f} seconds".format(time.time() - start))
    # writer.close()

Current valid_accuracy 37.5% is best so far. Saving model...
Current valid_accuracy 45.1% is best so far. Saving model...
samples_processed: 640, batch_cost: 1.481, validation_accs: 0.4508, validation_loss: 29.9877
Current valid_accuracy 48.5% is best so far. Saving model...
Current valid_accuracy 54.6% is best so far. Saving model...
samples_processed: 1280, batch_cost: 1.297, validation_accs: 0.5463, validation_loss: 26.3593
Current valid_accuracy 57.0% is best so far. Saving model...
Current valid_accuracy 60.8% is best so far. Saving model...
samples_processed: 1920, batch_cost: 1.080, validation_accs: 0.6076, validation_loss: 22.1347
Current valid_accuracy 64.2% is best so far. Saving model...
Current valid_accuracy 65.3% is best so far. Saving model...
samples_processed: 2560, batch_cost: 0.916, validation_accs: 0.6531, validation_loss: 19.3161
Current valid_accuracy 66.5% is best so far. Saving model...
Current valid_accuracy 67.2% is best so far. Saving model...
samples_process

In [12]:
with tf.Session() as sess:
    new_saver = tf.train.import_meta_graph('BiGRU_noBN.meta')
    new_saver.restore(sess, tf.train.latest_checkpoint('./'))
    print("Model restored")
    start = time.time()
    acc_valid, loss_valid = sequential_evaluate(valid_list)
    print("finished in {:.1f} seconds".format(time.time() - start))
    print("Accuracy: {}; loss: {}".format(acc_valid, loss_valid))
#     for i in range(10):
#         n_correct, acc, loss_ = evaluate_individual(valid_list[i])
#         print(n_correct, acc, loss_)

INFO:tensorflow:Restoring parameters from ./BiGRU_noBN


NotFoundError: Key BatchNorm/moving_variance not found in checkpoint
	 [[Node: save_1/RestoreV2_7 = RestoreV2[dtypes=[DT_FLOAT], _device="/job:localhost/replica:0/task:0/cpu:0"](_arg_save_1/Const_0_0, save_1/RestoreV2_7/tensor_names, save_1/RestoreV2_7/shape_and_slices)]]

Caused by op u'save_1/RestoreV2_7', defined at:
  File "/home-nfs/jtang7/anaconda2/envs/tf-1.3/lib/python2.7/runpy.py", line 174, in _run_module_as_main
    "__main__", fname, loader, pkg_name)
  File "/home-nfs/jtang7/anaconda2/envs/tf-1.3/lib/python2.7/runpy.py", line 72, in _run_code
    exec code in run_globals
  File "/home-nfs/jtang7/anaconda2/envs/tf-1.3/lib/python2.7/site-packages/ipykernel/__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "/home-nfs/jtang7/anaconda2/envs/tf-1.3/lib/python2.7/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home-nfs/jtang7/anaconda2/envs/tf-1.3/lib/python2.7/site-packages/ipykernel/kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "/home-nfs/jtang7/anaconda2/envs/tf-1.3/lib/python2.7/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/home-nfs/jtang7/anaconda2/envs/tf-1.3/lib/python2.7/site-packages/tornado/ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "/home-nfs/jtang7/anaconda2/envs/tf-1.3/lib/python2.7/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/home-nfs/jtang7/anaconda2/envs/tf-1.3/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/home-nfs/jtang7/anaconda2/envs/tf-1.3/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/home-nfs/jtang7/anaconda2/envs/tf-1.3/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/home-nfs/jtang7/anaconda2/envs/tf-1.3/lib/python2.7/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/home-nfs/jtang7/anaconda2/envs/tf-1.3/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home-nfs/jtang7/anaconda2/envs/tf-1.3/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "/home-nfs/jtang7/anaconda2/envs/tf-1.3/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/home-nfs/jtang7/anaconda2/envs/tf-1.3/lib/python2.7/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home-nfs/jtang7/anaconda2/envs/tf-1.3/lib/python2.7/site-packages/ipykernel/zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home-nfs/jtang7/anaconda2/envs/tf-1.3/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2717, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home-nfs/jtang7/anaconda2/envs/tf-1.3/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2821, in run_ast_nodes
    if self.run_code(code, result):
  File "/home-nfs/jtang7/anaconda2/envs/tf-1.3/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-12-ddfabb62eb45>", line 2, in <module>
    new_saver = tf.train.import_meta_graph('BiGRU_BN.meta')
  File "/home-nfs/jtang7/anaconda2/envs/tf-1.3/lib/python2.7/site-packages/tensorflow/python/training/saver.py", line 1698, in import_meta_graph
    **kwargs)
  File "/home-nfs/jtang7/anaconda2/envs/tf-1.3/lib/python2.7/site-packages/tensorflow/python/framework/meta_graph.py", line 656, in import_scoped_meta_graph
    producer_op_list=producer_op_list)
  File "/home-nfs/jtang7/anaconda2/envs/tf-1.3/lib/python2.7/site-packages/tensorflow/python/framework/importer.py", line 313, in import_graph_def
    op_def=op_def)
  File "/home-nfs/jtang7/anaconda2/envs/tf-1.3/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2630, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/home-nfs/jtang7/anaconda2/envs/tf-1.3/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1204, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

NotFoundError (see above for traceback): Key BatchNorm/moving_variance not found in checkpoint
	 [[Node: save_1/RestoreV2_7 = RestoreV2[dtypes=[DT_FLOAT], _device="/job:localhost/replica:0/task:0/cpu:0"](_arg_save_1/Const_0_0, save_1/RestoreV2_7/tensor_names, save_1/RestoreV2_7/shape_and_slices)]]


In [None]:
fig, ax1 = plt.subplots()
plt.plot(samples_val, accs_val, 'b-')
ax1.set_ylabel('Validation Accuracy', fontsize=15)
ax1.set_xlabel('Processed samples', fontsize=15)
plt.title('Accuracy & Cost with BN', fontsize=20)
ax2 = ax1.twinx()
ax2.plot(samples_val, costs, 'r-')
ax2.set_ylabel('Training Cost', fontsize=15)
plt.grid('on')
plt.savefig("out.png")
plt.show()

In [None]:
# samples_noBN = samples_val[:]
# accs_val_noBN = accs_val[:]
# costs_train_noBN = costs[:]

# with open("result_noBN.txt","w") as f:
#     for (sample_val, acc_val, cost) in zip(samples_noBN, accs_val_noBN, costs_train_noBN):
#         f.write("{0},{1},{2}\n".format(sample_val, acc_val, cost))

In [None]:
samples_noBN, accs_val_noBN, costs_train_noBN = [], [], []
with open("result_noBN.txt","r") as f:
    for line in f:
        line = line.split(',')
        samples_noBN.append(line[0])
        accs_val_noBN.append(line[1])
        costs_train_noBN.append(line[2])

In [None]:
fig, ax1 = plt.subplots()
plt.plot(samples_noBN, accs_val_noBN, 'b-')
ax1.set_ylabel('Validation Accuracy', fontsize=15)
ax1.set_xlabel('Processed samples', fontsize=15)
plt.title('Accuracy & Cost without BN', fontsize=20)
ax2 = ax1.twinx()
ax2.plot(samples_noBN, costs_train_noBN, 'r-')
ax2.set_ylabel('Training Cost', fontsize=15)
plt.grid('on')
plt.savefig("out.png")
plt.show()

In [None]:
print(len(grads_norm))
plt.plot(np.arange(len(grads_norm)), grads_norm)
plt.show()

In [13]:
with tf.Session() as sess:
#     sess.run(tf.global_variables_initializer())
    new_saver = tf.train.import_meta_graph('BiGRU_noBN.meta')
    new_saver.restore(sess, tf.train.latest_checkpoint('./'))
    print("Model restored")
    start = time.time()
    acc_test, loss_test = sequential_evaluate(test_list)
    print("finished in {:.1f} seconds".format(time.time() - start))
    print("Accuracy: {}; loss: {}".format(acc_test, loss_test))

INFO:tensorflow:Restoring parameters from ./BiGRU_noBN
Model restored
Number of processed proteins 0
Snapshot: Accuracy: 0.660550458716; loss: 1.04222917557
Number of processed proteins 100
Snapshot: Accuracy: 0.673852372245; loss: 90.547955066
Number of processed proteins 200
Snapshot: Accuracy: 0.683326343834; loss: 171.967855833
Number of processed proteins 300
Snapshot: Accuracy: 0.696296903162; loss: 249.610781245
Number of processed proteins 400
Snapshot: Accuracy: 0.698432955787; loss: 334.137141638
Number of processed proteins 500
Snapshot: Accuracy: 0.698249997541; loss: 417.02331835
Number of processed proteins 600
Snapshot: Accuracy: 0.699387616988; loss: 494.954165161
Number of processed proteins 700
Snapshot: Accuracy: 0.701359147081; loss: 570.458231442
Number of processed proteins 800
Snapshot: Accuracy: 0.700909983318; loss: 651.645652674
Number of processed proteins 900
Snapshot: Accuracy: 0.699774349286; loss: 735.816624306
Number of processed proteins 1000
Snapshot: 

In [14]:
relative_path = './data/CASP11/'
CASP11_addr = relative_path + 'proteinList'
CASP11_list = read_list(CASP11_addr)
with tf.Session() as sess:
    new_saver = tf.train.import_meta_graph('BiGRU_noBN.meta')
    new_saver.restore(sess, tf.train.latest_checkpoint('./'))
    print("Model restored")
    start = time.time()
    acc_11, loss_11 = sequential_evaluate(CASP11_list, CASP=True, print_interval=8)
    print("finished in {:.1f} seconds".format(time.time() - start))
    print("Accuracy: {}; loss: {}".format(acc_11, loss_11))

INFO:tensorflow:Restoring parameters from ./BiGRU_noBN
Model restored
Number of processed proteins 0
Snapshot: Accuracy: 0.75; loss: 0.713340342045
Number of processed proteins 8
Snapshot: Accuracy: 0.666666666667; loss: 7.60328298807
Number of processed proteins 16
Snapshot: Accuracy: 0.662435862814; loss: 15.3636464477
Number of processed proteins 24
Snapshot: Accuracy: 0.6669015325; loss: 22.9273266792
Number of processed proteins 32
Snapshot: Accuracy: 0.665755564233; loss: 30.5735407472
Number of processed proteins 40
Snapshot: Accuracy: 0.667281483759; loss: 38.131865263
Number of processed proteins 48
Snapshot: Accuracy: 0.676953494369; loss: 44.3409409821
Number of processed proteins 56
Snapshot: Accuracy: 0.686832226393; loss: 50.5891824961
Number of processed proteins 64
Snapshot: Accuracy: 0.688131951466; loss: 57.3169320226
finished in 17.1 seconds
Accuracy: 0.689448855137; loss: 63.448605597


In [15]:
relative_path = './data/CASP12/'
CASP12_addr = relative_path + 'proteinList'
CASP12_list = read_list(CASP12_addr)
with tf.Session() as sess:
    new_saver = tf.train.import_meta_graph('BiGRU_noBN.meta')
    new_saver.restore(sess, tf.train.latest_checkpoint('./'))
    print("Model restored")
    start = time.time()
    acc_12, loss_12 = sequential_evaluate(CASP12_list, CASP=False, print_interval=5)
    print("finished in {:.1f} seconds".format(time.time() - start))
    print("Accuracy: {}; loss: {}".format(acc_12, loss_12))

INFO:tensorflow:Restoring parameters from ./BiGRU_noBN
Model restored
Number of processed proteins 0
Snapshot: Accuracy: 0.433628318584; loss: 1.61523652077
Number of processed proteins 5
Snapshot: Accuracy: 0.668016194332; loss: 6.57240086794
Number of processed proteins 10
Snapshot: Accuracy: 0.685314685315; loss: 10.0399840772
Number of processed proteins 15
Snapshot: Accuracy: 0.675903614458; loss: 15.0433500111
Number of processed proteins 20
Snapshot: Accuracy: 0.659784757303; loss: 19.8404586613
Number of processed proteins 25
Snapshot: Accuracy: 0.654732797765; loss: 26.1281520128
Number of processed proteins 30
Snapshot: Accuracy: 0.650573553511; loss: 31.528159976
Number of processed proteins 35
Snapshot: Accuracy: 0.650206207944; loss: 37.0465227365
finished in 10.6 seconds
Accuracy: 0.651434543036; loss: 41.349455893
