In [1]:
import tensorflow as tf
import numpy as np
import utils
from datetime import datetime

# 1. Import data

In [2]:
# Arithmetic operator whose dataset is loaded; also used later in log and checkpoint paths.
operator = 'add'
# utils.import_data returns six arrays: the three input splits, then the three target splits.
input_train, input_dev, input_test, target_train, target_dev, target_test = \
    utils.import_data(operator)

In [3]:
# Fallback for an empty dev split: when the training set already holds every
# example, evaluate dev and test on the training data itself.
if not input_dev.shape[0]:
    input_dev, target_dev = input_train, target_train
    input_test, target_test = input_train, target_train

In [4]:
# Show the shape of every split: the three input arrays first, then the three target arrays.
for _split in (input_train, input_dev, input_test,
               target_train, target_dev, target_test):
    print(_split.shape)

(65536, 16)
(65536, 16)
(65536, 16)
(65536, 9)
(65536, 9)
(65536, 9)


## 1.1 Convert 1D data to 2D data

We should get `(65536, 2, 8, 1)` input data: one row of 8 values per operand, plus a trailing channel axis of size 1.

In [5]:
# Convert each input split to its 2D layout, in the order train, dev, test.
input_train, input_dev, input_test = (
    utils.get_2d_inputs(_flat) for _flat in (input_train, input_dev, input_test)
)
# Confirm the converted shapes.
for _converted in (input_train, input_dev, input_test):
    print(_converted.shape)

(65536, 2, 8, 1)
(65536, 2, 8, 1)
(65536, 2, 8, 1)


## 1.2 How to break down the training set to the batches

1. Break down the training set with the defined batch size.
2. Discard the last batch if it is smaller than the batch size.
3. Start a new epoch.
4. Shuffle the training set.

In [6]:
# Output locations (relative paths)
dir_saved_models = 'saved_models'  # root directory for model checkpoints
rootdir_logs = 'tf_logs'           # root directory for TensorBoard summaries

# Evaluation / logging cadence, measured in training steps
train_print_period = 100
dev_print_period = 1000  # NOTE: the training loop lowers this as the dev error count drops

# Constants
# NOTE(review): after the 2D conversion shape[1] of the inputs is the row count,
# not a flat input width -- confirm the intended meaning of INPUT_DIM.
INPUT_DIM, OUTPUT_DIM = input_train.shape[1], target_train.shape[1]

# Hyperparameters - training
batch_size = 32
n_epoch = 999999999999   # effectively unbounded; the loop ends via all_correct_stop or a manual interrupt
str_optimizer = 'adam'
learning_rate = 0.001
all_correct_stop = True  # stop as soon as the dev set has no wrong predictions

# Hyperparameters - model
nn_model_type = 'cnn'  # mlp, cnn, rnn
activation = tf.nn.sigmoid
## Filter shapes: [filter_height, filter_width, in_channels, out_channels]
## Each layer's in_channels is the previous layer's out_channels.
f1_shape = (2, 1, input_train.shape[3], 16)  # >=2
f2_shape = (1, 2, f1_shape[3], 32)  # >=3
f3_shape = (1, 2, f2_shape[3], 64)  # >=5
f4_shape = (1, 2, f3_shape[3], OUTPUT_DIM)

# Variables determined by other variables
train_size = input_train.shape[0]
n_batch = train_size // batch_size  # floor division: a trailing partial batch is dropped

# 2. Define a computational graph for the convolutional neural net

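**Model structure** (note: this outline describes a two-layer design; the code below builds four convolution layers, `h1`, `h2`, `h3` and an output layer)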
* **Input**: `input`
  * Input size: 2x8
  * The first 8 inputs: the first operand of addition
  * The last 8 inputs: the second operand of addition
* **First layer**: `h1`
  * filter size = 2x1
  * stride = 1
  * valid convolution
  * filters = h1_fn (2 seems to be enough.)
  * h1 size = 1 x 8 x h1_fn
  * `h1 = tf.sigmoid(tf.nn.conv2d(input, W1, 1, 0) + b1, name='h1')`
* **Zero padding layer**: `h1_padded`
  * `h1_padded = tf.pad(h1, tf.constant([[0, 0], [1, 0]]), "CONSTANT", constant_values=0, name='h1_padded')`
* **Second layer**: `h2` (output layer)
  * filter size = 1x1x1
  * stride = 1
  * valid convolution
  * filters = 1
  * h2 size = 2 x 2 x h2_fn
  * `h2 = tf.sigmoid(tf.nn.conv2d(h1_padded, W2, 1, 0) + b2, name='h2')`
* Output size: 9

**Accessibility of the variables**
* I should know the weights, biases, and activations of the trained model in order to analyze it. 
  * The weight and bias can be accessed by `tf.get_variable` with name scope.
  * Activations are easily accessible.

In [7]:
# Weight initialization
## https://www.tensorflow.org/api_docs/python/tf/contrib/layers/variance_scaling_initializer
# Variance-scaling factor for the chosen activation: 2.0 for ReLU, 1.0 for sigmoid and tanh.
if activation == tf.nn.relu:
    init_factor = 2.0
elif activation == tf.nn.sigmoid or activation == tf.nn.tanh:
    init_factor = 1.0
else:
    # Fail here with a clear message instead of a NameError on init_factor further down.
    raise ValueError('No init_factor is defined for activation: {}'.format(activation))

# fan_in of a conv filter = filter_height * filter_width * in_channels
fan_in_1 = f1_shape[0] * f1_shape[1] * f1_shape[2]
fan_in_2 = f2_shape[0] * f2_shape[1] * f2_shape[2]
fan_in_3 = f3_shape[0] * f3_shape[1] * f3_shape[2]
fan_in_4 = f4_shape[0] * f4_shape[1] * f4_shape[2]

# Weights: truncated normal with stddev = sqrt(init_factor / fan_in).
# Biases: one per output channel, initialised to zero. They are wrapped in tf.Variable
# so that the optimizer updates them and tf.train.Saver stores them; a bare tf.zeros(...)
# is a constant tensor and would stay at zero for the whole run.
# NOTE: checkpoints written before this change contain no bias variables.
W1 = tf.Variable(tf.truncated_normal(f1_shape, stddev=np.sqrt(init_factor / fan_in_1)), name="W1")
b1 = tf.Variable(tf.zeros([f1_shape[3]]), name="b1")
W2 = tf.Variable(tf.truncated_normal(f2_shape, stddev=np.sqrt(init_factor / fan_in_2)), name="W2")
b2 = tf.Variable(tf.zeros([f2_shape[3]]), name="b2")
W3 = tf.Variable(tf.truncated_normal(f3_shape, stddev=np.sqrt(init_factor / fan_in_3)), name="W3")
b3 = tf.Variable(tf.zeros([f3_shape[3]]), name="b3")
W4 = tf.Variable(tf.truncated_normal(f4_shape, stddev=np.sqrt(init_factor / fan_in_4)), name="W4")
b4 = tf.Variable(tf.zeros([f4_shape[3]]), name="b4")

In [8]:
# NN structure
# Placeholders; the leading None dimension is the mini-batch size.
inputs = tf.placeholder(tf.float32, shape=(None, input_train.shape[1], input_train.shape[2], input_train.shape[3]), name='inputs')
targets = tf.placeholder(tf.float32, shape=(None, target_train.shape[1]), name='targets')

# Three hidden convolution layers with VALID padding.
# h1 uses stride 1; h2 and h3 use stride 2 along the width axis.
h1 = activation(tf.nn.conv2d(inputs, W1, strides=[1, 1, 1, 1], padding="VALID") + b1, name='h1')
h2 = activation(tf.nn.conv2d(h1, W2, strides=[1, 1, 2, 1], padding="VALID") + b2, name='h2')
h3 = activation(tf.nn.conv2d(h2, W3, strides=[1, 1, 2, 1], padding="VALID") + b3, name='h3')

# Output layer. Only the two spatial axes (1 and 2) are squeezed, so the batch axis
# survives even for a batch of size 1 and the static shape (None, OUTPUT_DIM) is kept.
# A squeeze without `axis` would also drop a size-1 batch dimension.
last_logits = tf.squeeze(
    tf.nn.conv2d(h3, W4, strides=[1, 1, 2, 1], padding="VALID") + b4,
    axis=[1, 2], name='last_logits')
outputs = tf.sigmoid(last_logits, name='outputs')

# presumably thresholds the sigmoid outputs into hard predictions -- confirm in utils.tf_tlu
predictions = utils.tf_tlu(outputs, name='predictions')

# training epoch (fed as a fractional epoch so it can be logged as a summary)
training_epoch = tf.placeholder(tf.float32, shape=None, name='training_epoch')

# Loss: objective function
# Element-wise sigmoid cross-entropy on the logits, averaged over all elements.
# https://www.tensorflow.org/api_docs/python/tf/nn/sigmoid_cross_entropy_with_logits
loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=targets, logits=last_logits)
loss = tf.reduce_mean(loss)

# Accuracy
(accuracy, n_wrong, n_correct) = utils.get_measures(targets, predictions)

# Training, optimization
train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)
init = tf.global_variables_initializer()

# 3. Run a session for training

In [9]:
# TensorBoard logging for loss and accuracy on the train, dev, and test sets.
# Each run gets its own timestamped directory.
run_id = '{:%Y%m%d%H%M%S}'.format(datetime.now())
logdir = '/'.join([rootdir_logs, operator, nn_model_type, 'run-' + run_id]) + '/'

# Scalar summaries; merge_all() below bundles all of them into a single op.
loss_summary = tf.summary.scalar('loss', loss)
acc_summary = tf.summary.scalar('accuracy', accuracy)
n_wrong_summary = tf.summary.scalar('n_wrong', n_wrong)
epoch_summary = tf.summary.scalar('epoch', training_epoch)

merged_summary_op = tf.summary.merge_all()

# One writer per data split; only the train writer also records the graph.
train_summary_writer = tf.summary.FileWriter('{}/train'.format(logdir), graph=tf.get_default_graph())
dev_summary_writer = tf.summary.FileWriter('{}/dev'.format(logdir))
test_summary_writer = tf.summary.FileWriter('{}/test'.format(logdir))

In [10]:
# Train until the dev set is predicted without error (when all_correct_stop is set)
# or n_epoch epochs have run, then evaluate on the test set and save the final model.
print("Run_id: %s" % run_id)
is_all_correct = False

model_saver = tf.train.Saver()

config = tf.ConfigProto()
config.gpu_options.allow_growth = True

# Defined up front so the final test evaluation has values even if no batch is run.
step = 0
float_epoch = 0.0

try:
    with tf.Session(config=config) as sess:
        sess.run(init)

        for epoch in range(n_epoch):
            # Reshuffle the training set at the start of every epoch.
            input_train, target_train = utils.shuffle_np_arrays(input_train, target_train)

            for i_batch in range(n_batch):
                step = n_batch * epoch + i_batch
                float_epoch = epoch + float(i_batch) / n_batch

                # One optimization step on the current mini-batch.
                batch_input, batch_output = utils.get_batch(i_batch, batch_size, input_train, target_train)
                _, train_loss, train_accuracy = sess.run(
                    [train_op, loss, accuracy],
                    feed_dict={inputs: batch_input, targets: batch_output, training_epoch: float_epoch})

                if step % train_print_period == 0:
                    # Re-evaluate the same batch after the update and log the train summaries.
                    train_loss, train_accuracy, merged_summary_op_val = sess.run(
                        [loss, accuracy, merged_summary_op],
                        feed_dict={inputs: batch_input, targets: batch_output, training_epoch: float_epoch})
                    train_summary_writer.add_summary(merged_summary_op_val, step)

                if step % dev_print_period == 0 or i_batch == n_batch - 1:
                    # i_batch == n_batch - 1: the last batch = the end of an epoch.
                    # Evaluate the whole dev set and log the dev summaries.
                    dev_loss, dev_accuracy, merged_summary_op_val, n_wrong_val = sess.run(
                        [loss, accuracy, merged_summary_op, n_wrong],
                        feed_dict={inputs: input_dev, targets: target_dev, training_epoch: float_epoch})
                    dev_summary_writer.add_summary(merged_summary_op_val, step)

                    # Checkpoint the model at the end of every epoch.
                    if i_batch == n_batch - 1:
                        model_name = 'epoch{}-batch{}'.format(epoch, i_batch)
                        model_saver.save(sess, '{}/{}/{}/{}/{}.ckpt'.format(
                            dir_saved_models, operator, nn_model_type, run_id, model_name))

                    # Evaluate the dev set more often as the number of wrong predictions shrinks.
                    if 50 <= n_wrong_val < 100:
                        dev_print_period = 100
                    elif 10 <= n_wrong_val < 50:
                        dev_print_period = 10
                    elif n_wrong_val < 10:
                        dev_print_period = 1

                    # If there is no wrong operation, stop training.
                    if n_wrong_val == 0 and all_correct_stop:
                        is_all_correct = True
                        break  # Break the batch for-loop

            # End of one epoch
            if is_all_correct and all_correct_stop:
                break  # Break the epoch for-loop

        # End of all epochs
        # Evaluate on the held-out test split (previously the dev split was fed here by mistake).
        test_loss, test_accuracy, merged_summary_op_val, n_wrong_val = sess.run(
            [loss, accuracy, merged_summary_op, n_wrong],
            feed_dict={inputs: input_test, targets: target_test, training_epoch: float_epoch})
        test_summary_writer.add_summary(merged_summary_op_val, step)

        # Save the final model under the run id.
        model_saver.save(sess, '{}/{}/{}/{}/{}.ckpt'.format(
            dir_saved_models, operator, nn_model_type, run_id, run_id))
finally:
    # Close the writers even if training is interrupted (e.g. KeyboardInterrupt),
    # so pending summaries are flushed to disk.
    train_summary_writer.close()
    dev_summary_writer.close()
    test_summary_writer.close()

Run_id: 20180929173017


KeyboardInterrupt: 

# 4. Restore a trained model

In [None]:
# NOTE(review): this restore cell is disabled -- the whole body is a string literal.
# Problems to fix inside it before re-enabling:
#   * `dir_saved_model` is not defined in this notebook; the configuration cell
#     defines `dir_saved_models`.
#   * the checkpoint paths omit the operator and model-type components that the
#     training cell uses when saving.
#   * the feed_dict does not feed `training_epoch`, which `merged_summary_op`
#     depends on through the 'epoch' summary.
'''
model_to_import = '20180808171010-fnn-relu-256-adam-lr0.001000-bs32-testacc0.999.ckpt'

saver = tf.train.import_meta_graph('{}/{}/{}.meta'.format(dir_saved_models, run_id, model_to_import))

config = tf.ConfigProto()
config.gpu_options.allow_growth = True

with tf.Session(config=config) as sess:
    saver.restore(sess, '{}/{}/{}'.format(dir_saved_model, run_id, model_to_import))
    
    # End of all epochs 
    # Run computing test loss, accuracy
    test_loss, summary, test_accuracy = sess.run(
        [loss, merged_summary_op, accuracy],
        feed_dict={inputs:input_dev, targets:target_dev})
    
    print("└ test_loss: {}, test_accuracy: {}".format(test_loss, test_accuracy))'''