In [1]:
import tensorflow as tf
import numpy as np
from preposessing_data import generate_batch, data_generator

In [2]:
# --- Model dimensions ---
# Size of the last axis of the encoder input placeholder.
ENCODER_INPUT_SIZE = 4096
# Number of units in every LSTM cell and in the attention layers.
HIDDEN_LAYER_SIZE = 1024
# Width of the projected encoder inputs and of the decoder word embeddings.
EMBEDDING_SIZE = 1024
# How many LSTM cells are stacked in each encoder/decoder MultiRNNCell.
NUM_OF_LAYER = 2

# --- Special token ids: begin-of-sentence / end-of-sentence ---
BOS, EOS = 0, 1

# --- Training settings ---
BATCH_SIZE = 29
# Passed to DropoutWrapper as its input keep probability; 1.0 disables dropout.
keep_prob = 1.0


In [3]:
# Load the features and captions and build the vocabulary lookups.
# NOTE(review): the meaning of the trailing `2` is defined inside
# preposessing_data.data_generator and is not visible here — confirm.
(
    X,
    y_inputs,
    y_targets,
    word_idx,
    idx_word,
    num_of_words,
    max_length,
    sequence_lengths,
) = data_generator(
    './data/training_data',
    './data/training_label.json',
    2,
)

starting load data......
encoding data........
adding special symbol......
[[  0   4   5 ...   3   3   3]
 [  0   4  10 ...   3   3   3]
 [  0   4  15 ...   3   3   3]
 ...
 [  0   4   5 ...   3   3   3]
 [  0   8 397 ...   3   3   3]
 [  0   4  10 ...   3   3   3]] [[  4   5   6 ...   3   3   3]
 [  4  10  11 ...   3   3   3]
 [  4  15  16 ...   3   3   3]
 ...
 [  4   5 111 ...   3   3   3]
 [  8 397 507 ...   3   3   3]
 [  4  10  17 ...   3   3   3]]
Done data generation!


# Define the model

In [4]:
# Start from an empty default graph so re-running this cell does not stack
# duplicate ops on top of a previous build.
tf.reset_default_graph()

# Let this process claim at most 70% of GPU memory and use a single GPU device.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.7)
session_config = tf.ConfigProto(
    device_count={'GPU': 1},
    gpu_options=gpu_options,
)
sess = tf.InteractiveSession(config=session_config)

with tf.name_scope('input'):
    # Float features for the encoder; only the last axis is fixed.
    encoder_inputs = tf.placeholder(
        tf.float32, shape=[None, None, ENCODER_INPUT_SIZE])
    # Integer token ids fed to / expected from the decoder.
    decoder_inputs = tf.placeholder(tf.int32, shape=[None, None])
    decoder_targets = tf.placeholder(tf.int32, shape=[None, None])
    # Per-example lengths used to build the loss mask.
    sequence_length = tf.placeholder(tf.int32, shape=[None])
    # Per-example lengths handed to the training helper.
    sequence_length_fake = tf.placeholder(tf.int32, shape=[None])
    # Scalar scheduled-sampling probability.
    sampling_prob = tf.placeholder(tf.float32, shape=[])

# Define Encoder 

In [None]:
def _stacked_encoder_cell():
    """Return NUM_OF_LAYER LSTM cells, each dropout-wrapped, stacked in a MultiRNNCell."""
    layers = []
    for _layer in range(NUM_OF_LAYER):
        lstm = tf.contrib.rnn.LSTMCell(HIDDEN_LAYER_SIZE)
        layers.append(
            tf.nn.rnn_cell.DropoutWrapper(lstm, input_keep_prob=keep_prob))
    return tf.contrib.rnn.MultiRNNCell(layers)


with tf.name_scope('encoder'):
    # Project the raw features down to EMBEDDING_SIZE before the recurrent layers.
    encoder_inputs_embedded = tf.layers.dense(encoder_inputs, units=EMBEDDING_SIZE)

    # Independent forward and backward stacks for the bidirectional RNN.
    encoder_cell_fw = _stacked_encoder_cell()
    encoder_cell_bw = _stacked_encoder_cell()

    encoder_outputs, encoder_state = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=encoder_cell_fw,
        cell_bw=encoder_cell_bw,
        inputs=encoder_inputs_embedded,
        dtype=tf.float32,
    )
    # Join the (forward, backward) output pair along the feature axis.
    encoder_outputs = tf.concat(encoder_outputs, axis=2)

# Define Decoder for training

In [None]:
def _stacked_decoder_cell():
    """Return NUM_OF_LAYER LSTM cells, each dropout-wrapped, stacked in a MultiRNNCell."""
    layers = []
    for _layer in range(NUM_OF_LAYER):
        lstm = tf.contrib.rnn.LSTMCell(HIDDEN_LAYER_SIZE)
        layers.append(
            tf.nn.rnn_cell.DropoutWrapper(lstm, input_keep_prob=keep_prob))
    return tf.contrib.rnn.MultiRNNCell(layers)


with tf.name_scope('training_decoder'):
    decoder_cell = _stacked_decoder_cell()

    # Word-embedding table for the decoder, initialised uniformly in [-1, 1).
    embeddings = tf.Variable(
        tf.random_uniform([num_of_words, EMBEDDING_SIZE], minval=-1.0, maxval=1.0),
        dtype=tf.float32,
    )
    decoder_inputs_embedded = tf.nn.embedding_lookup(embeddings, decoder_inputs)

    # Attention over the concatenated encoder outputs.
    attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
        num_units=HIDDEN_LAYER_SIZE,
        memory=encoder_outputs,
    )
    attn_cell = tf.contrib.seq2seq.AttentionWrapper(
        decoder_cell,
        attention_mechanism,
        attention_layer_size=HIDDEN_LAYER_SIZE,
    )
    # Project every decoder step onto the vocabulary.
    out_cell = tf.contrib.rnn.OutputProjectionWrapper(attn_cell, num_of_words)

    # Scheduled sampling: the helper takes its lengths from sequence_length_fake
    # and its sampling probability from the sampling_prob placeholder.
    training_helper = tf.contrib.seq2seq.ScheduledEmbeddingTrainingHelper(
        inputs=decoder_inputs_embedded,
        sequence_length=sequence_length_fake,
        embedding=embeddings,
        sampling_probability=sampling_prob,
    )

    # The decoder starts from an all-zero state built for exactly BATCH_SIZE rows;
    # encoder_state is not used here, only the attention memory.
    training_initial_state = out_cell.zero_state(
        batch_size=BATCH_SIZE, dtype=tf.float32)
    training_decoder = tf.contrib.seq2seq.BasicDecoder(
        cell=out_cell,
        helper=training_helper,
        initial_state=training_initial_state,
    )

    # unrolling the decoder layer
    training_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
        training_decoder, impute_finished=True)

# Define Decoder for inference

In [None]:
# Greedy decoding that shares `embeddings` and `out_cell` with the training decoder.
# NOTE(review): sharing comes from reusing the same `out_cell` object — confirm
# whether the reuse=True variable scope is actually required.
with tf.variable_scope('inference_decoder', reuse=True):
    # Every sequence starts from BOS and stops once EOS is emitted.
    start_tokens = tf.fill([BATCH_SIZE], BOS)
    inference_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
        embedding=embeddings,
        start_tokens=start_tokens,
        end_token=EOS,
    )

    # Same zero initial state (fixed BATCH_SIZE rows) as the training decoder.
    inference_initial_state = out_cell.zero_state(
        batch_size=BATCH_SIZE, dtype=tf.float32)
    inference_decoder = tf.contrib.seq2seq.BasicDecoder(
        cell=out_cell,
        helper=inference_helper,
        initial_state=inference_initial_state,
    )

    # Decoding is capped at max_length steps.
    inference_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
        inference_decoder,
        impute_finished=True,
        maximum_iterations=max_length,
    )

# Define the training logits, inference outputs, and loss mask

In [None]:
# Per-step vocabulary scores from the training decoder.
training_logits = tf.identity(
    input=training_outputs.rnn_output,
    name='logits',
)
# Per-step vocabulary scores from the greedy inference decoder.
# Both identity ops request the name 'logits'; TensorFlow makes op names unique
# within a graph, so this one ends up with a suffixed name.
pred_output = tf.identity(
    input=inference_outputs.rnn_output,
    name='logits',
)
# Float mask of width max_length built from the sequence_length placeholder;
# used as the per-position loss weights.
masks = tf.sequence_mask(
    lengths=sequence_length,
    maxlen=max_length,
    dtype=tf.float32,
    name='mask',
)

In [None]:
# Quick sanity check of the graph tensors.
# NOTE: the bare `training_logits` expression is not the last statement of the
# cell, so the notebook does not display it; only the printed max_length shows.
training_logits
print(max_length)

41


In [None]:
# Define training
with tf.name_scope("optimization"):
    # Loss function - weighted softmax cross entropy; `masks` supplies the
    # per-position weights.
    cost = tf.contrib.seq2seq.sequence_loss(
        logits=training_logits,
        targets=decoder_targets,
        weights=masks,
    )

    # Optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)

    # Gradient Clipping: clamp every gradient element to [-1, 1] and skip
    # variables that received no gradient.
    gradients = optimizer.compute_gradients(cost)
    capped_gradients = [
        (tf.clip_by_value(grad, -1., 1.), var)
        for grad, var in gradients
        if grad is not None
    ]
    train_op = optimizer.apply_gradients(capped_gradients)

# Register the loss as a scalar summary (left as the last expression so the
# notebook shows the resulting summary tensor).
tf.summary.scalar('loss', cost)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


<tf.Tensor 'loss:0' shape=() dtype=string>

# TensorBoard log storage

In [None]:
# Root directory for the TensorBoard event files.
summaries_dir = './log'

# Single op that evaluates every summary registered in the graph so far.
merged = tf.summary.merge_all()

# The train writer also records the session graph; the test writer only
# opens its log directory.
train_writer = tf.summary.FileWriter(
    '{}/train'.format(summaries_dir),
    graph=sess.graph,
)
test_writer = tf.summary.FileWriter('{}/test'.format(summaries_dir))

# Training

In [None]:
# training start
import math
sess.run(tf.global_variables_initializer())

epoc = 7000
batches_per_epoch = len(X) // BATCH_SIZE
if batches_per_epoch == 0:
    # Without at least one full batch the per-epoch report below would have
    # nothing to print (it reads the last batch's loss and prediction).
    raise ValueError(
        "Need at least BATCH_SIZE={0} examples to train, got {1}".format(BATCH_SIZE, len(X)))

# The training helper reads its lengths from sequence_length_fake; feeding
# max_length for every row keeps the decoder running over the full padded
# length, the same width as the loss mask.
fake_max_sequence = np.array([max_length] * BATCH_SIZE)

for i in range(epoc):
    # Scheduled-sampling probability: starts at 0.2, grows linearly with the
    # epoch index and is capped at 1.0.
    sample_prob_input = min(float(i) / epoc + 0.2, 1.0)
    for j in range(batches_per_epoch):
        X_batch, y_inputs_batch, y_targets_batch, sequence_length_batch = generate_batch(
            X, y_inputs, y_targets, word_idx, sequence_lengths, BATCH_SIZE)
        feed = {
            encoder_inputs: X_batch,
            decoder_inputs: y_inputs_batch,
            decoder_targets: y_targets_batch,
            sequence_length: sequence_length_batch,
            sequence_length_fake: fake_max_sequence,
            sampling_prob: sample_prob_input,
        }
        # Fetch the merged summaries together with the training step so the
        # 'loss' scalar actually reaches the TensorBoard log.
        _, loss, prediction, summary = sess.run(
            [train_op, cost, pred_output, merged], feed_dict=feed)
        train_writer.add_summary(summary, i * batches_per_epoch + j)
    # Push this epoch's events to disk so TensorBoard can show them while
    # training is still running.
    train_writer.flush()

    # Report the first example of the last batch of the epoch.
    print( [ idx_word[idx] for idx in np.argmax(prediction[0], axis = 1) ])
    print( 'truth:', [ idx_word[y_targets_batch[0,k]] for k in range(max_length)])
    print("epoch {0}: loss : {1}".format(i, loss))

['A', 'man', 'is', 'UWK', 'a', 'UWK', 'EOS']
truth: ['A', 'UWK', 'man', 'is', 'UWK', 'as', 'his', 'UWK', 'is', 'UWK', 'in', 'a', 'UWK', 'UWK', 'while', 'UWK', 'are', 'driven', 'into', 'it', 'and', 'UWK', 'over', 'his', 'face', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
epoch 0: loss : 3.3908307552337646
['A', 'woman', 'is', 'cutting', 'a', 'UWK', 'EOS']
truth: ['Several', 'boys', 'are', 'playing', 'football', 'in', 'a', 'fenced', 'in', 'area', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
epoch 1: loss : 3.568258762359619


In [None]:
# Inspect the constant length array that is fed to sequence_length_fake during training.
fake_max_sequence

In [None]:
# https://github.com/JasonYao81000/MLDS2018SPRING/tree/master/hw2/hw2_1