In [1]:
import tensorflow as tf
import numpy as np
from collections import Counter

train_file = open("./simple-examples/data/ptb.train.txt", 'r')
train_data = train_file.read().replace("\n", "<eos>").split()

c = Counter(train_data)
count_pairs = sorted(c.items(), key=lambda x: (-x[1], x[0]))
words, _ = zip(*count_pairs)
word_to_id = dict(zip(words, range(len(words))))
word_to_id = [word_to_id[word] for word in train_data if word in word_to_id]

In [6]:
init_scale = 0.1
learning_rate = 1.0
max_grad_norm = 5
num_layers = 2
num_steps = 20
hidden_size = 200
max_epoch = 4
max_max_epoch = 13
keep_prob = 1.0
lr_decay = 0.5
batch_size = 20
vocab_size = 10000
epoch = ((len(word_to_id) // batch_size) - 1) // num_steps

In [23]:
with tf.Graph().as_default():
    initializer = tf.random_uniform_initializer(-init_scale, init_scale)
    raw_data = tf.convert_to_tensor(word_to_id, name="raw_data", dtype=tf.int32)
    data_len = tf.size(raw_data)
    batch_len = data_len // batch_size
    data = tf.reshape(raw_data[0 : batch_size * batch_len],[batch_size, batch_len])
    epoch_size = (batch_len - 1) // num_steps
    epoch_size = tf.identity(epoch_size, name="epoch_size")

    i = tf.train.range_input_producer(epoch_size, shuffle=False).dequeue()
    x = tf.strided_slice(data, [0, i * num_steps], [batch_size, (i + 1) * num_steps])
    x.set_shape([batch_size, num_steps])
    y = tf.strided_slice(data, [0, i * num_steps + 1], [batch_size, (i + 1) * num_steps + 1])
    y.set_shape([batch_size, num_steps])
    
    
    lstm_cell = tf.contrib.rnn.BasicLSTMCell(hidden_size, forget_bias=0.0, state_is_tuple=True)
    lstm_cell_with_dropout = tf.contrib.rnn.DropoutWrapper(lstm_cell, output_keep_prob=keep_prob)
    cell = tf.contrib.rnn.MultiRNNCell([lstm_cell_with_dropout for _ in range(num_layers)], state_is_tuple=True)
    
    initial_state = cell.zero_state(batch_size, tf.float32)

    with tf.device("/cpu:0"):
        embedding = tf.get_variable("embedding", [vocab_size, hidden_size], dtype=tf.float32)
        inputs = tf.nn.embedding_lookup(embedding, x)
    
    outputs = list()
    state = initial_state
    for time_step in range(num_steps):
        (cell_output, state) = cell(inputs[:, time_step, :], state)
        outputs.append(cell_output)

    output = tf.reshape(tf.stack(axis=1, values=outputs), [-1, hidden_size])
    softmax_w = tf.Variable(tf.random_normal([hidden_size, vocab_size]))
    softmax_b = tf.Variable(tf.random_normal([vocab_size]))
    logits = tf.matmul(output, softmax_w) + softmax_b

    logits = tf.reshape(logits, [batch_size, num_steps, vocab_size])
    loss = tf.contrib.seq2seq.sequence_loss(
        logits,
        y,
        tf.ones([batch_size, num_steps], dtype=tf.float32),
        average_across_timesteps=False,
        average_across_batch=True
    )

    cost = tf.reduce_sum(loss)
    final_state = state
    lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), max_grad_norm)
    optimizer = tf.train.GradientDescentOptimizer(lr)
    train_op = optimizer.apply_gradients(zip(grads, tvars), global_step=tf.contrib.framework.get_or_create_global_step())

    new_lr = tf.placeholder(tf.float32, shape=[], name="new_learning_rate")
    lr_update = tf.assign(lr, new_lr)
    sv = tf.train.Supervisor()
    with sv.managed_session() as sess:
        for i in range(max_max_epoch):
            lr_decay = lr_decay ** max(i + 1 - max_epoch, 0.0)
            sess.run(lr_update, feed_dict={new_lr: learning_rate * lr_decay})
            iters = 0
            costs = 0.0
            state = sess.run(initial_state)
            fetches = {
              "cost": cost,
              "final_state": final_state,
              "eval_op": train_op
            }
            for step in range(epoch):
                feed_dict = {}
                for i, (c, h) in enumerate(initial_state):
                    feed_dict[c] = state[i].c
                    feed_dict[h] = state[i].h
                vals = sess.run(fetches, feed_dict)
                cost_ = vals["cost"]
                state = vals["final_state"]
                costs += cost_
                iters += num_steps
            print("Train Perplexity", np.exp(costs / iters))

INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Starting standard services.
INFO:tensorflow:Starting queue runners.
Train Perplexity 348.73364266383055
Train Perplexity 188.3046186111691
Train Perplexity 148.6955413133737
Train Perplexity 127.58872554086734
Train Perplexity 113.45316650943278
Train Perplexity 103.5656751973764
Train Perplexity 96.24438601004744
Train Perplexity 90.76938918841648
Train Perplexity 86.24715167775376
Train Perplexity 82.96657328658412
Train Perplexity 80.08114449222587
Train Perplexity 77.67344758522034
Train Perplexity 75.70715412944713
