In [None]:
import time
import numpy as np
import matplotlib.pyplot as plt

In [None]:
import warnings; warnings.filterwarnings('ignore')
import tensorflow as tf

In [None]:
import reader

## Set Configs

In [None]:
# --- Model architecture -------------------------------------------------
vocab_size = 10000            # number of distinct word ids in the vocabulary
embedding_vector_size = 200   # length of each word-embedding vector
num_layers = 2                # number of stacked recurrent layers in the model
hidden_size_l1 = 256          # processing units (neurons) in the first hidden layer
hidden_size_l2 = 128          # processing units (neurons) in the second hidden layer
num_steps = 20                # recurrence steps, i.e. the depth of the unrolled RNN
init_scale = 0.1              # initial weight scale
keep_prob = 1                 # dropout keep probability; at 1 the Dropout wrapping is ignored

# --- Optimisation -------------------------------------------------------
batch_size = 60               # sequences per batch
num_epochs = 15               # total number of training epochs
learning_rate = 1.0           # initial learning rate
max_epoch_decay_lr = 4        # epochs trained at the initial learning rate before decay starts
decay = 0.5                   # multiplicative learning-rate decay
max_grad_norm = 5             # upper bound on the gradient norm used for clipping

# --- Mode ---------------------------------------------------------------
is_training = 1               # flag separating training from testing

## Create Interactive Session

In [None]:
# Create an interactive TensorFlow session and bind it to the notebook-level name `sess`.
sess = tf.InteractiveSession()

## Load Datasets

In [None]:
# Relative path that is handed to `reader.ptb_raw_data` when the datasets are loaded.
data_dir = './datasets/data/simple-examples/data/' # data directory for our datasets

In [None]:
# Reads the data and separates it into training, validation and testing data.
# The reader's result is unpacked into five values: the three splits, `vocab`, and
# `word_to_id` (a mapping iterated with `.items()` by `id_to_word`).
# NOTE(review): the exact meaning of `vocab` depends on the local `reader` module - confirm there.
raw_data = reader.ptb_raw_data(data_dir)
train_data, valid_data, test_data, vocab, word_to_id = raw_data

In [None]:
def id_to_word(id_list, vocabulary=None):
    """Translate a sequence of word ids back into words.

    Args:
        id_list: iterable of word ids to decode.
        vocabulary: optional ``word -> id`` mapping. When omitted, the
            notebook-level ``word_to_id`` mapping is used.

    Returns:
        list: the decoded words, in the order of ``id_list``. Ids that do not
        occur in the mapping contribute nothing; if several words share an id,
        all of them are emitted in mapping order.
    """
    mapping = word_to_id if vocabulary is None else vocabulary

    # Invert the mapping once (O(V)) instead of scanning the whole vocabulary
    # for every id (O(n * V)).
    words_by_id = {}
    for word, wid in mapping.items():
        words_by_id.setdefault(wid, []).append(word)

    line = []
    for w in id_list:
        line.extend(words_by_id.get(w, ()))
    return line

In [None]:
# Sanity check: size of the training split, and its first eight ids decoded back to words.
print('Total of Training Data:', len(train_data))
print('Word Examples:', id_to_word(train_data[0:8]))

## Set Placeholders

In [None]:
# Pull one batch from the training iterator; its first element is bound to X and
# its second to y.
iterator = reader.ptb_iterator(train_data, batch_size, num_steps)
first_tupple = next(iterator)
X, y = first_tupple[0], first_tupple[1]

In [None]:
# Integer placeholders of shape [batch_size, num_steps]:
# `input_data` is looked up in the embedding table, `targets` is what the
# sequence loss is computed against.
input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
targets = tf.placeholder(tf.int32, [batch_size, num_steps])

## Create LSTM Cell

In [None]:
# Collects the LSTM cells that are later stacked into a single MultiRNNCell.
LSTM_cells = []

In [None]:
# First LSTM layer: `hidden_size_l1` units, created with forget_bias=0.0.
# NOTE: re-running this cell appends another cell to `LSTM_cells`.
cell = tf.contrib.rnn.BasicLSTMCell(hidden_size_l1, forget_bias=0.0)
LSTM_cells.append(cell)

In [None]:
# Second LSTM layer: `hidden_size_l2` units, created with forget_bias=0.0.
# NOTE: re-running this cell appends another cell to `LSTM_cells`.
cell = tf.contrib.rnn.BasicLSTMCell(hidden_size_l2, forget_bias=0.0)
LSTM_cells.append(cell)

In [None]:
# Combine the collected cells, in list order, into one multi-layer recurrent cell.
stacked_LSTM = tf.contrib.rnn.MultiRNNCell(LSTM_cells)

In [None]:
# Zero-valued float32 starting state for `batch_size` sequences. `run_model` also
# uses it as a feed_dict key to carry the recurrent state from one batch to the next.
initial_state = stacked_LSTM.zero_state(batch_size, tf.float32)

## Create The Embeddings

In [None]:
# Embedding table: one row of `embedding_vector_size` values per vocabulary id.
# No initializer is passed here, so get_variable's default initialisation applies.
embedding_vocab = tf.get_variable('embedding_vocab', [vocab_size, embedding_vector_size])

In [None]:
# Replace every word id in `input_data` by its row of `embedding_vocab`.
inputs = tf.nn.embedding_lookup(embedding_vocab, input_data)

In [None]:
# Run the stacked LSTM over the embedded inputs, starting from `initial_state`.
# `lstm_out` receives the outputs and `hidden_state` the resulting recurrent state,
# which `run_model` fetches and feeds back in for the following batch.
lstm_out, hidden_state = tf.nn.dynamic_rnn(stacked_LSTM, inputs, initial_state=initial_state)

In [None]:
# Flatten the LSTM outputs into 2-D rows of width `hidden_size_l2` so they can be
# multiplied with the softmax weight matrix.
output = tf.reshape(lstm_out, [-1, hidden_size_l2])

## Set Output (Softmax) Operations

In [None]:
# Parameters of the output projection: a [hidden_size_l2, vocab_size] weight matrix
# and a bias with one entry per vocabulary id.
softmax_W = tf.get_variable('softmax_W', [hidden_size_l2, vocab_size])
softmax_b = tf.get_variable('softmax_b', [vocab_size])

In [None]:
# Project every flattened LSTM output row onto the vocabulary, then restore the
# [batch_size, num_steps, vocab_size] layout.
flat_logits = tf.matmul(output, softmax_W) + softmax_b
logits_op = tf.reshape(flat_logits, [batch_size, num_steps, vocab_size])

In [None]:
# Turn the logits into a probability distribution over the vocabulary.
probs_op = tf.nn.softmax(logits_op)

In [None]:
# Index of the highest-probability vocabulary entry at every (batch, step) position.
words_op = tf.argmax(probs_op, axis=2)

## Set Loss Function

In [None]:
# Cross-entropy between logits and targets, averaged over the batch but not over
# the time steps; every position is weighted 1.
loss_weights = tf.ones([batch_size, num_steps], dtype=tf.float32)
loss_op = tf.contrib.seq2seq.sequence_loss(
    logits=logits_op,
    targets=targets,
    weights=loss_weights,
    average_across_timesteps=False,
    average_across_batch=True,
)

In [None]:
# Collapse the loss into a single scalar by summing all of its elements.
loss_op = tf.reduce_sum(loss_op)

## Set Optimizer

In [None]:
# Learning rate held in a non-trainable variable so that the training loop can
# assign a new value to it at the start of every epoch.
lr = tf.Variable(0.0, trainable=False)

In [None]:
# Differentiate the loss with respect to every trainable variable, then rescale the
# gradients so that their global norm does not exceed `max_grad_norm`.
train_vars = tf.trainable_variables()
raw_grads = tf.gradients(loss_op, train_vars)
grads, _ = tf.clip_by_global_norm(raw_grads, max_grad_norm)

In [None]:
# Plain gradient descent driven by the `lr` variable; `optimizer_op` is the op that
# applies the clipped gradients to their variables.
sgd = tf.train.GradientDescentOptimizer(lr)
optimizer_op = sgd.apply_gradients(zip(grads, train_vars))

## Train The Graph

In [None]:
def run_model(data, optimizer_op, verbose=False):
    """Run one full pass over ``data`` and return the resulting perplexity.

    The recurrent state returned for one batch is fed back in as the initial
    state of the next batch. Uses the notebook-level session ``sess`` and the
    graph tensors defined in earlier cells.

    Args:
        data: sequence of word ids; it is batched by ``reader.ptb_iterator``
            using the notebook-level ``batch_size`` and ``num_steps``.
        optimizer_op: op executed together with the loss for every batch.
            Pass the training op to update the weights, or a no-op to only
            evaluate.
        verbose: when true, print progress about ten times per pass.

    Returns:
        ``exp(accumulated loss / accumulated time steps)``.

    Raises:
        ZeroDivisionError: if the iterator yields no batch at all.
    """
    epoch_size = ((len(data) // batch_size) - 1) // num_steps

    # Report roughly ten times per pass. The interval is clamped to at least 1 so
    # that small datasets (epoch_size < 10) no longer divide by zero, and the offset
    # is reduced modulo the interval so short passes still produce reports. For
    # intervals above 10 this is the same schedule as before (steps 10, 10+k, ...).
    report_every = max(epoch_size // 10, 1)
    report_offset = 10 % report_every

    start_time = time.time()

    losses = 0.0
    iters = 0

    state = sess.run(initial_state)

    for step, (X, y) in enumerate(reader.ptb_iterator(data, batch_size, num_steps)):

        # Only fetch what is used: the predicted words were never read, and fetching
        # them forced a softmax and argmax over the whole vocabulary on every step.
        loss, state, _ = sess.run([loss_op, hidden_state, optimizer_op],
                                  feed_dict={input_data: X, targets: y, initial_state: state})
        losses += loss
        iters += num_steps

        if verbose and step % report_every == report_offset:
            # Guard against a zero reading from a coarse clock.
            elapsed = max(time.time() - start_time, 1e-9)
            speed = iters * batch_size / elapsed
            print(f'Iteration: {step}/ {epoch_size}, Perplexity: {np.exp(losses/iters):.3f}, Speed: {speed:.0f} wps')

    perplexity = np.exp(losses / iters)

    return perplexity

In [None]:
# Ops needed in every epoch are built once, before the loop, so the graph does not
# grow by one assign op and two no-ops per epoch.
new_lr = tf.placeholder(tf.float32, shape=[])
lr_update_op = tf.assign(lr, new_lr)
eval_op = tf.no_op()  # stands in for the optimizer when only evaluating

# NOTE: this rebinds the notebook-level `sess` that `run_model` reads.
with tf.Session() as sess:

    init_op = tf.global_variables_initializer()

    sess.run(init_op) # initialise every variable inside this session

    # NOTE(review): this initializer is created after the variables have already
    # been initialised and is not passed to anything, so it has no effect on the weights.
    initializer = tf.random_uniform_initializer(-init_scale, init_scale)

    for i_epoch in range(1, num_epochs+1):

        # define the decay for this epoch
        lr_decay = decay ** max(i_epoch - max_epoch_decay_lr, 0.0)

        # Always derive the epoch's rate from the untouched base `learning_rate`.
        # Overwriting `learning_rate` with the decayed value made the decay compound
        # from one epoch to the next (e.g. 0.125 instead of 0.25 in epoch 6).
        current_lr = sess.run(lr_update_op, feed_dict={new_lr: learning_rate * lr_decay})
        print(f'Epoch: {i_epoch}, Learning Rate: {current_lr:.3f}')

        # run the loop for this epoch in the training model
        train_perplexity = run_model(train_data, optimizer_op, verbose=True)
        print(f'Epoch {i_epoch}, Train Perplexity: {train_perplexity:.3f}')

        # run the loop for this epoch in the validation model
        valid_perplexity = run_model(valid_data, eval_op)
        print(f'Epoch {i_epoch}, Valid Perplexity: {valid_perplexity:.3f}')

    # run the loop in the testing model to see how effective was our training
    test_perplexity = run_model(test_data, eval_op)
    print(f'Training LSTM Model is done. Test Perplexity: {test_perplexity:.3f}')

---