# The Lord of The RNN


Base code:
https://github.com/udacity/deep-learning/blob/master/intro-to-rnns/Anna_KaRNNa.ipynb

Importing all necessary packages

In [1]:
import os, re, sys, time
import numpy as np
import tensorflow as tf
from collections import namedtuple

Reading all the books that will be used.

Creating a vocabulary containing all existing characters and two dictionaries that map the character to an integer and an integer to a character.

In [2]:
booksPath = 'books'
text = ''

for book in sorted([book for book in os.listdir(booksPath) if os.path.isfile(os.path.join(booksPath, book))]):
    with open(booksPath+'/'+book, 'r') as f:
        lines = f.readlines()
        for line in lines:
            if re.match(r'^\s*$', line) != True:
                text += line
                
vocab = sorted(set(text))
vocab_to_int = {c: i for i, c in enumerate(vocab)}
int_to_vocab = dict(enumerate(vocab))
encoded = np.array([vocab_to_int[c] for c in text], dtype=np.int32)

In [None]:
dict_vocab = {}
dict_vocab["vocab"] = vocab
dict_vocab["vocab_to_int"] = vocab_to_int


In [3]:
encoded[:100]

array([26, 34, 39, 46, 37, 34, 39, 29, 26, 37, 82,  1,  1,  1, 45, 59, 56,
        2, 38, 72, 70, 60, 54,  2, 66, 57,  2, 71, 59, 56,  2, 26, 60, 65,
       72, 69,  1,  1,  1,  1, 45, 59, 56, 69, 56,  2, 74, 52, 70,  2, 30,
       69, 72,  9,  2, 71, 59, 56,  2, 40, 65, 56,  9,  2, 74, 59, 66,  2,
       60, 65,  2, 26, 69, 55, 52,  2, 60, 70,  2, 54, 52, 63, 63, 56, 55,
        2, 34, 63, 98, 73, 52, 71, 52, 69, 23,  2, 52, 65, 55,  2], dtype=int32)

In [4]:
text[:100]

'AINULINDALË\n\n\nThe Music of the Ainur\n\n\n\nThere was Eru, the One, who in Arda is called Ilúvatar; and '

In [5]:
len(vocab)

108

In [6]:
def get_batches(arr, n_seqs, n_steps):
    '''Create a generator that returns batches of size
       n_seqs x n_steps from arr.
       
       Arguments
       ---------
       arr: Array you want to make batches from
       n_seqs: Batch size, the number of sequences per batch
       n_steps: Number of sequence steps per batch
    '''
    # Get the number of characters per batch and number of batches we can make
    characters_per_batch = n_seqs * n_steps
    n_batches = len(arr)//characters_per_batch
    
    # Keep only enough characters to make full batches
    arr = arr[:n_batches * characters_per_batch]
    
    # Reshape into n_seqs rows
    arr = arr.reshape((n_seqs, -1))
    
    for n in range(0, arr.shape[1], n_steps):
        # The features
        x = arr[:, n:n+n_steps]
        # The targets, shifted by one
        y = np.zeros_like(x)
        y[:, :-1], y[:, -1] = x[:, 1:], x[:, 0]
        yield x, y

In [7]:
batches = get_batches(encoded, 10, 50)
x, y = next(batches)

In [8]:
print('x\n', x[:10, :10])
print('\ny\n', y[:10, :10])

x
 [[ 26  34  39  46  37  34  39  29  26  37]
 [ 60  70  59   2  60  71  11   2  45  59]
 [ 70  60  63  55  72  69   9   2  53  72]
 [ 63  56  52  70  56   2  66  69   2  53]
 [ 59  72  69  69  60  56  55   2  72  67]
 [ 66   2  64  56   2  58  69  52  73  56]
 [ 66  71   2  52  65  70  74  56  69   2]
 [ 66  69  65  11   2 103  48  56   2  57]
 [ 71   2  71  59  56  64   9   2  54  66]
 [ 52  70   2  71  66  66   2  70  59  66]]

y
 [[ 34  39  46  37  34  39  29  26  37  82]
 [ 70  59   2  60  71  11   2  45  59  56]
 [ 60  63  55  72  69   9   2  53  72  60]
 [ 56  52  70  56   2  66  69   2  53  56]
 [ 72  69  69  60  56  55   2  72  67   2]
 [  2  64  56   2  58  69  52  73  56  63]
 [ 71   2  52  65  70  74  56  69   2  52]
 [ 69  65  11   2 103  48  56   2  57  56]
 [  2  71  59  56  64   9   2  54  66  63]
 [ 70   2  71  66  66   2  70  59  66  69]]


In [9]:
def build_inputs(batch_size, num_steps):
    ''' Define placeholders for inputs, targets, and dropout 
    
        Arguments
        ---------
        batch_size: Batch size, number of sequences per batch
        num_steps: Number of sequence steps in a batch
        
    '''
    # Declare placeholders we'll feed into the graph
    inputs = tf.placeholder(tf.int32, [batch_size, num_steps], name='inputs')
    targets = tf.placeholder(tf.int32, [batch_size, num_steps], name='targets')
    
    # Keep probability placeholder for drop out layers
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    
    return inputs, targets, keep_prob

In [10]:
def build_lstm(lstm_size, num_layers, batch_size, keep_prob):
    ''' Build LSTM cell.
    
        Arguments
        ---------
        keep_prob: Scalar tensor (tf.placeholder) for the dropout keep probability
        lstm_size: Size of the hidden layers in the LSTM cells
        num_layers: Number of LSTM layers
        batch_size: Batch size

    '''
    ### Build the LSTM Cell
    
    def build_cell(lstm_size, keep_prob):
        # Use a basic LSTM cell
        lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)
        
        # Add dropout to the cell
        drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob)
        return drop
    
    
    # Stack up multiple LSTM layers, for deep learning
    cell = tf.contrib.rnn.MultiRNNCell([build_cell(lstm_size, keep_prob) for _ in range(num_layers)])
    initial_state = cell.zero_state(batch_size, tf.float32)
    
    return cell, initial_state

In [11]:
def build_output(lstm_output, in_size, out_size):
    ''' Build a softmax layer, return the softmax output and logits.
    
        Arguments
        ---------
        
        x: Input tensor
        in_size: Size of the input tensor, for example, size of the LSTM cells
        out_size: Size of this softmax layer
    
    '''

    # Reshape output so it's a bunch of rows, one row for each step for each sequence.
    # That is, the shape should be batch_size*num_steps rows by lstm_size columns
    seq_output = tf.concat(lstm_output, axis=1)
    x = tf.reshape(seq_output, [-1, in_size])
    
    # Connect the RNN outputs to a softmax layer
    with tf.variable_scope('softmax'):
        softmax_w = tf.Variable(tf.truncated_normal((in_size, out_size), stddev=0.1))
        softmax_b = tf.Variable(tf.zeros(out_size))
    
    # Since output is a bunch of rows of RNN cell outputs, logits will be a bunch
    # of rows of logit outputs, one for each step and sequence
    logits = tf.matmul(x, softmax_w) + softmax_b
    
    # Use softmax to get the probabilities for predicted characters
    out = tf.nn.softmax(logits, name='predictions')
    
    return out, logits

In [12]:
def build_loss(logits, targets, lstm_size, num_classes):
    ''' Calculate the loss from the logits and the targets.
    
        Arguments
        ---------
        logits: Logits from final fully connected layer
        targets: Targets for supervised learning
        lstm_size: Number of LSTM hidden units
        num_classes: Number of classes in targets
        
    '''
    
    # One-hot encode targets and reshape to match logits, one row per batch_size per step
    y_one_hot = tf.one_hot(targets, num_classes)
    y_reshaped = tf.reshape(y_one_hot, logits.get_shape())
    
    # Softmax cross entropy loss
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y_reshaped)
    loss = tf.reduce_mean(loss)
    return loss

In [13]:
def build_optimizer(loss, learning_rate, grad_clip):
    ''' Build optmizer for training, using gradient clipping.
    
        Arguments:
        loss: Network loss
        learning_rate: Learning rate for optimizer
    
    '''
    
    # Optimizer for training, using gradient clipping to control exploding gradients
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars), grad_clip)
    train_op = tf.train.AdamOptimizer(learning_rate)
    optimizer = train_op.apply_gradients(zip(grads, tvars))
    
    return optimizer

In [14]:
class CharRNN:
    
    def __init__(self, num_classes, batch_size=64, num_steps=50, 
                       lstm_size=128, num_layers=2, learning_rate=0.001, 
                       grad_clip=5, sampling=False):
    
        # When we're using this network for sampling later, we'll be passing in
        # one character at a time, so providing an option for that
        if sampling == True:
            batch_size, num_steps = 1, 1
        else:
            batch_size, num_steps = batch_size, num_steps

        tf.reset_default_graph()
        
        # Build the input placeholder tensors
        self.inputs, self.targets, self.keep_prob = build_inputs(batch_size, num_steps)

        # Build the LSTM cell
        cell, self.initial_state = build_lstm(lstm_size, num_layers, batch_size, self.keep_prob)

        ### Run the data through the RNN layers
        # First, one-hot encode the input tokens
        x_one_hot = tf.one_hot(self.inputs, num_classes)
        
        # Run each sequence step through the RNN and collect the outputs
        outputs, state = tf.nn.dynamic_rnn(cell, x_one_hot, initial_state=self.initial_state)
        self.final_state = state
        
        # Get softmax predictions and logits
        self.prediction, self.logits = build_output(outputs, lstm_size, num_classes)
        
        # Loss and optimizer (with gradient clipping)
        self.loss = build_loss(self.logits, self.targets, lstm_size, num_classes)
        self.optimizer = build_optimizer(self.loss, learning_rate, grad_clip)

In [15]:
batch_size = 10         # Sequences per batch
num_steps = 50          # Number of sequence steps per batch
lstm_size = 128         # Size of hidden layers in LSTMs
num_layers = 2          # Number of LSTM layers
learning_rate = 0.01    # Learning rate
keep_prob = 0.5         # Dropout keep probability

In [16]:
epochs = 15
# Save every N iterations
save_every_n = 1000
show_every_n = 1000

model = CharRNN(len(vocab), batch_size=batch_size, num_steps=num_steps,
                lstm_size=lstm_size, num_layers=num_layers, 
                learning_rate=learning_rate)

saver = tf.train.Saver(max_to_keep=5)

config=tf.ConfigProto(
    device_count={"CPU":16}, 
    inter_op_parallelism_threads=16, 
    intra_op_parallelism_threads=16,
)

with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    
    # Use the line below to load a checkpoint and resume training
    #saver.restore(sess, 'checkpoints/______.ckpt')
    counter = 0
    for e in range(epochs):
        # Train network
        new_state = sess.run(model.initial_state)
        loss = 0
        for x, y in get_batches(encoded, batch_size, num_steps):
            counter += 1
            start = time.time()
            feed = {model.inputs: x,
                    model.targets: y,
                    model.keep_prob: keep_prob,
                    model.initial_state: new_state}
            batch_loss, new_state, _ = sess.run([model.loss, 
                                                 model.final_state, 
                                                 model.optimizer], 
                                                 feed_dict=feed)
            
            end = time.time()
            if (counter % show_every_n == 0):
                print('Epoch: {}/{}... '.format(e+1, epochs),
                      'Training Step: {}... '.format(counter),
                      'Training loss: {:.4f}... '.format(batch_loss),
                      '{:.4f} sec/batch'.format((end-start)))
        
            if (counter % save_every_n == 0):
                saver.save(sess, "checkpoints/e{}_i{}_l{}.ckpt".format(e+1, counter, lstm_size))
    
    saver.save(sess, "checkpoints/e{}_i{}_l{}.ckpt".format(e+1, counter, lstm_size))

Epoch: 1/15...  Training Step: 1000...  Training loss: 2.0783...  0.1644 sec/batch
Epoch: 1/15...  Training Step: 2000...  Training loss: 1.7992...  0.0839 sec/batch
Epoch: 1/15...  Training Step: 3000...  Training loss: 1.8999...  0.0862 sec/batch
Epoch: 1/15...  Training Step: 4000...  Training loss: 1.7004...  0.1201 sec/batch
Epoch: 1/15...  Training Step: 5000...  Training loss: 1.7649...  0.0881 sec/batch
Epoch: 1/15...  Training Step: 6000...  Training loss: 1.5744...  0.0856 sec/batch
Epoch: 1/15...  Training Step: 7000...  Training loss: 1.7017...  0.0865 sec/batch
Epoch: 2/15...  Training Step: 8000...  Training loss: 1.7101...  0.0839 sec/batch
Epoch: 2/15...  Training Step: 9000...  Training loss: 1.7555...  0.0857 sec/batch
Epoch: 2/15...  Training Step: 10000...  Training loss: 1.6890...  0.1117 sec/batch
Epoch: 2/15...  Training Step: 11000...  Training loss: 1.7097...  0.2199 sec/batch
Epoch: 2/15...  Training Step: 12000...  Training loss: 1.7094...  0.1198 sec/batch
E

Epoch: 13/15...  Training Step: 99000...  Training loss: 1.6740...  0.1416 sec/batch
Epoch: 14/15...  Training Step: 100000...  Training loss: 1.5571...  0.1190 sec/batch
Epoch: 14/15...  Training Step: 101000...  Training loss: 1.4686...  0.1068 sec/batch
Epoch: 14/15...  Training Step: 102000...  Training loss: 1.4647...  0.1207 sec/batch
Epoch: 14/15...  Training Step: 103000...  Training loss: 1.5873...  0.0854 sec/batch
Epoch: 14/15...  Training Step: 104000...  Training loss: 1.6049...  0.1167 sec/batch
Epoch: 14/15...  Training Step: 105000...  Training loss: 1.6603...  0.1111 sec/batch
Epoch: 14/15...  Training Step: 106000...  Training loss: 1.6969...  0.1089 sec/batch
Epoch: 15/15...  Training Step: 107000...  Training loss: 1.6003...  0.1072 sec/batch
Epoch: 15/15...  Training Step: 108000...  Training loss: 1.5646...  0.1189 sec/batch
Epoch: 15/15...  Training Step: 109000...  Training loss: 1.5841...  0.1075 sec/batch
Epoch: 15/15...  Training Step: 110000...  Training los

In [16]:
tf.train.get_checkpoint_state('checkpoints')

model_checkpoint_path: "checkpoints/e15_i114540_l128.ckpt"
all_model_checkpoint_paths: "checkpoints/e15_i111000_l128.ckpt"
all_model_checkpoint_paths: "checkpoints/e15_i112000_l128.ckpt"
all_model_checkpoint_paths: "checkpoints/e15_i113000_l128.ckpt"
all_model_checkpoint_paths: "checkpoints/e15_i114000_l128.ckpt"
all_model_checkpoint_paths: "checkpoints/e15_i114540_l128.ckpt"

In [17]:
def pick_top_n(preds, vocab_size, top_n=5):
    p = np.squeeze(preds)
    p[np.argsort(p)[:-top_n]] = 0
    p = p / np.sum(p)
    c = np.random.choice(vocab_size, 1, p=p)[0]
    return c

In [18]:
def sample(checkpoint, n_samples, lstm_size, vocab_size, prime="The "):
    samples = [c for c in prime]
    model = CharRNN(len(vocab), lstm_size=lstm_size, sampling=True)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, checkpoint)
        new_state = sess.run(model.initial_state)
        for c in prime:
            x = np.zeros((1, 1))
            x[0,0] = vocab_to_int[c]
            feed = {model.inputs: x,
                    model.keep_prob: 1.,
                    model.initial_state: new_state}
            preds, new_state = sess.run([model.prediction, model.final_state], 
                                         feed_dict=feed)

        c = pick_top_n(preds, len(vocab))
        samples.append(int_to_vocab[c])

        for i in range(n_samples):
            x[0,0] = c
            feed = {model.inputs: x,
                    model.keep_prob: 1.,
                    model.initial_state: new_state}
            preds, new_state = sess.run([model.prediction, model.final_state], 
                                         feed_dict=feed)

            c = pick_top_n(preds, len(vocab))
            samples.append(int_to_vocab[c])
        
    return ''.join(samples)

In [19]:
tf.train.latest_checkpoint('checkpoints')

'checkpoints/e15_i114540_l128.ckpt'

In [20]:
checkpoint = tf.train.latest_checkpoint('checkpoints')
samp1 = sample(checkpoint, 750, lstm_size, len(vocab), prime="Fëanor and Fingolfin")
print(samp1)

INFO:tensorflow:Restoring parameters from checkpoints/e15_i114540_l128.ckpt
Fëanor and Fingolfin and Men were so at the top and saw the brothers that the stars of hard they could go had a ground of he shall. In the trees of the treps had some words; and he had said at his fear and sheer on their booled; and the trees at tell her. But a bow her stands say and the boat on the despair of the sea and and shining, and he had nearer they came to the store of a long shoping they would see the black beard, and shall see. The white was to the saunt of the song and shoulders of any stirred. ‘The woods was been a between words and wooded the with a great waters as a wood with take one of sat and saw a good.’ I called the sense of the March of Sauloth in the Song of the Silmarils of the Elvens of the Misty Ring of the Naun he shall not come on a s


In [21]:
samp2 = sample(checkpoint, 750, lstm_size, len(vocab), prime="Beren and Luthien")
print(samp2)

INFO:tensorflow:Restoring parameters from checkpoints/e15_i114540_l128.ckpt
Beren and Luthien, and they deven at how seemed in. ‘I had been,” he said and some are that still huse here again, and seen hobbits of the song of Minas Tirith and there the shall a long first shield they seemed alone; but the still seeks and burrent, standed on the ships was a shadow about the stars in take the learn of the still of those that here, and she can hold on a ships of the time as that seen, but they shall not hear. He was a blanken already he soon, and that he were the black, and they hoursed to the star of a stream, still as the lands of his would heard and which think went the listens that the same and searth and the leaves of the stofter of the Might of the Elves of the North of the Ring of Gondor. But a golden hard seemers and as any wellow.


In [22]:
samp3 = sample(checkpoint, 750, lstm_size, len(vocab), prime="Aragorn and Arwen")
print(samp3)

INFO:tensorflow:Restoring parameters from checkpoints/e15_i114540_l128.ckpt
Aragorn and Arwen; from. Bilbo has nucking if the stranges tone that as were brought him on the shape, they seemed and set. Then they was a stone tone to him, and he had sinted a middle of through the stars, that seemed to the stone as they had been stared and best.

‘I was netted in still a little andow that was all the world of his head, and have the less took were to here with the hands of Minas Tarin. But they was a laster of stand or to hobbits with him, and say of this hall of the Merry of the Noldor of the Men the listentar, and they had become of the still, but it were sharped a bear of the deal of this shore of the Stirgount in his, and then the lunged west which he shall be to somp them as he saw the time on the stoused sea, and so there were forde


In [23]:
samp4 = sample(checkpoint, 750, lstm_size, len(vocab), prime="Smaug, Bilbo and Gandalf")
print(samp4)

INFO:tensorflow:Restoring parameters from checkpoints/e15_i114540_l128.ckpt
Smaug, Bilbo and Gandalf alone in hobbits than they deven a foring, both a should best, and to a stories seemed at our first and star at one back. There had too both and the time with his head and theys, but they heard to him were battle track there. But he walls to the holain.

‘I came up and the house though you set him. If they had sat to think, and how the white, they were things, and his hound had been became to a ground to an is the sons, and he had brought the stone, and all the wise and a shone of the hand of these sent of as had say the tankes and to his sank at a book of the three a steper, but in his same traid and was bore and all them out of his far the last and ferchour at, but the rise of to the high tale. I had some hare or belossed in the station.



In [24]:
samp5 = sample(checkpoint, 750, lstm_size, len(vocab), prime="Ents and Entwives")
print(samp5)

INFO:tensorflow:Restoring parameters from checkpoints/e15_i114540_l128.ckpt
Ents and Entwives of Men through the Saruman. Then went and bethe was the thought; and he was buitted is beated, to see helped to her breached the thought of the Marth that hart the tines.

‘To that shall all the lisses. But the hand of the shirr than any hand was taken that the song of the Morner our time they had started and that set in the top of the horn of the Mountain, and as a bottom of a bragening wonder in the way with his things and strided of the bright. All there were a large shadow of the sound of the worm in her horses to the song of the Shire, and the stars that with the tares of the waters of his tried of hard the strish was business of the water of his top and as all the same and the white horse and seemed;



They were bundled the shadow th


In [25]:
samp6 = sample(checkpoint, 750, lstm_size, len(vocab), prime="A new fellowship of the ring")
print(samp6)

INFO:tensorflow:Restoring parameters from checkpoints/e15_i114540_l128.ckpt
A new fellowship of the rings with a bank, shorter at all the world.

There was he sang oft, but at least the tarred were they wish as he had said they sat on it and hurry that the shadow they had a galrilings and here. All to the waters was been began the bear and tried in the dark bent which she stowed.

‘Then it was neas of stories that then then, best a some as they sound in that they deep and to the day is them. Bilbo were not a side to a lord, but that his shorls all the way or horn, all the like sang, and then the way, what were stay. The storiss though we said.

‘That say and horry tha the with was the what all him. As I should tell we there some soon at his fellows and his tale as all a long wars. A time was strong, and who were all them to time. He seemed, se
