### Character prediction using RNNs

In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

In [2]:
import os
import requests
import random

In [3]:
import numpy as np
import tensorflow as tf

In [4]:
%matplotlib inline

import matplotlib
import matplotlib.pyplot as plt

In [5]:
from six.moves import urllib
from bs4 import BeautifulSoup

In [6]:
print(np.__version__)
print(tf.__version__)

1.13.3
1.4.1


In [7]:
# Every character the model can read or emit. A character's position in this
# string is the integer class index used for its one-hot encoding.
VOCABULARY = (
    " $%'()+,-./0123456789:;=?ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "\\^_abcdefghijklmnopqrstuvwxyz{|}"
)

In [8]:
# Character -> class index, i.e. the character's position in VOCABULARY.
lookup = dict(zip(VOCABULARY, range(len(VOCABULARY))))

In [9]:
# Show ten random (character, index) pairs as a sanity check of the mapping.
# random.sample requires a sequence; on Python 3 dict.items() is only a view,
# so materialise it as a list first (this is also valid on Python 2).
sample_lookup = random.sample(list(lookup.items()), 10)
sample_lookup

[('^', 52),
 ('X', 48),
 ('l', 65),
 ('d', 57),
 ('B', 26),
 ('7', 18),
 (':', 21),
 ('G', 31),
 ('=', 23),
 ('J', 34)]

In [11]:
# Number of one-hot frames fed per step: with a value of 2 the X/y slicing
# further down leaves exactly one input frame and one target frame.
SEQUENCE_LENGTH = 2

In [12]:
def one_hot(batch, sequence_length):
    """One-hot encode a batch of text lines.

    Args:
        batch: sized iterable of strings (or character sequences), one per
            batch entry.
        sequence_length: number of time steps in the encoded array; no line
            may contain more in-vocabulary characters than this.

    Returns:
        A float NumPy array of shape
        ``(len(batch), sequence_length, len(VOCABULARY))``. Position
        ``[i, t, c]`` is 1 when the t-th kept character of line ``i`` has
        vocabulary index ``c``; time steps past the end of a line stay zero.

    Raises:
        AssertionError: if, after dropping out-of-vocabulary characters, a
            line has fewer than 2 or more than ``sequence_length`` characters.
    """
    one_hot_batch = np.zeros((len(batch), sequence_length, len(VOCABULARY)))

    # Iterate through every line of text in a batch
    for index, line in enumerate(batch):
        # Characters that are not in the vocabulary are silently dropped.
        line = [x for x in line if x in lookup]
        # Bound the line by the time axis of the array that is actually being
        # filled, rather than by a separately defined global constant.
        assert 2 <= len(line) <= sequence_length

        # Iterate through every character in a line
        for offset, character in enumerate(line):
            one_hot_batch[index, offset, lookup[character]] = 1

    return one_hot_batch

#### Sequence length calculation

The sequence length here will **be the same for all our inputs** because they have been generated using the sliding window.

We've sliced away either the first frame (for the labels) or the last frame (for the input) so the sequence length will be *SEQUENCE_LENGTH - 1*

In [13]:
def get_mask(target):
    """Mark which frames of ``target`` contain any non-zero value.

    Takes the largest absolute value along axis 2 of ``target``. For one-hot
    frames this yields 1.0 for a frame that encodes a character and 0.0 for
    an all-zero (padding) frame.

    Args:
        target: tensor with at least three axes; presumably laid out as
            (batch, time, classes) -- confirm against the caller.

    Returns:
        A tensor with axis 2 of ``target`` reduced away.
    """
    # `axis` is the current name for the deprecated `reduction_indices` keyword.
    mask = tf.reduce_max(tf.abs(target), axis=2)
    return mask

def get_sequence_length(target):
    """Count the non-empty frames of every sequence in ``target``.

    Sums the per-frame mask from ``get_mask`` along axis 1, so for one-hot
    input each entry is the number of frames that encode a character.

    Args:
        target: tensor accepted by ``get_mask``.

    Returns:
        A tensor with one (floating point) length per leading-axis entry.
    """
    mask = get_mask(target)
    # `axis` is the current name for the deprecated `reduction_indices` keyword.
    sequence_length = tf.reduce_sum(mask, axis=1)

    return sequence_length

#### RNN for training and prediction

In [14]:
# Width of each GRU cell (also the width of the recurrent state placeholders).
num_neurons = 200
# Number of GRU cells stacked inside the MultiRNNCell.
cell_layers = 2

# The network predicts one class per vocabulary character.
num_classes = len(VOCABULARY)

In [15]:
def build_rnn(data, num_steps, sequence_length, initial=None):
    """Build a stacked-GRU network with a per-step softmax over the vocabulary.

    Args:
        data: input tensor passed to ``tf.nn.dynamic_rnn`` as ``inputs``.
        num_steps: number of time steps in ``data``; used to reshape the
            flattened predictions back to one row per step.
        sequence_length: per-example sequence lengths passed to
            ``tf.nn.dynamic_rnn``.
        initial: optional initial state for the stacked cell (one entry per
            layer); ``None`` lets ``dynamic_rnn`` create a zero state.

    Returns:
        A pair ``(prediction, state)`` where ``prediction`` has shape
        ``[-1, num_steps, num_classes]`` and holds softmax probabilities, and
        ``state`` is the final recurrent state returned by ``dynamic_rnn``.
    """
    # One GRU cell per layer. (A stray single GRUCell that was built here but
    # never used has been removed.)
    multi_cell = tf.nn.rnn_cell.MultiRNNCell(
        [tf.nn.rnn_cell.GRUCell(num_neurons) for _ in range(cell_layers)])

    output, state = tf.nn.dynamic_rnn(
        inputs=data,
        cell=multi_cell,
        dtype=tf.float32,
        initial_state=initial,
        sequence_length=sequence_length)

    # Output projection. These variables are unnamed, so they must keep being
    # created after the RNN and in this order for their auto-generated names
    # to line up with a previously saved checkpoint.
    weight = tf.Variable(tf.truncated_normal([num_neurons, num_classes], stddev=0.01))
    bias = tf.Variable(tf.constant(0.1, shape=[num_classes]))

    # Collapse all leading axes so a single matmul scores every time step.
    flattened_output = tf.reshape(output, [-1, num_neurons])

    prediction = tf.nn.softmax(tf.matmul(flattened_output, weight) + bias)
    prediction = tf.reshape(prediction, [-1, num_steps, num_classes])

    return prediction, state

In [42]:
# Clear the default graph so that re-running the graph-building cells below
# starts from scratch instead of adding a second copy of every op.
tf.reset_default_graph()

#### Set up the inputs to the RNN

* One batch of SEQUENCE_LENGTH characters is the input sequence
* The training X and the target y should be constructed from this input
* $S_t$ is the input and $S_{t+1}$ is the target
* Slice the sequence to get X, X has the last frame cut away
* Slice the sequence to get the corresponding y, y has the first frame cut away


**Here this means that there is exactly one character at the input and this is used to predict exactly one character at the output**

In [43]:
# One window of one-hot encoded characters: a batch of 1, SEQUENCE_LENGTH
# frames, and one column per vocabulary character.
sequence = tf.placeholder(tf.float32, [1, SEQUENCE_LENGTH, len(VOCABULARY)])

In [44]:
# Input: the first SEQUENCE_LENGTH - 1 frames (everything except the last one).
X = tf.slice(sequence, (0, 0, 0), (-1, SEQUENCE_LENGTH - 1, -1))
# Target: everything from frame 1 onwards (the first frame is dropped).
y = tf.slice(sequence, (0, 1, 0), (-1, -1, -1))

#### Placeholder for the internal recurrent activation state

These placeholders are used to re-initialize the RNN with its previous state after each character prediction. There are 2 state placeholders because the multi-RNN cell stacks 2 GRU layers and each layer has its own state.

In [45]:
# One recurrent-state placeholder per GRU layer, for a batch of 1. They are fed
# as the initial state of the stacked cell on every prediction step.
state1 = tf.placeholder(tf.float32, [1, num_neurons])
state2 = tf.placeholder(tf.float32, [1, num_neurons])

In [46]:
state1, state2

(<tf.Tensor 'Placeholder_1:0' shape=(1, 200) dtype=float32>,
 <tf.Tensor 'Placeholder_2:0' shape=(1, 200) dtype=float32>)

In [47]:
# Derived from the target frames: the number of non-zero frames in y.
sequence_length = get_sequence_length(y)

In [48]:
# NOTE: build_rnn returns (prediction, state), so despite its name `output`
# holds the final recurrent state of the stacked cell, not the RNN outputs.
prediction, output = build_rnn(X, num_steps=SEQUENCE_LENGTH - 1,
                               sequence_length=sequence_length, initial=(state1, state2))

### Restore the trained model for prediction

In [199]:
# Directory holding the trained model's checkpoint files (relative to the
# notebook's working directory).
checkpoint_dir = './sample_checkpoint_output'

In [200]:
# Session used for restoring the weights and for every prediction step below.
# NOTE(review): it is never closed in the visible cells.
sess = tf.Session()

In [201]:
# Look up the latest checkpoint recorded in checkpoint_dir; the restore cell
# below guards against this lookup coming back empty.
checkpoint = tf.train.get_checkpoint_state(checkpoint_dir)

In [202]:
checkpoint.model_checkpoint_path

u'./sample_checkpoint_output/char_pred-9'

#### Build the graph before restoring the parameters from our checkpoint

Restoring the model only initializes the variables; the graph has to be explicitly set up beforehand.

In [203]:
# Load the trained weights into the graph built above. Without a checkpoint the
# variables would stay uninitialised and the first sess.run would fail with a
# much less helpful error, so stop here with a clear message instead.
if checkpoint and checkpoint.model_checkpoint_path:
    tf.train.Saver().restore(sess, checkpoint.model_checkpoint_path)
else:
    raise IOError('No checkpoint found in {!r}'.format(checkpoint_dir))

INFO:tensorflow:Restoring parameters from ./sample_checkpoint_output/char_pred-9


#### Initial characters and length of sequence to generate

In [204]:
# Text the generated sequence starts with; its characters must be in VOCABULARY.
gen_seed = 'We'
# Number of characters to generate after the seed.
gen_length = 200

#### The initial state value of our RNN

We'll evaluate the recurrent activation state and feed it back for every character prediction

In [205]:
# All-zero starting state for each of the two GRU layers, shaped like the
# state placeholders: (batch of 1, num_neurons).
curr_state1 = np.zeros((1, num_neurons))
curr_state2 = np.zeros_like(curr_state1)

In [206]:
# The generated text starts out as the seed and grows one character per step.
gen_text = gen_seed

#### Sample from the probability distribution of the RNN

* Sampling produces more varied generated sequences
* If we always chose the most likely character, we might predict the same sentence over and over again
* Characters with high output probability are more likely to be chosen but less likely characters are also possible

In [207]:
# Below 1.0 the distribution is sharpened (safer, more repetitive text);
# above 1.0 it is flattened (more varied, more error-prone text).
sampling_temperature = 0.4

def sample(dist, temperature=None, vocabulary=None):
    """Draw one character from a temperature-scaled probability distribution.

    The probabilities are rescaled as ``softmax(log(dist) / temperature)`` and
    a single index is drawn from the result with ``np.random.choice``.

    Args:
        dist: 1-D array-like of non-negative probabilities, one per
            vocabulary entry.
        temperature: positive scaling factor; defaults to the module-level
            ``sampling_temperature``.
        vocabulary: indexable collection of characters aligned with ``dist``;
            defaults to the module-level ``VOCABULARY``.

    Returns:
        The vocabulary entry at the sampled index.

    Raises:
        ValueError: if ``temperature`` is not positive or ``dist`` has no
            positive entry.
    """
    if temperature is None:
        temperature = sampling_temperature
    if vocabulary is None:
        vocabulary = VOCABULARY
    if temperature <= 0:
        raise ValueError('temperature must be positive')

    # Work in float64: float32 network outputs renormalised in float32 can
    # miss the sum-to-one tolerance that np.random.choice enforces.
    probs = np.asarray(dist, dtype=np.float64)
    if not np.any(probs > 0):
        raise ValueError('dist must contain at least one positive probability')

    # log(0) is -inf, which correctly becomes probability 0 again after exp;
    # only the warning is suppressed.
    with np.errstate(divide='ignore'):
        logits = np.log(probs) / temperature
    # Subtracting the maximum leaves the softmax unchanged but keeps exp()
    # from overflowing.
    logits -= logits.max()
    weights = np.exp(logits)
    weights /= weights.sum()

    choice = np.random.choice(len(weights), p=weights)
    return vocabulary[choice]

#### Generate as many characters as the length specified

In [208]:
if not gen_seed:
    raise ValueError('gen_seed must contain at least one character')

# Restart from the seed and a zero state so that re-running this cell always
# produces a fresh sample instead of appending to the previous one.
gen_text = gen_seed
curr_state1 = np.zeros((1, num_neurons))
curr_state2 = np.zeros((1, num_neurons))

# The graph only reads the first frame of `sequence` as input; the second frame
# is the target slot and is used solely to derive a sequence length of 1. Any
# in-vocabulary character works as filler there, hence the '?'.

# Prime the recurrent state on every seed character except the last, so the
# whole seed (not just its final character) conditions the generated text.
for character in gen_seed[:-1]:
    curr_state1, curr_state2 = sess.run(output, {
        state1: curr_state1,
        state2: curr_state2,
        sequence: one_hot([character + '?'], sequence_length=SEQUENCE_LENGTH)
    })

for _ in range(gen_length):
    feed = {
        state1: curr_state1,
        state2: curr_state2,
        sequence: one_hot([gen_text[-1] + '?'], sequence_length=SEQUENCE_LENGTH)
    }

    # Feed the last recurrent activation to initialize our RNN and keep the
    # new state for the next step
    gen_prediction_eval, (curr_state1, curr_state2) = sess.run(
        [prediction, output], feed)

    # Predict just the next character: batch entry 0, time step 0
    gen_text += sample(gen_prediction_eval[0, 0])

In [209]:
gen_text

'Weiiee r e nr  e   etee    isi  ncent  ene et  t iase e ne  n  tone en ee   n o  rene o t a    a   r n   tnret n    ren    eoncte ne tese eannsh  r      e aite    c   edtinee rin  ee   n e  mrn ner a   '