In [1]:
# Adapted from : https://r2rt.com/recurrent-neural-networks-in-tensorflow-ii.html

import numpy as np
import tensorflow as tf
%matplotlib inline
import matplotlib.pyplot as plt
import time
import os
import urllib.request
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [2]:
def ptb_iterator(raw_data, batch_size, num_steps, steps_ahead=1):

    raw_data = np.array(raw_data, dtype=np.int32)
    data_len = len(raw_data)
    batch_len = data_len // batch_size
    data = np.zeros([batch_size, batch_len], dtype=np.int32)
    offset = 0
    if data_len % batch_size:
        offset = np.random.randint(0, data_len % batch_size)
    for i in range(batch_size):
        data[i] = raw_data[batch_len * i + offset:batch_len * (i + 1) + offset]
    epoch_size = (batch_len - steps_ahead) // num_steps
    if epoch_size == 0:
        raise ValueError("epoch_size == 0, decrease batch_size or num_steps")
    for i in range(epoch_size):
        x = data[:, i*num_steps:(i+1)*num_steps]
        y = data[:, i*num_steps+1:(i+1)*num_steps+steps_ahead]
        yield (x, y)
    if epoch_size * num_steps < batch_len - steps_ahead:
        yield (data[:, epoch_size*num_steps : batch_len - steps_ahead], data[:, epoch_size*num_steps + 1:])


def shuffled_ptb_iterator(raw_data, batch_size, num_steps):
    raw_data = np.array(raw_data, dtype=np.int32)
    r = len(raw_data) % num_steps
    if r:
        n = np.random.randint(0, r)
        raw_data = raw_data[n:n + len(raw_data) - r]
    raw_data = np.reshape(raw_data, [-1, num_steps])
    np.random.shuffle(raw_data)
    num_batches = int(np.ceil(len(raw_data) / batch_size))
    for i in range(num_batches):
        data = raw_data[i*batch_size:min(len(raw_data), (i+1)*batch_size),:]
        yield (data[:,:-1], data[:,1:])

In [3]:
"""
Load and process data, utility functions
"""

file_url = 'https://raw.githubusercontent.com/jcjohnson/torch-rnn/master/data/tiny-shakespeare.txt'
file_name = 'tinyshakespeare.txt'
if not os.path.exists(file_name):
    urllib.request.urlretrieve(file_url, file_name)

with open(file_name,'r') as f:
    raw_data = f.read()
    print("Data length:", len(raw_data))

vocab = set(raw_data)
vocab_size = len(vocab)
idx_to_vocab = dict(enumerate(vocab))
vocab_to_idx = dict(zip(idx_to_vocab.values(), idx_to_vocab.keys()))

data = [vocab_to_idx[c] for c in raw_data]
del raw_data

def gen_epochs(n, num_steps, batch_size):
    for i in range(n):
        yield ptb_iterator(data, batch_size, num_steps)

def reset_graph():
    if 'sess' in globals() and sess:
        sess.close()
    tf.reset_default_graph()

def train_network(g, num_epochs, num_steps = 200, batch_size = 32, verbose = True, save=False):
    tf.set_random_seed(2345)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        training_losses = []
        for idx, epoch in enumerate(gen_epochs(num_epochs, num_steps, batch_size)):
            training_loss = 0
            steps = 0
            training_state = None
            for X, Y in epoch:
                steps += 1

                if X.shape[1] != g['x'].shape[1]:
                    break 
                    
                feed_dict={g['x']: X, g['y']: Y}
                if training_state is not None:
                    feed_dict[g['init_state']] = training_state
                training_loss_, training_state, _ = sess.run([g['total_loss'],
                                                      g['final_state'],
                                                      g['train_step']],
                                                             feed_dict)
                training_loss += training_loss_
            if verbose:
                print("Average training loss for Epoch", idx, ":", training_loss/steps)
            training_losses.append(training_loss/steps)

        g['saver'].save(sess, "model_gru.ckpt")

    return training_losses

Data length: 1115394


In [4]:
def build_basic_rnn_graph_with_list(
    state_size = 100,
    num_classes = vocab_size,
    batch_size = 32,
    num_steps = 200,
    learning_rate = 1e-4):

    reset_graph()

    x = tf.placeholder(tf.int32, [batch_size, num_steps], name='input_placeholder')
    y = tf.placeholder(tf.int32, [batch_size, num_steps], name='labels_placeholder')

    x_one_hot = tf.one_hot(x, num_classes) # <n, t, f>
    rnn_inputs = [tf.squeeze(i,squeeze_dims=[1]) for i in tf.split(x_one_hot, num_steps, axis=1)]

    cell = tf.contrib.rnn.GRUCell(state_size)
    init_state = cell.zero_state(batch_size, tf.float32)
    rnn_outputs, final_state = tf.contrib.rnn.static_rnn(cell, rnn_inputs, initial_state=init_state)

    with tf.variable_scope('softmax'):
        W = tf.get_variable('W', [state_size, num_classes])
        b = tf.get_variable('b', [num_classes], initializer=tf.constant_initializer(0.0))
    logits = [tf.matmul(rnn_output, W) + b for rnn_output in rnn_outputs]
    predictions = [tf.nn.softmax(logit) for logit in logits]

    y_as_list = [tf.squeeze(i, squeeze_dims=[1]) for i in tf.split(y, num_steps, axis=1)] # t arrays of tensor <n, o>

    loss_weights = [tf.ones([batch_size]) for i in range(num_steps)]
    losses = tf.contrib.legacy_seq2seq.sequence_loss_by_example(logits, y_as_list, loss_weights)
    total_loss = tf.reduce_mean(losses)
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(total_loss)
    saver = tf.train.Saver()

    return dict(
        x = x,
        y = y,
        init_state = init_state,
        final_state = final_state,
        total_loss = total_loss,
        train_step = train_step,
        preds = predictions,
        saver = saver
    )

In [5]:
t = time.time()
g = build_basic_rnn_graph_with_list()
print("It took", time.time() - t, "seconds to build the graph.")

It took 26.46511220932007 seconds to build the graph.


In [6]:
t = time.time()
train_network(g, 10)
print("It took", time.time() - t, "seconds to train for 10 epochs.")

Average training loss for Epoch 0 : 3.70014415877
Average training loss for Epoch 1 : 3.28273518426
Average training loss for Epoch 2 : 3.23873631477
Average training loss for Epoch 3 : 3.14900733948
Average training loss for Epoch 4 : 3.03178640911
Average training loss for Epoch 5 : 2.90671122006
Average training loss for Epoch 6 : 2.79576170513
Average training loss for Epoch 7 : 2.70284665653
Average training loss for Epoch 8 : 2.62354471207
Average training loss for Epoch 9 : 2.5565119648
It took 255.19610476493835 seconds to train for 10 epochs.


In [9]:
def generate_characters(g, checkpoint, num_chars, prompt='A', pick_top_chars=None):
    """ Accepts a current character, initial state"""

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        if checkpoint is None:
            g['saver'].restore(sess, tf.train.latest_checkpoint('.'))

        state = None
        current_char = vocab_to_idx[prompt]
        chars = [current_char]

        for i in range(num_chars):
            if state is not None:
                feed_dict={g['x']: [[current_char]], g['init_state']: state}
            else:
                feed_dict={g['x']: [[current_char]]}

            preds, state = sess.run([g['preds'],g['final_state']], feed_dict)

            if pick_top_chars is not None:
                p = np.squeeze(preds)
                p[np.argsort(p)[:-pick_top_chars]] = 0
                p = p / np.sum(p)
                current_char = np.random.choice(vocab_size, 1, p=p)[0]
            else:
                current_char = np.random.choice(vocab_size, 1, p=np.squeeze(preds))[0]

            chars.append(current_char)

    chars = map(lambda x: idx_to_vocab[x], chars)
    print("".join(chars))
    return("".join(chars))

In [10]:
# Training after 10 epoch
g = build_basic_rnn_graph_with_list(batch_size=1, num_steps = 1)
generate_characters(g, None, 1000, prompt='A', pick_top_chars=5)

INFO:tensorflow:Restoring parameters from ./model_gru.ckpt
ANTIE TI heour band wise mire, be wised

ord so the te th le anst
Aed an than has sint oou hind shis,
Anssan seend
the ters tat ait an to seens ons aist oor,

hon than there so thase th an thith wil aet,e nar the tho het sale mathe war thant aress oot tha to linet, tho  oord therse
 aatt or mire anet thand she torere th son ho ehes thes ao tharstare sire, mand soe me thet oo so le thet oou seas thir wone an te sind the sorthet tounde she teon,

or hane thet sher sarert on teot one then, ahes the hese thes te th meril mond th ee thas hother, thes fase ahat  ine mone ther the th mite tonds and wite mher thes here hhe than se eith tean sh thes ore ae ee sou  hetter tae thond and are th thend whe thand,
And while the  eree shir thetd mare he mil sien toe hher site tithe s ao thin, tit aou hon sang sath nind wir mant, an thon shes ho thest ea le sord whe teet the te thang hong aou thet orere hhas tha s ir she sirs ao thare thin the 

''